#!/bin/bash
# Run archsetup test on bare metal ZFS system
# Author: Craig Jennings
# License: GNU GPLv3
#
# This script:
# 1. Connects to bare metal ZFS system via SSH
# 2. Optionally rolls back to genesis snapshots first
# 3. Transfers archsetup
# 4. Executes archsetup
# 5. Captures logs and validates results
# 6. Can rollback to genesis if test fails
#
# Exit status: 0 when archsetup completed and validation passed, 1 otherwise
# (including usage errors).

set -e

# Get script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Source utilities. The logging helpers (section/step/info/warn/error/
# success/fatal, init_logging, start_timer/stop_timer) and the validation
# helpers (ssh_cmd, capture_*_state, run_all_validations, ...) used below are
# not defined in this file; they are expected to come from these two libraries.
source "$SCRIPT_DIR/lib/logging.sh"
source "$SCRIPT_DIR/lib/validation.sh"

# Parse arguments
ROLLBACK_FIRST=false
ROLLBACK_AFTER=false
SKIP_SLOW_PACKAGES=false
VALIDATE_ONLY=false
TARGET_HOST=""
ROOT_PASSWORD=""

# Print the help text and exit.
# Arguments: $1 - exit status (optional, defaults to 1 so that every
#                 error path keeps failing the run)
usage() {
    echo "Usage: $0 --host <hostname> --password <password> [options]"
    echo ""
    echo "Required:"
    echo "  --host <hostname>      Target bare metal host (e.g., ratio.local)"
    echo "  --password <password>  Root password for SSH"
    echo ""
    echo "Options:"
    echo "  --rollback-first       Roll back to genesis snapshots before running"
    echo "  --rollback-after       Roll back to genesis snapshots after test (cleanup)"
    echo "  --skip-slow-packages   Skip slow packages for faster testing"
    echo "  --validate-only        Skip archsetup, only run validation checks"
    echo "  --help                 Show this help"
    exit "${1:-1}"
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --host)
            # Without this check `shift 2` fails and set -e ends the script
            # without any explanation.
            if [[ $# -lt 2 ]]; then
                echo "Error: --host requires a value"
                usage
            fi
            TARGET_HOST="$2"
            shift 2
            ;;
        --password)
            if [[ $# -lt 2 ]]; then
                echo "Error: --password requires a value"
                usage
            fi
            ROOT_PASSWORD="$2"
            shift 2
            ;;
        --rollback-first)
            ROLLBACK_FIRST=true
            shift
            ;;
        --rollback-after)
            ROLLBACK_AFTER=true
            shift
            ;;
        --skip-slow-packages)
            SKIP_SLOW_PACKAGES=true
            shift
            ;;
        --validate-only)
            VALIDATE_ONLY=true
            shift
            ;;
        --help)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            echo "Unknown option: $1"
            usage
            ;;
    esac
done

# Validate required args
if [ -z "$TARGET_HOST" ] || [ -z "$ROOT_PASSWORD" ]; then
    echo "Error: --host and --password are required"
    usage
fi

# Configuration
TIMESTAMP=$(date +'%Y%m%d-%H%M%S')
TEST_RESULTS_DIR="$PROJECT_ROOT/test-results/baremetal-$TIMESTAMP"
ARCHZFS_INBOX="$HOME/code/archzfs/inbox"

# SSH/SCP options shared by every direct connection made from this script.
# Host key checking is disabled because the target is a disposable test box.
SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null)

# Polling parameters for the archsetup run: 180 polls x 30 s = 90 minutes max.
POLL_INTERVAL=30
MAX_POLLS=180

# Override VM_IP for validation.sh ssh_cmd function
VM_IP="$TARGET_HOST"

# Print the name of every dataset on the target that has a @genesis
# snapshot, one per line.
bm_list_genesis_datasets() {
    ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"
}

# Roll each dataset passed as an argument back to its @genesis snapshot.
# A failed rollback is reported with a warning and does not stop the loop.
# Globals:   LOGFILE (appended to)
# Arguments: dataset names; empty names are skipped
bm_rollback_genesis_datasets() {
    local ds
    for ds in "$@"; do
        [ -n "$ds" ] || continue
        info "Rolling back $ds@genesis"
        # Single quotes keep a dataset name containing spaces intact in the
        # remote shell.
        if ! ssh_cmd "zfs rollback -r '$ds@genesis'" &>> "$LOGFILE"; then
            warn "Failed to rollback $ds@genesis"
        fi
    done
}

# Initialize logging
mkdir -p "$TEST_RESULTS_DIR"
LOGFILE="$TEST_RESULTS_DIR/test.log"
init_logging "$LOGFILE"

section "Bare Metal Test Run: $TIMESTAMP"
info "Target: $TARGET_HOST"

# Test SSH connectivity
# The password is handed to sshpass through the SSHPASS environment variable
# (-e) rather than -p, so it does not show up in the sshpass process
# arguments.
step "Testing SSH connectivity to $TARGET_HOST"
if ! SSHPASS="$ROOT_PASSWORD" sshpass -e ssh "${SSH_OPTS[@]}" \
    -o ConnectTimeout=10 "root@$TARGET_HOST" "echo connected" &>/dev/null; then
    fatal "Cannot connect to $TARGET_HOST via SSH"
fi
success "SSH connection OK"

# Check it's a ZFS system
step "Verifying ZFS root"
if ! ssh_cmd "zfs list zroot" &>> "$LOGFILE"; then
    fatal "Target is not a ZFS system (no zroot pool)"
fi
success "ZFS root confirmed"

# Rollback to genesis if requested
if $ROLLBACK_FIRST; then
    section "Rolling Back to Genesis Snapshots"

    step "Getting list of datasets with genesis snapshots"
    DATASETS=$(bm_list_genesis_datasets)
    # One array element per output line, so names are never word-split or
    # glob-expanded.
    mapfile -t GENESIS_DATASETS <<< "$DATASETS"

    step "Rolling back all datasets to genesis"
    bm_rollback_genesis_datasets "${GENESIS_DATASETS[@]}"
    success "Rollback complete"

    # Need to reconnect after rollback
    sleep 5
    step "Reconnecting after rollback"
    if ! ssh_cmd "echo reconnected" &>/dev/null; then
        fatal "Lost connection after rollback"
    fi
    success "Reconnected"
fi

if ! $VALIDATE_ONLY; then
    # Capture pre-install state
    capture_pre_install_state "$TEST_RESULTS_DIR"

    # Transfer archsetup
    section "Transferring ArchSetup"

    step "Creating git bundle"
    BUNDLE_FILE=$(mktemp)
    if ! git -C "$PROJECT_ROOT" bundle create "$BUNDLE_FILE" HEAD >> "$LOGFILE" 2>&1; then
        # Do not leave the temporary bundle behind on failure.
        rm -f -- "$BUNDLE_FILE"
        fatal "Failed to create git bundle"
        exit 1
    fi

    step "Transferring to $TARGET_HOST"
    ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test"
    if ! SSHPASS="$ROOT_PASSWORD" sshpass -e scp "${SSH_OPTS[@]}" \
        "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
        rm -f -- "$BUNDLE_FILE"
        fatal "Failed to transfer git bundle to $TARGET_HOST"
        exit 1
    fi
    # The local copy is no longer needed once it is on the target.
    rm -f -- "$BUNDLE_FILE"

    step "Extracting on target"
    ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1
    success "ArchSetup transferred"

    # Execute archsetup
    section "Executing ArchSetup"
    start_timer "archsetup"

    step "Starting archsetup on $TARGET_HOST"
    info "This will take 30-60 minutes"

    REMOTE_LOG="/tmp/archsetup-test/archsetup-output.log"
    ARCHSETUP_ARGS=""
    if $SKIP_SLOW_PACKAGES; then
        ARCHSETUP_ARGS="--skip-slow-packages"
        info "Running with --skip-slow-packages"
    fi

    # Start archsetup in background
    ssh_cmd "cd /tmp/archsetup-test && nohup bash archsetup $ARCHSETUP_ARGS > $REMOTE_LOG 2>&1 &"
    success "ArchSetup started in background"

    # Poll for completion
    step "Monitoring archsetup progress"
    POLL_COUNT=0
    while [ "$POLL_COUNT" -lt "$MAX_POLLS" ]; do
        # The [b] bracket keeps grep from matching its own command line.
        if ! ssh_cmd "ps aux | grep '[b]ash archsetup' > /dev/null" 2>/dev/null; then
            break
        fi

        sleep "$POLL_INTERVAL"
        POLL_COUNT=$((POLL_COUNT + 1))

        # Report progress on every tenth poll.
        if [ $((POLL_COUNT % 10)) -eq 0 ]; then
            ELAPSED_MINS=$((POLL_COUNT * POLL_INTERVAL / 60))
            info "Still running... ($ELAPSED_MINS minutes elapsed)"
            # Show last line of progress
            LAST_LINE=$(ssh_cmd "tail -1 $REMOTE_LOG 2>/dev/null" || echo "")
            if [ -n "$LAST_LINE" ]; then
                info " $LAST_LINE"
            fi
        fi
    done

    if [ "$POLL_COUNT" -ge "$MAX_POLLS" ]; then
        error "ArchSetup timed out after $((MAX_POLLS * POLL_INTERVAL / 60)) minutes"
        # 124 is the exit status timeout(1) uses for a timed-out command.
        ARCHSETUP_EXIT_CODE=124
    else
        step "Retrieving archsetup exit status"
        # Success is inferred from a completion marker in the archsetup log,
        # not from the remote process exit status.
        if ssh_cmd "grep -q 'ARCHSETUP_EXECUTION_COMPLETE' /var/log/archsetup-*.log 2>/dev/null"; then
            ARCHSETUP_EXIT_CODE=0
            success "ArchSetup completed successfully"
        else
            ARCHSETUP_EXIT_CODE=1
            error "ArchSetup may have encountered errors"
        fi
    fi

    stop_timer "archsetup"

    # Copy logs (best effort: a missing log only produces a warning)
    section "Capturing Test Artifacts"

    step "Copying archsetup log"
    SSHPASS="$ROOT_PASSWORD" sshpass -e scp "${SSH_OPTS[@]}" \
        "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \
        warn "Could not copy archsetup log"

    step "Copying archsetup output"
    SSHPASS="$ROOT_PASSWORD" sshpass -e scp "${SSH_OPTS[@]}" \
        "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \
        warn "Could not copy output log"

    # Capture post-install state
    capture_post_install_state "$TEST_RESULTS_DIR"
else
    info "Skipping archsetup (--validate-only)"
    ARCHSETUP_EXIT_CODE=0
    mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install"
fi

# Run validations
run_all_validations
validate_all_services

# Additional ZFS-specific validations
section "ZFS-Specific Validations"
validate_zfs_services

# Analyze logs if we ran archsetup
if ! $VALIDATE_ONLY; then
    analyze_log_diff "$TEST_RESULTS_DIR"
fi

# Generate reports
generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"

# Set validation result
# VALIDATION_FAILED is not assigned in this file (presumably maintained by
# validation.sh); an unset value is treated as a failure.
if [ "${VALIDATION_FAILED:-1}" -eq 0 ]; then
    VALIDATION_PASSED=true
else
    VALIDATION_PASSED=false
fi

# Generate test report
section "Generating Test Report"

REPORT_FILE="$TEST_RESULTS_DIR/test-report.txt"
cat > "$REPORT_FILE" << EOFREPORT
========================================
Bare Metal ArchSetup Test Report
========================================

Test ID: $TIMESTAMP
Date: $(date +'%Y-%m-%d %H:%M:%S')
Target: $TARGET_HOST
Test Method: Bare Metal ZFS

Results:
  ArchSetup Exit Code: $ARCHSETUP_EXIT_CODE
  Validation: $(if $VALIDATION_PASSED; then echo "PASSED"; else echo "FAILED"; fi)

Validation Summary:
  Passed: $VALIDATION_PASSED_COUNT
  Failed: $VALIDATION_FAILED
  Warnings: $VALIDATION_WARNINGS

Artifacts:
  Log file: $LOGFILE
  Report: $REPORT_FILE
  Results: $TEST_RESULTS_DIR/

EOFREPORT

info "Test report saved: $REPORT_FILE"

# Rollback after if requested
if $ROLLBACK_AFTER; then
    section "Rolling Back to Genesis (cleanup)"
    DATASETS=$(bm_list_genesis_datasets)
    mapfile -t GENESIS_DATASETS <<< "$DATASETS"
    bm_rollback_genesis_datasets "${GENESIS_DATASETS[@]}"
    success "Rollback complete"
fi

# Final summary
section "Test Complete"

if [ "${ARCHSETUP_EXIT_CODE:-1}" -eq 0 ] && $VALIDATION_PASSED; then
    success "TEST PASSED"
    exit 0
else
    error "TEST FAILED"
    info "Check logs in: $TEST_RESULTS_DIR"
    exit 1
fi