diff options
| author | Craig Jennings <c@cjennings.net> | 2026-01-20 06:52:36 -0600 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-01-20 06:52:36 -0600 |
| commit | a21f2007a11000b578596057048616a50ed0431e (patch) | |
| tree | 212f82e4f7d87193f9e877b59ef7f457fdeaafd9 /scripts/testing/run-test-baremetal.sh | |
| parent | 2b696749af25b43a88ccc56111ed17eb73d162ca (diff) | |
feat(testing): add bare metal ZFS test script + fix scrub timer check
Add run-test-baremetal.sh for testing on physical ZFS systems:
- SSH to target host and run archsetup
- Support for ZFS genesis snapshot rollback
- Validate-only mode for existing installs
- Same validation checks as VM tests
Fix grep -c multi-line output issue in ZFS scrub timer check.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'scripts/testing/run-test-baremetal.sh')
| -rwxr-xr-x | scripts/testing/run-test-baremetal.sh | 321 |
1 files changed, 321 insertions, 0 deletions
#!/bin/bash
# Run archsetup test on bare metal ZFS system
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
#
# File: scripts/testing/run-test-baremetal.sh
#
# This script:
# 1. Connects to bare metal ZFS system via SSH
# 2. Optionally rolls back to genesis snapshots first (--rollback-first)
# 3. Transfers archsetup as a git bundle
# 4. Executes archsetup in the background and polls for completion
# 5. Captures logs and validates results
# 6. Optionally rolls back to genesis snapshots afterwards (--rollback-after)
#
# Exit status: 0 when archsetup and all validations passed, 1 otherwise.
#
# Helpers such as section/step/info/warn/error/success/fatal/init_logging and
# ssh_cmd/run_all_validations/... come from the sourced lib/ files.
# NOTE(review): this script relies on `fatal` terminating the script and on
# ssh_cmd reading VM_IP / ROOT_PASSWORD -- confirm against lib/*.sh.

# Only -e is enabled: -u / pipefail would also apply to the sourced library
# functions, whose tolerance for those options is not visible from here.
set -e

# Get script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Source utilities
source "$SCRIPT_DIR/lib/logging.sh"
source "$SCRIPT_DIR/lib/validation.sh"

# Argument defaults
ROLLBACK_FIRST=false
ROLLBACK_AFTER=false
SKIP_SLOW_PACKAGES=false
VALIDATE_ONLY=false
TARGET_HOST=""
ROOT_PASSWORD=""

# SSH options shared by every direct ssh/scp call in this script. The name is
# prefixed so it cannot clash with variables defined by the sourced libraries.
BM_SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null)

#######################################
# Print usage and exit.
# Arguments: $1 - exit status (optional, default 1)
#######################################
usage() {
    echo "Usage: $0 --host <hostname> --password <root_password> [options]"
    echo ""
    echo "Required:"
    echo " --host <hostname> Target bare metal host (e.g., ratio.local)"
    echo " --password <password> Root password for SSH"
    echo ""
    echo "Options:"
    echo " --rollback-first Roll back to genesis snapshots before running"
    echo " --rollback-after Roll back to genesis snapshots after test (cleanup)"
    echo " --skip-slow-packages Skip slow packages for faster testing"
    echo " --validate-only Skip archsetup, only run validation checks"
    echo " --help Show this help"
    exit "${1:-1}"
}

#######################################
# Abort with a usage error when an option that needs a value has none.
# Without this check `shift 2` fails and `set -e` ends the script silently.
# Arguments: the remaining command line ("$@"), option name first
#######################################
bm_require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires a value"
        usage
    fi
}

#######################################
# Run a command under sshpass without exposing the password on the command
# line: the password is handed over via the SSHPASS environment variable of
# the child process only (sshpass -e) instead of `sshpass -p`.
# Globals:   ROOT_PASSWORD (read)
# Arguments: the command to run (e.g. ssh ..., scp ...)
# Returns:   exit status of sshpass
#######################################
bm_sshpass() {
    SSHPASS="$ROOT_PASSWORD" sshpass -e "$@"
}

#######################################
# Roll every dataset that has a @genesis snapshot back to it.
# A failed rollback of a single dataset is logged as a warning and does not
# stop the remaining rollbacks.
# Globals:   LOGFILE (read)
# Returns:   1 when the snapshot list cannot be retrieved, 0 otherwise
#######################################
bm_rollback_to_genesis() {
    local listing ds
    local -a datasets=()

    step "Getting list of datasets with genesis snapshots"
    listing=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'") || return 1

    # Read into an array (one dataset per line) instead of word-splitting.
    mapfile -t datasets <<< "$listing"

    step "Rolling back all datasets to genesis"
    for ds in "${datasets[@]}"; do
        [[ -n "$ds" ]] || continue
        info "Rolling back $ds@genesis"
        if ! ssh_cmd "zfs rollback -r '$ds@genesis'" &>> "$LOGFILE"; then
            warn "Failed to rollback $ds@genesis"
        fi
    done
    return 0
}

#######################################
# Bundle the current HEAD and clone it to /tmp/archsetup-test on the target.
# Globals:   PROJECT_ROOT, TARGET_HOST, LOGFILE (read)
# Arguments: $1 - path of the (already created) local bundle file
# Returns:   0 on success, 1 as soon as one step fails
#######################################
bm_transfer_bundle() {
    local bundle=$1

    step "Creating git bundle"
    git -C "$PROJECT_ROOT" bundle create "$bundle" HEAD >> "$LOGFILE" 2>&1 || return 1

    step "Transferring to $TARGET_HOST"
    ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test" || return 1
    bm_sshpass scp "${BM_SSH_OPTS[@]}" \
        "$bundle" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1 || return 1

    step "Extracting on target"
    ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1 || return 1
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --host)
            bm_require_value "$@"
            TARGET_HOST="$2"
            shift 2
            ;;
        --password)
            bm_require_value "$@"
            ROOT_PASSWORD="$2"
            shift 2
            ;;
        --rollback-first)
            ROLLBACK_FIRST=true
            shift
            ;;
        --rollback-after)
            ROLLBACK_AFTER=true
            shift
            ;;
        --skip-slow-packages)
            SKIP_SLOW_PACKAGES=true
            shift
            ;;
        --validate-only)
            VALIDATE_ONLY=true
            shift
            ;;
        --help)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            echo "Unknown option: $1"
            usage
            ;;
    esac
done

# Validate required args
if [[ -z "$TARGET_HOST" || -z "$ROOT_PASSWORD" ]]; then
    echo "Error: --host and --password are required"
    usage
fi

# Configuration
TIMESTAMP=$(date +'%Y%m%d-%H%M%S')
TEST_RESULTS_DIR="$PROJECT_ROOT/test-results/baremetal-$TIMESTAMP"
ARCHZFS_INBOX="$HOME/code/archzfs/inbox"

# Override VM_IP for validation.sh ssh_cmd function
VM_IP="$TARGET_HOST"

# Initialize logging
mkdir -p "$TEST_RESULTS_DIR"
LOGFILE="$TEST_RESULTS_DIR/test.log"
init_logging "$LOGFILE"

section "Bare Metal Test Run: $TIMESTAMP"
info "Target: $TARGET_HOST"

# Test SSH connectivity
step "Testing SSH connectivity to $TARGET_HOST"
if ! bm_sshpass ssh "${BM_SSH_OPTS[@]}" -o ConnectTimeout=10 \
    "root@$TARGET_HOST" "echo connected" &>/dev/null; then
    fatal "Cannot connect to $TARGET_HOST via SSH"
fi
success "SSH connection OK"

# Check it's a ZFS system
step "Verifying ZFS root"
if ! ssh_cmd "zfs list zroot" &>> "$LOGFILE"; then
    fatal "Target is not a ZFS system (no zroot pool)"
fi
success "ZFS root confirmed"

# Rollback to genesis if requested
if $ROLLBACK_FIRST; then
    section "Rolling Back to Genesis Snapshots"

    if ! bm_rollback_to_genesis; then
        fatal "Could not list genesis snapshots on $TARGET_HOST"
    fi
    success "Rollback complete"

    # Need to reconnect after rollback
    sleep 5
    step "Reconnecting after rollback"
    if ! ssh_cmd "echo reconnected" &>/dev/null; then
        fatal "Lost connection after rollback"
    fi
    success "Reconnected"
fi

if ! $VALIDATE_ONLY; then
    # Capture pre-install state
    capture_pre_install_state "$TEST_RESULTS_DIR"

    # Transfer archsetup
    section "Transferring ArchSetup"

    BUNDLE_FILE=$(mktemp)
    TRANSFER_RC=0
    bm_transfer_bundle "$BUNDLE_FILE" || TRANSFER_RC=$?
    # Always remove the local bundle, including when a transfer step failed.
    rm -f -- "$BUNDLE_FILE"
    if [ "$TRANSFER_RC" -ne 0 ]; then
        fatal "Failed to transfer archsetup to $TARGET_HOST (see $LOGFILE)"
    fi
    success "ArchSetup transferred"

    # Execute archsetup
    section "Executing ArchSetup"

    start_timer "archsetup"
    step "Starting archsetup on $TARGET_HOST"
    info "This will take 30-60 minutes"

    REMOTE_LOG="/tmp/archsetup-test/archsetup-output.log"
    ARCHSETUP_ARGS=""
    if $SKIP_SLOW_PACKAGES; then
        ARCHSETUP_ARGS="--skip-slow-packages"
        info "Running with --skip-slow-packages"
    fi

    # Start archsetup in background
    ssh_cmd "cd /tmp/archsetup-test && nohup bash archsetup $ARCHSETUP_ARGS > $REMOTE_LOG 2>&1 &"
    success "ArchSetup started in background"

    # Poll for completion
    step "Monitoring archsetup progress"
    POLL_COUNT=0
    MAX_POLLS=180 # 90 minutes max (one poll every 30 seconds)

    while (( POLL_COUNT < MAX_POLLS )); do
        POLL_RC=0
        ssh_cmd "ps aux | grep '[b]ash archsetup' > /dev/null" 2>/dev/null || POLL_RC=$?

        # grep exits 1 when the process is gone. ssh itself reports transport
        # errors with 255; treat those as "unknown" and keep polling instead of
        # concluding that archsetup has finished.
        # NOTE(review): assumes ssh_cmd passes the ssh exit status through; if
        # it does not, this behaves exactly like a plain success/failure check.
        if (( POLL_RC != 0 && POLL_RC != 255 )); then
            break
        fi
        if (( POLL_RC == 255 )); then
            warn "SSH connection to $TARGET_HOST failed while polling; retrying"
        fi

        sleep 30
        POLL_COUNT=$((POLL_COUNT + 1))
        if (( POLL_COUNT % 10 == 0 )); then
            ELAPSED_MINS=$((POLL_COUNT / 2))
            info "Still running... ($ELAPSED_MINS minutes elapsed)"
            # Show last line of progress
            LAST_LINE=$(ssh_cmd "tail -1 $REMOTE_LOG 2>/dev/null" || echo "")
            if [[ -n "$LAST_LINE" ]]; then
                info " $LAST_LINE"
            fi
        fi
    done

    if (( POLL_COUNT >= MAX_POLLS )); then
        error "ArchSetup timed out after 90 minutes"
        ARCHSETUP_EXIT_CODE=124
    else
        step "Retrieving archsetup exit status"
        # The real exit status of the background process is not available;
        # success is inferred from the completion marker in the archsetup log.
        if ssh_cmd "grep -q 'ARCHSETUP_EXECUTION_COMPLETE' /var/log/archsetup-*.log 2>/dev/null"; then
            ARCHSETUP_EXIT_CODE=0
            success "ArchSetup completed successfully"
        else
            ARCHSETUP_EXIT_CODE=1
            error "ArchSetup may have encountered errors"
        fi
    fi

    stop_timer "archsetup"

    # Copy logs
    section "Capturing Test Artifacts"

    step "Copying archsetup log"
    bm_sshpass scp "${BM_SSH_OPTS[@]}" \
        "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" ||
        warn "Could not copy archsetup log"

    step "Copying archsetup output"
    bm_sshpass scp "${BM_SSH_OPTS[@]}" \
        "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" ||
        warn "Could not copy output log"

    # Capture post-install state
    capture_post_install_state "$TEST_RESULTS_DIR"
else
    info "Skipping archsetup (--validate-only)"
    ARCHSETUP_EXIT_CODE=0
    mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install"
fi

# Run validations
run_all_validations
validate_all_services

# Additional ZFS-specific validations
section "ZFS-Specific Validations"
validate_zfs_services

# Analyze logs if we ran archsetup
if ! $VALIDATE_ONLY; then
    analyze_log_diff "$TEST_RESULTS_DIR"
fi

# Generate reports
generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"

# Set validation result. An unset counter counts as a failure, as before, but
# no longer produces a "[: -eq: unary operator expected" error.
if [ "${VALIDATION_FAILED:-1}" -eq 0 ]; then
    VALIDATION_PASSED=true
else
    VALIDATION_PASSED=false
fi

# Generate test report
section "Generating Test Report"

REPORT_FILE="$TEST_RESULTS_DIR/test-report.txt"
cat > "$REPORT_FILE" << EOFREPORT
========================================
Bare Metal ArchSetup Test Report
========================================

Test ID: $TIMESTAMP
Date: $(date +'%Y-%m-%d %H:%M:%S')
Target: $TARGET_HOST
Test Method: Bare Metal ZFS

Results:
 ArchSetup Exit Code: $ARCHSETUP_EXIT_CODE
 Validation: $(if $VALIDATION_PASSED; then echo "PASSED"; else echo "FAILED"; fi)

Validation Summary:
 Passed: $VALIDATION_PASSED_COUNT
 Failed: $VALIDATION_FAILED
 Warnings: $VALIDATION_WARNINGS

Artifacts:
 Log file: $LOGFILE
 Report: $REPORT_FILE
 Results: $TEST_RESULTS_DIR/

EOFREPORT

info "Test report saved: $REPORT_FILE"

# Rollback after if requested
if $ROLLBACK_AFTER; then
    section "Rolling Back to Genesis (cleanup)"

    # Cleanup is best-effort: report a failure but still print the summary.
    if bm_rollback_to_genesis; then
        success "Rollback complete"
    else
        warn "Could not list genesis snapshots on $TARGET_HOST; cleanup rollback skipped"
    fi
fi

# Final summary
section "Test Complete"

if [ "${ARCHSETUP_EXIT_CODE:-1}" -eq 0 ] && [[ "$VALIDATION_PASSED" == true ]]; then
    success "TEST PASSED"
    exit 0
else
    error "TEST FAILED"
    info "Check logs in: $TEST_RESULTS_DIR"
    exit 1
fi
