summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Craig Jennings <c@cjennings.net> 2026-01-20 06:52:36 -0600
committer: Craig Jennings <c@cjennings.net> 2026-01-20 06:52:36 -0600
commit: a21f2007a11000b578596057048616a50ed0431e (patch)
tree: 212f82e4f7d87193f9e877b59ef7f457fdeaafd9 /scripts
parent: 2b696749af25b43a88ccc56111ed17eb73d162ca (diff)
feat(testing): add bare metal ZFS test script + fix scrub timer check
Add run-test-baremetal.sh for testing on physical ZFS systems:
- SSH to target host and run archsetup
- Support for ZFS genesis snapshot rollback
- Validate-only mode for existing installs
- Same validation checks as VM tests

Fix grep -c multi-line output issue in ZFS scrub timer check.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/testing/lib/validation.sh 4
-rwxr-xr-x scripts/testing/run-test-baremetal.sh 321
2 files changed, 324 insertions, 1 deletions
diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh
index 1ed84d9..a2e375c 100644
--- a/scripts/testing/lib/validation.sh
+++ b/scripts/testing/lib/validation.sh
@@ -611,7 +611,9 @@ validate_zfs_services() {
validate_service_optional "sanoid.timer" "enabled"
# Check for zfs-scrub timer (pool name varies)
- local scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled || echo 0")
+ local scrub_enabled
+ scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled" | tr -d '[:space:]')
+ scrub_enabled=${scrub_enabled:-0}
if [ "$scrub_enabled" -gt 0 ]; then
validation_pass "ZFS scrub timer enabled"
else
diff --git a/scripts/testing/run-test-baremetal.sh b/scripts/testing/run-test-baremetal.sh
new file mode 100755
index 0000000..709f030
--- /dev/null
+++ b/scripts/testing/run-test-baremetal.sh
@@ -0,0 +1,321 @@
+#!/bin/bash
+# Run archsetup test on bare metal ZFS system
+# Author: Craig Jennings <craigmartinjennings@gmail.com>
+# License: GNU GPLv3
+#
+# This script:
+# 1. Connects to bare metal ZFS system via SSH
+# 2. Optionally rolls back to genesis snapshots first
+# 3. Transfers archsetup
+# 4. Executes archsetup
+# 5. Captures logs and validates results
+# 6. Optionally rolls back to genesis snapshots after the test (--rollback-after)
+
+# Stop at the first unhandled command failure.
+set -o errexit
+
+# Absolute directory of this script, and the repository root two levels up.
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+PROJECT_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)
+
+# Pull in the helper libraries that sit next to this script.
+. "${SCRIPT_DIR}/lib/logging.sh"
+. "${SCRIPT_DIR}/lib/validation.sh"
+
+# Defaults for the command-line options; the parser below overrides them.
+# The two required values start out empty so their absence can be detected.
+TARGET_HOST=""
+ROOT_PASSWORD=""
+ROLLBACK_FIRST=false
+ROLLBACK_AFTER=false
+SKIP_SLOW_PACKAGES=false
+
+# Print the command-line help on stdout, then end the script with status 1.
+# Called for --help as well as for every argument error.
+usage() {
+    printf '%s\n' \
+        "Usage: $0 --host <hostname> --password <root_password> [options]" \
+        "" \
+        "Required:" \
+        " --host <hostname> Target bare metal host (e.g., ratio.local)" \
+        " --password <password> Root password for SSH" \
+        "" \
+        "Options:" \
+        " --rollback-first Roll back to genesis snapshots before running" \
+        " --rollback-after Roll back to genesis snapshots after test (cleanup)" \
+        " --skip-slow-packages Skip slow packages for faster testing" \
+        " --validate-only Skip archsetup, only run validation checks" \
+        " --help Show this help"
+    exit 1
+}
+
+VALIDATE_ONLY=false
+
+# Walk the command line one option at a time. Options that carry a value
+# consume two words, plain flags consume one.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --host|--password)
+            # Without this guard a trailing "--host" or "--password" makes
+            # "shift 2" fail, and "set -e" then ends the script silently.
+            if [[ $# -lt 2 ]]; then
+                echo "Error: $1 requires a value"
+                usage
+            fi
+            if [[ "$1" == "--host" ]]; then
+                TARGET_HOST="$2"
+            else
+                ROOT_PASSWORD="$2"
+            fi
+            shift 2
+            ;;
+        --rollback-first)
+            ROLLBACK_FIRST=true
+            shift
+            ;;
+        --rollback-after)
+            ROLLBACK_AFTER=true
+            shift
+            ;;
+        --skip-slow-packages)
+            SKIP_SLOW_PACKAGES=true
+            shift
+            ;;
+        --validate-only)
+            VALIDATE_ONLY=true
+            shift
+            ;;
+        --help)
+            usage
+            ;;
+        *)
+            echo "Unknown option: $1"
+            usage
+            ;;
+    esac
+done
+
+# Validate required args: both the target host and the root password must be given.
+if [ -z "$TARGET_HOST" ] || [ -z "$ROOT_PASSWORD" ]; then
+    echo "Error: --host and --password are required"
+    usage
+fi
+
+# ssh_cmd in validation.sh addresses the machine through VM_IP, so point it
+# at the bare metal host.
+VM_IP="${TARGET_HOST}"
+
+# Per-run settings: every run writes into its own timestamped results directory.
+TIMESTAMP="$(date '+%Y%m%d-%H%M%S')"
+TEST_RESULTS_DIR="${PROJECT_ROOT}/test-results/baremetal-${TIMESTAMP}"
+ARCHZFS_INBOX="${HOME}/code/archzfs/inbox"
+LOGFILE="${TEST_RESULTS_DIR}/test.log"
+
+# The results directory has to exist before logging is initialised on the log file.
+mkdir -p "${TEST_RESULTS_DIR}"
+init_logging "${LOGFILE}"
+
+section "Bare Metal Test Run: ${TIMESTAMP}"
+info "Target: ${TARGET_HOST}"
+
+# Pre-flight 1: the host must accept a root SSH login with the given password.
+# Host key checking is switched off and no known_hosts entry is recorded.
+step "Testing SSH connectivity to $TARGET_HOST"
+sshpass -p "$ROOT_PASSWORD" ssh \
+    -o StrictHostKeyChecking=no \
+    -o UserKnownHostsFile=/dev/null \
+    -o ConnectTimeout=10 \
+    "root@$TARGET_HOST" "echo connected" &>/dev/null \
+    || fatal "Cannot connect to $TARGET_HOST via SSH"
+success "SSH connection OK"
+
+# Pre-flight 2: the host must have a pool named zroot; the command output
+# is appended to the log file.
+step "Verifying ZFS root"
+ssh_cmd "zfs list zroot" &>> "$LOGFILE" \
+    || fatal "Target is not a ZFS system (no zroot pool)"
+success "ZFS root confirmed"
+
+# Rollback to genesis if requested (--rollback-first): every dataset that has
+# an @genesis snapshot is rolled back before the test starts.
+if $ROLLBACK_FIRST; then
+    section "Rolling Back to Genesis Snapshots"
+
+    step "Getting list of datasets with genesis snapshots"
+    # Check the listing explicitly; otherwise a failure here lets "set -e"
+    # abort the script without naming the cause.
+    if ! DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"); then
+        fatal "Could not list genesis snapshots on $TARGET_HOST"
+    fi
+    if [ -z "$DATASETS" ]; then
+        warn "No @genesis snapshots found on $TARGET_HOST; nothing to roll back"
+    fi
+
+    step "Rolling back all datasets to genesis"
+    ROLLBACK_FAILURES=0
+    # Word splitting is intentional: the listing holds one dataset name per line.
+    for ds in $DATASETS; do
+        info "Rolling back $ds@genesis"
+        if ! ssh_cmd "zfs rollback -r $ds@genesis" &>> "$LOGFILE"; then
+            warn "Failed to rollback $ds@genesis"
+            ROLLBACK_FAILURES=$((ROLLBACK_FAILURES + 1))
+        fi
+    done
+    # Only claim success when every rollback actually succeeded.
+    if [ "$ROLLBACK_FAILURES" -eq 0 ]; then
+        success "Rollback complete"
+    else
+        warn "Rollback finished with $ROLLBACK_FAILURES failed dataset(s); see $LOGFILE"
+    fi
+
+    # Need to reconnect after rollback
+    sleep 5
+    step "Reconnecting after rollback"
+    if ! ssh_cmd "echo reconnected" &>/dev/null; then
+        fatal "Lost connection after rollback"
+    fi
+    success "Reconnected"
+fi
+
+# Unless --validate-only was given: ship the current HEAD to the target as a
+# git bundle, start archsetup there in the background, poll until it ends
+# (or 90 minutes pass), then collect its logs. ARCHSETUP_EXIT_CODE is set on
+# every path (0 = completion marker found, 1 = marker missing, 124 = timeout).
+if ! $VALIDATE_ONLY; then
+    # Capture pre-install state
+    capture_pre_install_state "$TEST_RESULTS_DIR"
+
+    # Transfer archsetup
+    section "Transferring ArchSetup"
+
+    step "Creating git bundle"
+    BUNDLE_FILE=$(mktemp)
+    # Each transfer step is checked explicitly so that a failure removes the
+    # local temporary bundle and names the step, instead of "set -e" aborting
+    # and leaving the file behind. (Relies on fatal ending the script, as the
+    # other call sites in this file do.)
+    if ! git -C "$PROJECT_ROOT" bundle create "$BUNDLE_FILE" HEAD >> "$LOGFILE" 2>&1; then
+        rm -f "$BUNDLE_FILE"
+        fatal "Could not create git bundle (see $LOGFILE)"
+    fi
+
+    step "Transferring to $TARGET_HOST"
+    if ! ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test"; then
+        rm -f "$BUNDLE_FILE"
+        fatal "Could not prepare /tmp/archsetup-test on $TARGET_HOST"
+    fi
+    if ! sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+        "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
+        rm -f "$BUNDLE_FILE"
+        fatal "Could not copy bundle to $TARGET_HOST (see $LOGFILE)"
+    fi
+    # The local copy is no longer needed once it is on the target.
+    rm -f "$BUNDLE_FILE"
+
+    step "Extracting on target"
+    ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1
+    success "ArchSetup transferred"
+
+    # Execute archsetup
+    section "Executing ArchSetup"
+
+    start_timer "archsetup"
+    step "Starting archsetup on $TARGET_HOST"
+    info "This will take 30-60 minutes"
+
+    REMOTE_LOG="/tmp/archsetup-test/archsetup-output.log"
+    ARCHSETUP_ARGS=""
+    if $SKIP_SLOW_PACKAGES; then
+        ARCHSETUP_ARGS="--skip-slow-packages"
+        info "Running with --skip-slow-packages"
+    fi
+
+    # Start archsetup in background; nohup lets it outlive the SSH session.
+    ssh_cmd "cd /tmp/archsetup-test && nohup bash archsetup $ARCHSETUP_ARGS > $REMOTE_LOG 2>&1 &"
+    success "ArchSetup started in background"
+
+    # Poll for completion
+    step "Monitoring archsetup progress"
+    POLL_COUNT=0
+    MAX_POLLS=180 # 90 minutes max (one poll every 30 seconds)
+    SSH_FAILURES=0
+    MAX_SSH_FAILURES=5 # consecutive failed polls tolerated before giving up
+
+    while [ $POLL_COUNT -lt $MAX_POLLS ]; do
+        # Tell "process gone" apart from "could not ask": the remote grep
+        # exits 0 while archsetup runs and 1 once it is gone; any other status
+        # is treated as an SSH/transport problem, so one dropped connection
+        # is not mistaken for completion.
+        # NOTE(review): assumes ssh_cmd passes the remote exit status through - confirm.
+        POLL_RC=0
+        ssh_cmd "ps aux | grep '[b]ash archsetup' > /dev/null" 2>/dev/null || POLL_RC=$?
+        case $POLL_RC in
+            0)
+                SSH_FAILURES=0
+                ;;
+            1)
+                break
+                ;;
+            *)
+                SSH_FAILURES=$((SSH_FAILURES + 1))
+                if [ $SSH_FAILURES -ge $MAX_SSH_FAILURES ]; then
+                    error "Lost contact with $TARGET_HOST while monitoring archsetup"
+                    break
+                fi
+                warn "Could not poll $TARGET_HOST (status $POLL_RC); retrying"
+                ;;
+        esac
+
+        sleep 30
+        POLL_COUNT=$((POLL_COUNT + 1))
+        # Every 10 polls (5 minutes) report progress and the last log line.
+        if [ $((POLL_COUNT % 10)) -eq 0 ]; then
+            ELAPSED_MINS=$((POLL_COUNT / 2))
+            info "Still running... ($ELAPSED_MINS minutes elapsed)"
+            # Show last line of progress
+            LAST_LINE=$(ssh_cmd "tail -1 $REMOTE_LOG 2>/dev/null" || echo "")
+            if [ -n "$LAST_LINE" ]; then
+                info " $LAST_LINE"
+            fi
+        fi
+    done
+
+    if [ $POLL_COUNT -ge $MAX_POLLS ]; then
+        error "ArchSetup timed out after 90 minutes"
+        ARCHSETUP_EXIT_CODE=124
+    else
+        step "Retrieving archsetup exit status"
+        # Success is inferred from a completion marker in the target's log.
+        # NOTE(review): logs from earlier runs would also match unless the
+        # host was rolled back first - confirm this is acceptable.
+        if ssh_cmd "grep -q 'ARCHSETUP_EXECUTION_COMPLETE' /var/log/archsetup-*.log 2>/dev/null"; then
+            ARCHSETUP_EXIT_CODE=0
+            success "ArchSetup completed successfully"
+        else
+            ARCHSETUP_EXIT_CODE=1
+            error "ArchSetup may have encountered errors"
+        fi
+    fi
+
+    stop_timer "archsetup"
+
+    # Copy logs (best effort: a failed copy only produces a warning)
+    section "Capturing Test Artifacts"
+
+    step "Copying archsetup log"
+    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+        "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \
+        warn "Could not copy archsetup log"
+
+    step "Copying archsetup output"
+    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+        "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \
+        warn "Could not copy output log"
+
+    # Capture post-install state
+    capture_post_install_state "$TEST_RESULTS_DIR"
+else
+    info "Skipping archsetup (--validate-only)"
+    ARCHSETUP_EXIT_CODE=0
+    # Create the state directories that the install path would otherwise create.
+    mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install"
+fi
+
+# General validation suites (helpers presumably come from the sourced
+# libraries - they are not defined in this script).
+run_all_validations
+validate_all_services
+
+# ZFS-only checks get their own section in the output.
+section "ZFS-Specific Validations"
+validate_zfs_services
+
+# The log diff is analysed only when archsetup was actually run.
+$VALIDATE_ONLY || analyze_log_diff "$TEST_RESULTS_DIR"
+
+# Write the issue report for this run.
+generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"
+
+# Reduce the failure counter to a true/false flag for the report and summary.
+VALIDATION_PASSED=false
+if [ $VALIDATION_FAILED -eq 0 ]; then
+    VALIDATION_PASSED=true
+fi
+
+# Generate test report
+# Writes a plain-text summary of this run to test-report.txt in the results
+# directory: run id, target host, archsetup exit code, validation verdict
+# and counters, and the artifact locations.
+section "Generating Test Report"
+
+REPORT_FILE="$TEST_RESULTS_DIR/test-report.txt"
+# The heredoc delimiter is unquoted, so variables and $(...) substitutions in
+# the body are expanded when the report is written.
+# NOTE(review): VALIDATION_PASSED_COUNT and VALIDATION_WARNINGS are not
+# assigned in this script; presumably the validation library maintains them - confirm.
+cat > "$REPORT_FILE" << EOFREPORT
+========================================
+Bare Metal ArchSetup Test Report
+========================================
+
+Test ID: $TIMESTAMP
+Date: $(date +'%Y-%m-%d %H:%M:%S')
+Target: $TARGET_HOST
+Test Method: Bare Metal ZFS
+
+Results:
+ ArchSetup Exit Code: $ARCHSETUP_EXIT_CODE
+ Validation: $(if $VALIDATION_PASSED; then echo "PASSED"; else echo "FAILED"; fi)
+
+Validation Summary:
+ Passed: $VALIDATION_PASSED_COUNT
+ Failed: $VALIDATION_FAILED
+ Warnings: $VALIDATION_WARNINGS
+
+Artifacts:
+ Log file: $LOGFILE
+ Report: $REPORT_FILE
+ Results: $TEST_RESULTS_DIR/
+
+EOFREPORT
+
+info "Test report saved: $REPORT_FILE"
+
+# Rollback after if requested (--rollback-after). This is best-effort cleanup:
+# problems are reported as warnings and never stop the script, so the final
+# summary below is always printed.
+if $ROLLBACK_AFTER; then
+    section "Rolling Back to Genesis (cleanup)"
+
+    CLEANUP_FAILURES=0
+    # Testing the assignment inside "if" keeps a failed listing from
+    # triggering "set -e" and ending the script before the summary.
+    if DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"); then
+        if [ -z "$DATASETS" ]; then
+            warn "No @genesis snapshots found on $TARGET_HOST; nothing to roll back"
+        fi
+        # Word splitting is intentional: the listing holds one dataset name per line.
+        for ds in $DATASETS; do
+            info "Rolling back $ds@genesis"
+            if ! ssh_cmd "zfs rollback -r $ds@genesis" &>> "$LOGFILE"; then
+                warn "Failed to rollback $ds@genesis"
+                CLEANUP_FAILURES=$((CLEANUP_FAILURES + 1))
+            fi
+        done
+    else
+        warn "Could not list genesis snapshots on $TARGET_HOST; skipping rollback"
+        CLEANUP_FAILURES=1
+    fi
+
+    # Only claim success when every rollback actually succeeded.
+    if [ "$CLEANUP_FAILURES" -eq 0 ]; then
+        success "Rollback complete"
+    else
+        warn "Rollback incomplete; see $LOGFILE"
+    fi
+fi
+
+# Closing verdict: the run passes only when archsetup reported success AND
+# validation passed; the script's exit status mirrors that verdict.
+section "Test Complete"
+
+if ! { [ $ARCHSETUP_EXIT_CODE -eq 0 ] && $VALIDATION_PASSED; }; then
+    error "TEST FAILED"
+    info "Check logs in: $TEST_RESULTS_DIR"
+    exit 1
+fi
+
+success "TEST PASSED"
+exit 0