summaryrefslogtreecommitdiff
path: root/scripts/testing/run-test-baremetal.sh
blob: 709f030dfa8f19198c9ea095c639665518993002 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/bin/bash
# Run archsetup test on bare metal ZFS system
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
#
# This script:
# 1. Connects to bare metal ZFS system via SSH
# 2. Optionally rolls back to genesis snapshots first
# 3. Transfers archsetup
# 4. Executes archsetup
# 5. Captures logs and validates results
# 6. Optionally rolls back to genesis snapshots after the test (cleanup)

# Abort on the first unhandled command failure
set -o errexit

# Locate this script and the repository root (two levels above it)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Load the shared logging and validation helpers
. "$SCRIPT_DIR/lib/logging.sh"
. "$SCRIPT_DIR/lib/validation.sh"

# Option defaults; overridden while parsing the command line
TARGET_HOST=""
ROOT_PASSWORD=""
ROLLBACK_FIRST=false
ROLLBACK_AFTER=false
SKIP_SLOW_PACKAGES=false

# Print the command-line help on stdout, then terminate the script with
# exit status 1. Never returns to the caller.
usage() {
    cat << EOFUSAGE
Usage: $0 --host <hostname> --password <root_password> [options]

Required:
  --host <hostname>       Target bare metal host (e.g., ratio.local)
  --password <password>   Root password for SSH

Options:
  --rollback-first        Roll back to genesis snapshots before running
  --rollback-after        Roll back to genesis snapshots after test (cleanup)
  --skip-slow-packages    Skip slow packages for faster testing
  --validate-only         Skip archsetup, only run validation checks
  --help                  Show this help
EOFUSAGE
    exit 1
}

VALIDATE_ONLY=false

# Walk the command line one option at a time. Options that take a value
# consume two words; flags consume one. Anything unrecognised prints the
# help text and exits (usage never returns).
while [[ $# -gt 0 ]]; do
    case "$1" in
        --host)
            # Without this check a trailing "--host" makes `shift 2` fail and
            # `set -e` ends the script without any explanation.
            if [[ $# -lt 2 ]]; then
                echo "Error: --host requires a value"
                usage
            fi
            TARGET_HOST="$2"
            shift 2
            ;;
        --password)
            if [[ $# -lt 2 ]]; then
                echo "Error: --password requires a value"
                usage
            fi
            ROOT_PASSWORD="$2"
            shift 2
            ;;
        --rollback-first)
            ROLLBACK_FIRST=true
            shift
            ;;
        --rollback-after)
            ROLLBACK_AFTER=true
            shift
            ;;
        --skip-slow-packages)
            SKIP_SLOW_PACKAGES=true
            shift
            ;;
        --validate-only)
            VALIDATE_ONLY=true
            shift
            ;;
        --help)
            usage
            ;;
        *)
            echo "Unknown option: $1"
            usage
            ;;
    esac
done

# Validate required args (an explicitly empty value counts as missing)
if [[ -z "$TARGET_HOST" || -z "$ROOT_PASSWORD" ]]; then
    echo "Error: --host and --password are required"
    usage
fi

# Per-run settings: every run writes into its own timestamped results directory
TIMESTAMP=$(date +'%Y%m%d-%H%M%S')
TEST_RESULTS_DIR="$PROJECT_ROOT/test-results/baremetal-$TIMESTAMP"
ARCHZFS_INBOX="$HOME/code/archzfs/inbox"

# Override VM_IP for validation.sh ssh_cmd function
VM_IP="$TARGET_HOST"

# Create the results directory and start logging into it
LOGFILE="$TEST_RESULTS_DIR/test.log"
mkdir -p "$TEST_RESULTS_DIR"
init_logging "$LOGFILE"

section "Bare Metal Test Run: $TIMESTAMP"
info "Target: $TARGET_HOST"

# Probe the target with a trivial remote command; host-key checking is
# disabled and the connection attempt is limited to 10 seconds
step "Testing SSH connectivity to $TARGET_HOST"
sshpass -p "$ROOT_PASSWORD" ssh \
    -o StrictHostKeyChecking=no \
    -o UserKnownHostsFile=/dev/null \
    -o ConnectTimeout=10 \
    "root@$TARGET_HOST" "echo connected" &>/dev/null \
    || fatal "Cannot connect to $TARGET_HOST via SSH"
success "SSH connection OK"

# The test only makes sense on a host that has a "zroot" pool
step "Verifying ZFS root"
ssh_cmd "zfs list zroot" &>> "$LOGFILE" \
    || fatal "Target is not a ZFS system (no zroot pool)"
success "ZFS root confirmed"

# Rollback to genesis if requested (--rollback-first).
# Every dataset that has an @genesis snapshot is rolled back with -r.
# Individual failures are logged as warnings and do not stop the run, but
# the step is only reported as successful when every rollback succeeded.
if $ROLLBACK_FIRST; then
    section "Rolling Back to Genesis Snapshots"

    step "Getting list of datasets with genesis snapshots"
    DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'")

    if [ -z "$DATASETS" ]; then
        # Nothing to roll back: say so instead of claiming success
        warn "No @genesis snapshots found on $TARGET_HOST; nothing was rolled back"
    else
        step "Rolling back all datasets to genesis"
        # One dataset per line. Reading into an array avoids word-splitting
        # and glob expansion of the names, and keeps ssh away from the list
        # (it is not fed through the loop's stdin).
        mapfile -t GENESIS_DATASETS <<< "$DATASETS"
        ROLLBACK_FAILURES=0
        for ds in "${GENESIS_DATASETS[@]}"; do
            [ -n "$ds" ] || continue
            info "Rolling back $ds@genesis"
            # Shell-quote the snapshot name for the remote command line
            printf -v SNAPSHOT_ARG '%q' "$ds@genesis"
            if ! ssh_cmd "zfs rollback -r $SNAPSHOT_ARG" &>> "$LOGFILE"; then
                warn "Failed to rollback $ds@genesis"
                ROLLBACK_FAILURES=$((ROLLBACK_FAILURES + 1))
            fi
        done

        if [ "$ROLLBACK_FAILURES" -eq 0 ]; then
            success "Rollback complete"
        else
            warn "Rollback finished with $ROLLBACK_FAILURES failed dataset(s); target may not be in a clean genesis state"
        fi
    fi

    # Need to reconnect after rollback
    sleep 5
    step "Reconnecting after rollback"
    if ! ssh_cmd "echo reconnected" &>/dev/null; then
        fatal "Lost connection after rollback"
    fi
    success "Reconnected"
fi

# Main test phase (skipped with --validate-only):
#   1. snapshot the pre-install state
#   2. ship the current HEAD to the target as a git bundle and clone it there
#   3. start archsetup in the background on the target and poll until it ends
#   4. derive ARCHSETUP_EXIT_CODE (0 ok, 1 error, 124 timeout)
#   5. copy the logs back and snapshot the post-install state
# fatal (not defined in this file) is relied on to abort the run, as it is
# elsewhere in this script.
if ! $VALIDATE_ONLY; then
    # Capture pre-install state
    capture_pre_install_state "$TEST_RESULTS_DIR"

    # Transfer archsetup
    section "Transferring ArchSetup"

    step "Creating git bundle"
    BUNDLE_FILE=$(mktemp)
    # Each failure path removes the local temp file before aborting, so a
    # failed transfer no longer leaves a stray bundle behind.
    if ! git -C "$PROJECT_ROOT" bundle create "$BUNDLE_FILE" HEAD >> "$LOGFILE" 2>&1; then
        rm -f "$BUNDLE_FILE"
        fatal "Failed to create git bundle (see $LOGFILE)"
    fi

    step "Transferring to $TARGET_HOST"
    if ! ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test"; then
        rm -f "$BUNDLE_FILE"
        fatal "Failed to prepare /tmp/archsetup-test on $TARGET_HOST"
    fi
    if ! sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
        rm -f "$BUNDLE_FILE"
        fatal "Failed to copy git bundle to $TARGET_HOST (see $LOGFILE)"
    fi
    # The local copy is no longer needed once it has been uploaded
    rm -f "$BUNDLE_FILE"

    step "Extracting on target"
    if ! ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
        fatal "Failed to extract archsetup on $TARGET_HOST (see $LOGFILE)"
    fi
    success "ArchSetup transferred"

    # Execute archsetup
    section "Executing ArchSetup"

    start_timer "archsetup"
    step "Starting archsetup on $TARGET_HOST"
    info "This will take 30-60 minutes"

    REMOTE_LOG="/tmp/archsetup-test/archsetup-output.log"
    ARCHSETUP_ARGS=""
    if $SKIP_SLOW_PACKAGES; then
        ARCHSETUP_ARGS="--skip-slow-packages"
        info "Running with --skip-slow-packages"
    fi

    # Start archsetup in background (nohup keeps it alive after ssh returns)
    ssh_cmd "cd /tmp/archsetup-test && nohup bash archsetup $ARCHSETUP_ARGS > $REMOTE_LOG 2>&1 &"
    success "ArchSetup started in background"

    # Poll for completion
    step "Monitoring archsetup progress"
    POLL_COUNT=0
    MAX_POLLS=180  # 90 minutes max (one poll every 30 seconds)
    SSH_FAILURES=0
    MAX_SSH_FAILURES=10  # consecutive unanswered probes tolerated
    POLL_OUTCOME="timeout"  # replaced when the loop ends for another reason

    while [ "$POLL_COUNT" -lt "$MAX_POLLS" ]; do
        # The remote side prints an explicit state token. An empty or
        # unexpected answer therefore means the probe itself failed (e.g. a
        # network drop) and is not mistaken for "archsetup has finished".
        # The [b] in the pattern keeps grep from matching its own command line.
        PROBE=$(ssh_cmd "if ps aux | grep '[b]ash archsetup' > /dev/null; then echo ARCHSETUP_RUNNING; else echo ARCHSETUP_FINISHED; fi" 2>/dev/null || true)
        case "$PROBE" in
            *ARCHSETUP_RUNNING*)
                SSH_FAILURES=0
                ;;
            *ARCHSETUP_FINISHED*)
                POLL_OUTCOME="finished"
                break
                ;;
            *)
                SSH_FAILURES=$((SSH_FAILURES + 1))
                if [ "$SSH_FAILURES" -ge "$MAX_SSH_FAILURES" ]; then
                    POLL_OUTCOME="unreachable"
                    break
                fi
                warn "Could not query $TARGET_HOST (attempt $SSH_FAILURES of $MAX_SSH_FAILURES), retrying"
                ;;
        esac

        sleep 30
        POLL_COUNT=$((POLL_COUNT + 1))
        # Every 10 polls (5 minutes) report progress and the latest output line
        if [ $((POLL_COUNT % 10)) -eq 0 ]; then
            ELAPSED_MINS=$((POLL_COUNT / 2))
            info "Still running... ($ELAPSED_MINS minutes elapsed)"
            # Show last line of progress
            LAST_LINE=$(ssh_cmd "tail -1 $REMOTE_LOG 2>/dev/null" || echo "")
            if [ -n "$LAST_LINE" ]; then
                info "  $LAST_LINE"
            fi
        fi
    done

    case "$POLL_OUTCOME" in
        timeout)
            error "ArchSetup timed out after 90 minutes"
            ARCHSETUP_EXIT_CODE=124
            ;;
        unreachable)
            error "Lost contact with $TARGET_HOST while archsetup was running"
            ARCHSETUP_EXIT_CODE=1
            ;;
        *)
            step "Retrieving archsetup exit status"
            # Success is inferred from a completion marker in the target's log.
            # NOTE(review): any /var/log/archsetup-*.log matches, so a log left
            # over from an earlier run could satisfy this check - confirm.
            if ssh_cmd "grep -q 'ARCHSETUP_EXECUTION_COMPLETE' /var/log/archsetup-*.log 2>/dev/null"; then
                ARCHSETUP_EXIT_CODE=0
                success "ArchSetup completed successfully"
            else
                ARCHSETUP_EXIT_CODE=1
                error "ArchSetup may have encountered errors"
            fi
            ;;
    esac

    stop_timer "archsetup"

    # Copy logs (best effort: a missing log only produces a warning)
    section "Capturing Test Artifacts"

    step "Copying archsetup log"
    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \
        warn "Could not copy archsetup log"

    step "Copying archsetup output"
    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \
        warn "Could not copy output log"

    # Capture post-install state
    capture_post_install_state "$TEST_RESULTS_DIR"
else
    info "Skipping archsetup (--validate-only)"
    ARCHSETUP_EXIT_CODE=0
    # Create the state directories that the full run would otherwise populate
    mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install"
fi

# Run the generic validation suites, then the ZFS-specific one
run_all_validations
validate_all_services

section "ZFS-Specific Validations"
validate_zfs_services

# The log diff is only analysed when archsetup actually ran
$VALIDATE_ONLY || analyze_log_diff "$TEST_RESULTS_DIR"

# Generate reports
generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"

# Validation passes only when the failure counter is exactly zero
VALIDATION_PASSED=false
if [ "$VALIDATION_FAILED" -eq 0 ]; then
    VALIDATION_PASSED=true
fi

# Write the plain-text summary report
section "Generating Test Report"

REPORT_FILE="$TEST_RESULTS_DIR/test-report.txt"

if $VALIDATION_PASSED; then
    VALIDATION_LABEL="PASSED"
else
    VALIDATION_LABEL="FAILED"
fi

{
    printf '%s\n' \
        "========================================" \
        "Bare Metal ArchSetup Test Report" \
        "========================================" \
        "" \
        "Test ID: $TIMESTAMP" \
        "Date: $(date +'%Y-%m-%d %H:%M:%S')" \
        "Target: $TARGET_HOST" \
        "Test Method: Bare Metal ZFS" \
        "" \
        "Results:" \
        "  ArchSetup Exit Code: $ARCHSETUP_EXIT_CODE" \
        "  Validation: $VALIDATION_LABEL" \
        "" \
        "Validation Summary:" \
        "  Passed:   $VALIDATION_PASSED_COUNT" \
        "  Failed:   $VALIDATION_FAILED" \
        "  Warnings: $VALIDATION_WARNINGS" \
        "" \
        "Artifacts:" \
        "  Log file: $LOGFILE" \
        "  Report:   $REPORT_FILE" \
        "  Results:  $TEST_RESULTS_DIR/" \
        ""
} > "$REPORT_FILE"

info "Test report saved: $REPORT_FILE"

# Rollback after if requested (--rollback-after).
# This is best-effort cleanup: nothing here may abort the script, otherwise
# the final PASS/FAIL verdict below would never be printed. Problems are
# reported as warnings instead of being silently ignored.
if $ROLLBACK_AFTER; then
    section "Rolling Back to Genesis (cleanup)"

    # Guard the listing so that an SSH failure does not trip `set -e`
    if ! DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"); then
        warn "Could not list genesis snapshots on $TARGET_HOST; skipping cleanup rollback"
    elif [ -z "$DATASETS" ]; then
        warn "No @genesis snapshots found on $TARGET_HOST; nothing was rolled back"
    else
        # One dataset per line; an array avoids word-splitting and globbing
        mapfile -t CLEANUP_DATASETS <<< "$DATASETS"
        CLEANUP_FAILURES=0
        for ds in "${CLEANUP_DATASETS[@]}"; do
            [ -n "$ds" ] || continue
            info "Rolling back $ds@genesis"
            # Shell-quote the snapshot name for the remote command line
            printf -v CLEANUP_SNAPSHOT '%q' "$ds@genesis"
            if ! ssh_cmd "zfs rollback -r $CLEANUP_SNAPSHOT" &>> "$LOGFILE"; then
                warn "Failed to rollback $ds@genesis"
                CLEANUP_FAILURES=$((CLEANUP_FAILURES + 1))
            fi
        done

        if [ "$CLEANUP_FAILURES" -eq 0 ]; then
            success "Rollback complete"
        else
            warn "Rollback finished with $CLEANUP_FAILURES failed dataset(s)"
        fi
    fi
fi

# Final verdict: the run passes only when archsetup finished cleanly AND
# validation passed; the script's exit status mirrors that verdict.
section "Test Complete"

if ! [ $ARCHSETUP_EXIT_CODE -eq 0 ] || ! $VALIDATION_PASSED; then
    error "TEST FAILED"
    info "Check logs in: $TEST_RESULTS_DIR"
    exit 1
fi

success "TEST PASSED"
exit 0