blob: c108e6ffcd3f8c06d18f19bd1f62bd64bd377f81 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
#!/bin/bash
# Run archsetup test on bare metal ZFS system
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
#
# This script:
# 1. Connects to bare metal ZFS system via SSH
# 2. Optionally rolls back to genesis snapshots first
# 3. Transfers archsetup
# 4. Executes archsetup
# 5. Captures logs and validates results
# 6. Optionally rolls back to genesis snapshots after the test (--rollback-after)
# Abort on the first unhandled command failure.
set -o errexit

# Locate this script's directory, then the project root two levels above it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(cd "$SCRIPT_DIR/../.." && pwd)

# Load the shared logging and validation helpers.
. "$SCRIPT_DIR/lib/logging.sh"
. "$SCRIPT_DIR/lib/validation.sh"

# Defaults for the command-line options; the argument parser overrides them.
TARGET_HOST=""
ROOT_PASSWORD=""
ROLLBACK_FIRST=false
ROLLBACK_AFTER=false
# Print the command-line help to stdout and terminate the script.
# Always exits with status 1, whether called for --help or for an error.
usage() {
    # One printf call, one argument per output line.
    printf '%s\n' \
        "Usage: $0 --host <hostname> --password <root_password> [options]" \
        "" \
        "Required:" \
        " --host <hostname> Target bare metal host (e.g., ratio.local)" \
        " --password <password> Root password for SSH" \
        "" \
        "Options:" \
        " --rollback-first Roll back to genesis snapshots before running" \
        " --rollback-after Roll back to genesis snapshots after test (cleanup)" \
        " --validate-only Skip archsetup, only run validation checks" \
        " --help Show this help"
    exit 1
}
# Parse command-line options into TARGET_HOST, ROOT_PASSWORD, ROLLBACK_FIRST,
# ROLLBACK_AFTER and VALIDATE_ONLY, then enforce the two required options.
# Unknown options and missing values print an error followed by the usage
# text; usage ends the script with status 1.
VALIDATE_ONLY=false
while [[ $# -gt 0 ]]; do
    case "$1" in
        --host)
            # Check for the value before `shift 2`: when the flag is the last
            # argument the shift fails and `set -e` would end the script
            # without printing anything.
            if [[ $# -lt 2 ]]; then
                echo "Error: --host requires a value"
                usage
            fi
            TARGET_HOST="$2"
            shift 2
            ;;
        --password)
            if [[ $# -lt 2 ]]; then
                echo "Error: --password requires a value"
                usage
            fi
            ROOT_PASSWORD="$2"
            shift 2
            ;;
        --rollback-first)
            ROLLBACK_FIRST=true
            shift
            ;;
        --rollback-after)
            ROLLBACK_AFTER=true
            shift
            ;;
        --validate-only)
            VALIDATE_ONLY=true
            shift
            ;;
        --help)
            # An explicit help request is not an error. usage always exits 1,
            # so run it in a subshell and report success ourselves.
            (usage) || true
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            usage
            ;;
    esac
done

# Validate required args
if [[ -z "$TARGET_HOST" || -z "$ROOT_PASSWORD" ]]; then
    echo "Error: --host and --password are required"
    usage
fi
# Per-run settings: every run writes into its own timestamped results folder.
TIMESTAMP=$(date +'%Y%m%d-%H%M%S')
TEST_RESULTS_DIR="${PROJECT_ROOT}/test-results/baremetal-${TIMESTAMP}"
LOGFILE="${TEST_RESULTS_DIR}/test.log"
ARCHZFS_INBOX="${HOME}/code/archzfs/inbox"

# The ssh_cmd helper from validation.sh takes its target from VM_IP, so point
# it at the bare metal host.
VM_IP="${TARGET_HOST}"

# Create the results folder, then start logging into it.
mkdir -p "$TEST_RESULTS_DIR"
init_logging "$LOGFILE"

section "Bare Metal Test Run: $TIMESTAMP"
info "Target: $TARGET_HOST"
# Pre-flight 1: the target must accept a root SSH login with the given password.
step "Testing SSH connectivity to $TARGET_HOST"
sshpass -p "$ROOT_PASSWORD" ssh \
    -o StrictHostKeyChecking=no \
    -o UserKnownHostsFile=/dev/null \
    -o ConnectTimeout=10 \
    "root@$TARGET_HOST" "echo connected" &>/dev/null \
    || fatal "Cannot connect to $TARGET_HOST via SSH"
success "SSH connection OK"

# Pre-flight 2: the target must have a ZFS pool named zroot.
step "Verifying ZFS root"
ssh_cmd "zfs list zroot" &>> "$LOGFILE" \
    || fatal "Target is not a ZFS system (no zroot pool)"
success "ZFS root confirmed"
# Rollback to genesis if requested (--rollback-first).
# Every dataset that has an @genesis snapshot is rolled back with `zfs rollback -r`.
# A single dataset failing is logged as a warning and does not stop the run.
if $ROLLBACK_FIRST; then
    section "Rolling Back to Genesis Snapshots"

    step "Getting list of datasets with genesis snapshots"
    # Test the assignment explicitly: under `set -e` a failed listing would
    # otherwise end the script with no explanation in the log.
    # NOTE(review): assumes fatal (lib/logging.sh) terminates the script, as
    # its other uses in this file imply - confirm.
    if ! DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"); then
        fatal "Could not list genesis snapshots on $TARGET_HOST"
    fi

    if [ -z "$DATASETS" ]; then
        # The remote pipeline ends in sed, so "no matches" arrives here as an
        # empty list rather than an error. Do not claim a rollback happened.
        warn "No @genesis snapshots found on $TARGET_HOST; nothing was rolled back"
    else
        step "Rolling back all datasets to genesis"
        rollback_first_failures=0
        # Intentional word splitting: one dataset name per word. A
        # `while read` loop is avoided because a command reading stdin inside
        # the loop (ssh does, unless told otherwise) would swallow the rest of
        # the list.
        for ds in $DATASETS; do
            info "Rolling back $ds@genesis"
            if ! ssh_cmd "zfs rollback -r $ds@genesis" &>> "$LOGFILE"; then
                warn "Failed to rollback $ds@genesis"
                rollback_first_failures=$((rollback_first_failures + 1))
            fi
        done

        if [ "$rollback_first_failures" -eq 0 ]; then
            success "Rollback complete"
        else
            warn "Rollback finished with $rollback_first_failures failed dataset(s)"
        fi

        # Need to reconnect after rollback
        sleep 5
        step "Reconnecting after rollback"
        if ! ssh_cmd "echo reconnected" &>/dev/null; then
            fatal "Lost connection after rollback"
        fi
        success "Reconnected"
    fi
fi
# Transfer, launch and monitor archsetup on the target, unless --validate-only
# was given. Sets ARCHSETUP_EXIT_CODE:
#   0   completion marker found in /var/log/archsetup-*.log (or run skipped)
#   1   marker not found
#   124 archsetup still running when the poll limit was reached
if ! $VALIDATE_ONLY; then
    # Capture pre-install state
    capture_pre_install_state "$TEST_RESULTS_DIR"

    # Transfer archsetup
    section "Transferring ArchSetup"

    step "Creating git bundle"
    BUNDLE_FILE=$(mktemp)
    # Each transfer step is checked explicitly so that a failure is reported
    # and the local temp bundle removed, instead of `set -e` ending the run
    # silently with the temp file left behind.
    # NOTE(review): assumes fatal (lib/logging.sh) terminates the script, as
    # its other uses in this file imply - confirm.
    if ! git -C "$PROJECT_ROOT" bundle create "$BUNDLE_FILE" HEAD >> "$LOGFILE" 2>&1; then
        rm -f "$BUNDLE_FILE"
        fatal "Could not create git bundle from $PROJECT_ROOT"
    fi

    step "Transferring to $TARGET_HOST"
    if ! ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test"; then
        rm -f "$BUNDLE_FILE"
        fatal "Could not prepare /tmp/archsetup-test on $TARGET_HOST"
    fi
    if ! sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
        rm -f "$BUNDLE_FILE"
        fatal "Could not copy the git bundle to $TARGET_HOST"
    fi
    # The bundle is on the target now; the local copy is no longer needed.
    rm -f "$BUNDLE_FILE"

    step "Extracting on target"
    if ! ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1; then
        fatal "Could not extract archsetup on $TARGET_HOST"
    fi
    success "ArchSetup transferred"

    # Execute archsetup
    section "Executing ArchSetup"
    start_timer "archsetup"
    step "Starting archsetup on $TARGET_HOST"
    info "This will take 30-60 minutes"
    REMOTE_LOG="/tmp/archsetup-test/archsetup-output.log"

    # Start archsetup in background
    ssh_cmd "cd /tmp/archsetup-test && nohup bash archsetup > $REMOTE_LOG 2>&1 &"
    success "ArchSetup started in background"

    # Poll for completion
    step "Monitoring archsetup progress"
    POLL_COUNT=0
    MAX_POLLS=180 # 90 minutes max
    POLL_ERRORS=0      # consecutive polls that failed for a reason other than "not running"
    MAX_POLL_ERRORS=10 # give up after this many in a row (about 5 minutes)
    while [ "$POLL_COUNT" -lt "$MAX_POLLS" ]; do
        # Exit status of the remote pipeline is grep's: 0 = process found,
        # 1 = not found. Anything else (ssh itself reports 255 on error) means
        # the poll failed, which says nothing about whether archsetup is done.
        # The [b] in the pattern keeps grep from matching its own command line.
        POLL_RC=0
        ssh_cmd "ps aux | grep '[b]ash archsetup' > /dev/null" 2>/dev/null || POLL_RC=$?

        if [ "$POLL_RC" -eq 1 ]; then
            break
        fi

        if [ "$POLL_RC" -ne 0 ]; then
            POLL_ERRORS=$((POLL_ERRORS + 1))
            if [ "$POLL_ERRORS" -ge "$MAX_POLL_ERRORS" ]; then
                error "Lost contact with $TARGET_HOST ($POLL_ERRORS consecutive failed polls)"
                break
            fi
            warn "Could not poll $TARGET_HOST (exit $POLL_RC); retrying"
        else
            POLL_ERRORS=0
        fi

        sleep 30
        POLL_COUNT=$((POLL_COUNT + 1))

        # Every 10th poll (about 5 minutes) report progress.
        if [ "$POLL_RC" -eq 0 ] && [ $((POLL_COUNT % 10)) -eq 0 ]; then
            ELAPSED_MINS=$((POLL_COUNT / 2))
            info "Still running... ($ELAPSED_MINS minutes elapsed)"
            # Show last line of progress
            LAST_LINE=$(ssh_cmd "tail -1 $REMOTE_LOG 2>/dev/null" || echo "")
            if [ -n "$LAST_LINE" ]; then
                info " $LAST_LINE"
            fi
        fi
    done

    if [ "$POLL_COUNT" -ge "$MAX_POLLS" ]; then
        error "ArchSetup timed out after 90 minutes"
        ARCHSETUP_EXIT_CODE=124
    else
        # The real exit status of the background process is not available, so
        # success is inferred from a completion marker in the archsetup log.
        step "Retrieving archsetup exit status"
        if ssh_cmd "grep -q 'ARCHSETUP_EXECUTION_COMPLETE' /var/log/archsetup-*.log 2>/dev/null"; then
            ARCHSETUP_EXIT_CODE=0
            success "ArchSetup completed successfully"
        else
            ARCHSETUP_EXIT_CODE=1
            error "ArchSetup may have encountered errors"
        fi
    fi
    stop_timer "archsetup"

    # Copy logs (best effort: a missing log is only a warning)
    section "Capturing Test Artifacts"
    step "Copying archsetup log"
    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \
        warn "Could not copy archsetup log"
    step "Copying archsetup output"
    sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \
        warn "Could not copy output log"

    # Capture post-install state
    capture_post_install_state "$TEST_RESULTS_DIR"
else
    info "Skipping archsetup (--validate-only)"
    ARCHSETUP_EXIT_CODE=0
    mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install"
fi
# Run the general validation suites provided by the sourced helpers.
run_all_validations
validate_all_services

# Checks that only apply to a ZFS-rooted install.
section "ZFS-Specific Validations"
validate_zfs_services

# Log analysis is only meaningful when archsetup actually ran.
$VALIDATE_ONLY || analyze_log_diff "$TEST_RESULTS_DIR"

# Write the issue report for this run.
generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"

# Reduce the failure counter to a true/false flag; default to failed.
VALIDATION_PASSED=false
if [ $VALIDATION_FAILED -eq 0 ]; then
    VALIDATION_PASSED=true
fi
# Write a plain-text summary of the run into the results folder.
section "Generating Test Report"
REPORT_FILE="$TEST_RESULTS_DIR/test-report.txt"

# Work out the PASSED/FAILED label first so the template below stays flat.
if $VALIDATION_PASSED; then
    report_validation_label="PASSED"
else
    report_validation_label="FAILED"
fi

cat > "$REPORT_FILE" << EOFREPORT
========================================
Bare Metal ArchSetup Test Report
========================================
Test ID: $TIMESTAMP
Date: $(date +'%Y-%m-%d %H:%M:%S')
Target: $TARGET_HOST
Test Method: Bare Metal ZFS
Results:
ArchSetup Exit Code: $ARCHSETUP_EXIT_CODE
Validation: $report_validation_label
Validation Summary:
Passed: $VALIDATION_PASSED_COUNT
Failed: $VALIDATION_FAILED
Warnings: $VALIDATION_WARNINGS
Artifacts:
Log file: $LOGFILE
Report: $REPORT_FILE
Results: $TEST_RESULTS_DIR/
EOFREPORT
info "Test report saved: $REPORT_FILE"
# Rollback after if requested (--rollback-after).
# This is best-effort cleanup: nothing here may stop the script, because the
# final pass/fail verdict is printed after this step.
if $ROLLBACK_AFTER; then
    section "Rolling Back to Genesis (cleanup)"
    # Test the assignment explicitly: under `set -e` a failed listing would
    # otherwise end the script before the verdict is reported.
    if DATASETS=$(ssh_cmd "zfs list -H -o name -t snapshot | grep '@genesis$' | sed 's/@genesis$//'"); then
        rollback_after_failures=0
        # Intentional word splitting: one dataset name per word.
        for ds in $DATASETS; do
            info "Rolling back $ds@genesis"
            if ! ssh_cmd "zfs rollback -r $ds@genesis" &>> "$LOGFILE"; then
                warn "Failed to rollback $ds@genesis"
                rollback_after_failures=$((rollback_after_failures + 1))
            fi
        done

        if [ "$rollback_after_failures" -eq 0 ]; then
            success "Rollback complete"
        else
            warn "Rollback finished with $rollback_after_failures failed dataset(s)"
        fi
    else
        warn "Could not list genesis snapshots on $TARGET_HOST; skipping rollback"
    fi
fi
# Final verdict: the run passes only when archsetup finished cleanly and the
# validations passed. The script's exit status mirrors the verdict.
section "Test Complete"

# Handle the failure case first.
if ! { [ $ARCHSETUP_EXIT_CODE -eq 0 ] && $VALIDATION_PASSED; }; then
    error "TEST FAILED"
    info "Check logs in: $TEST_RESULTS_DIR"
    exit 1
fi

success "TEST PASSED"
exit 0
|