aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-06-25 01:24:33 -0400
committer Craig Jennings <c@cjennings.net> 2026-06-25 01:24:33 -0400
commit f50fc1def85c1dbbb0ec781be4071b7ec9285785 (patch)
tree 256b852c91a0a9289d130fcd8e79f5146b73c6cf
parent 3cac3b3dfcd432395201a309920c2491ee9caf01 (diff)
download archsetup-f50fc1def85c1dbbb0ec781be4071b7ec9285785.tar.gz
archsetup-f50fc1def85c1dbbb0ec781be4071b7ec9285785.zip
fix(testing): authorize a root key so make test survives sshd hardening
The VM test SSHes into the guest as root with a password for the whole run. archsetup hardens sshd to PermitRootLogin prohibit-password and reloads it partway through the install, so every SSH after that step failed with "Permission denied" and the run aborted before any validation — make test had been silently broken since the hardening landed. inject_root_key authorizes a throwaway root key right after the first SSH (before archsetup runs) and the ssh/scp helpers now add -i <key> via SSH_KEY_OPT. prohibit-password still allows root key auth, so the harness survives the very hardening it validates. Password stays as the fallback, so the change is additive.
-rw-r--r-- scripts/testing/lib/testinfra.sh 34
-rw-r--r-- scripts/testing/lib/validation.sh 2
-rwxr-xr-x scripts/testing/lib/vm-utils.sh 39
-rwxr-xr-x scripts/testing/run-test.sh 7
-rw-r--r-- todo.org 2
5 files changed, 65 insertions, 19 deletions
diff --git a/scripts/testing/lib/testinfra.sh b/scripts/testing/lib/testinfra.sh
index 0db0ec9..bfcd43a 100644
--- a/scripts/testing/lib/testinfra.sh
+++ b/scripts/testing/lib/testinfra.sh
@@ -32,20 +32,26 @@ run_testinfra_validation() {
step "Running Testinfra validation sweep (advisory)"
- # Ephemeral keypair; authorize the pubkey in the VM over the existing channel.
- rm -f "$key" "$key.pub"
- if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
- warn "testinfra: ssh-keygen failed - skipping"
- return 0
- fi
- if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then
- warn "testinfra: pubkey copy failed - skipping"
- return 0
- fi
- if ! vm_exec "$ROOT_PASSWORD" \
- "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then
- warn "testinfra: authorizing key in VM failed - skipping"
- return 0
+ # Prefer the root key the harness already authorized (inject_root_key). It
+ # survives the sshd prohibit-password hardening, so reuse it rather than
+ # authorizing a second key. Fall back to minting our own for standalone use.
+ if [ -n "${ROOT_SSH_KEY:-}" ] && [ -f "${ROOT_SSH_KEY}" ]; then
+ key="$ROOT_SSH_KEY"
+ else
+ rm -f "$key" "$key.pub"
+ if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
+ warn "testinfra: ssh-keygen failed - skipping"
+ return 0
+ fi
+ if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then
+ warn "testinfra: pubkey copy failed - skipping"
+ return 0
+ fi
+ if ! vm_exec "$ROOT_PASSWORD" \
+ "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then
+ warn "testinfra: authorizing key in VM failed - skipping"
+ return 0
+ fi
fi
# ssh-config so testinfra connects key-only, no host-key prompt.
diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh
index 6855da7..900d675 100644
--- a/scripts/testing/lib/validation.sh
+++ b/scripts/testing/lib/validation.sh
@@ -21,7 +21,7 @@ declare -a UNKNOWN_ISSUES
# SSH helper (uses globals: VM_IP, ROOT_PASSWORD)
ssh_cmd() {
sshpass -p "$ROOT_PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
- -o ConnectTimeout=10 -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null
+ -o ConnectTimeout=10 ${SSH_KEY_OPT:-} -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null
}
# Validation result helpers
diff --git a/scripts/testing/lib/vm-utils.sh b/scripts/testing/lib/vm-utils.sh
index d029d58..f86e583 100755
--- a/scripts/testing/lib/vm-utils.sh
+++ b/scripts/testing/lib/vm-utils.sh
@@ -18,6 +18,11 @@ VM_DISK_SIZE="${VM_DISK_SIZE:-50}" # GB
SSH_PORT="${SSH_PORT:-2222}"
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
ROOT_PASSWORD="${ROOT_PASSWORD:-archsetup}"
+# Set by inject_root_key once a root key is authorized in the VM. When set, the
+# ssh/scp helpers add "-i <key>" so they keep working after archsetup hardens
+# sshd to PermitRootLogin prohibit-password (which kills root *password* login
+# but still allows key auth). Left unquoted at use sites, like SSH_OPTS.
+SSH_KEY_OPT="${SSH_KEY_OPT:-}"
# OVMF firmware paths
OVMF_CODE="/usr/share/edk2/x64/OVMF_CODE.4m.fd"
@@ -351,7 +356,7 @@ wait_for_ssh() {
progress "Waiting for SSH on localhost:$SSH_PORT..."
while [ "$elapsed" -lt "$timeout" ]; do
- if sshpass -p "$password" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost true 2>/dev/null; then
+ if sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT -p "$SSH_PORT" root@localhost true 2>/dev/null; then
success "SSH is available"
return 0
fi
@@ -367,7 +372,7 @@ wait_for_ssh() {
vm_exec() {
local password="${1:-$ROOT_PASSWORD}"
shift
- sshpass -p "$password" ssh $SSH_OPTS \
+ sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT \
-o ServerAliveInterval=30 -o ServerAliveCountMax=10 \
-p "$SSH_PORT" root@localhost "$@" 2>> "$LOGFILE"
}
@@ -379,7 +384,7 @@ copy_to_vm() {
local password="${3:-$ROOT_PASSWORD}"
step "Copying $(basename "$local_file") to VM:$remote_path"
- if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \
+ if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \
"$local_file" "root@localhost:$remote_path" >> "$LOGFILE" 2>&1; then
success "File copied to VM"
return 0
@@ -396,7 +401,7 @@ copy_from_vm() {
local password="${3:-$ROOT_PASSWORD}"
step "Copying $remote_file from VM"
- if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \
+ if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \
"root@localhost:$remote_file" "$local_path" >> "$LOGFILE" 2>&1; then
success "File copied from VM"
return 0
@@ -405,3 +410,29 @@ copy_from_vm() {
return 1
fi
}
+
+# inject_root_key <key_path>
+# Authorize a throwaway root key over the initial password session and switch
+# the ssh/scp helpers above to key auth (sets SSH_KEY_OPT + ROOT_SSH_KEY). Call
+# once, right after wait_for_ssh and before running archsetup: archsetup sets
+# PermitRootLogin prohibit-password and reloads sshd partway through, which kills
+# root *password* login. Without a key in place first, every SSH after that step
+# fails and the run aborts before any validation. Key auth survives the hardening.
+#
+# Arguments: $1 - path for the private key; "<path>.pub" is written beside it.
+#                 Existing files at both paths are removed first.
+# Globals:   reads ROOT_PASSWORD, SSH_OPTS, SSH_PORT, LOGFILE;
+#            sets SSH_KEY_OPT and exports ROOT_SSH_KEY, on success only.
+# Returns:   0 once the key is authorized in the VM; 1 on a missing argument,
+#            key generation failure, or authorization failure (in which case
+#            the helpers stay on password auth).
+inject_root_key() {
+    local key="${1:-}"
+
+    # An empty path would make rm/ssh-keygen operate on "" and ".pub".
+    if [ -z "$key" ]; then
+        warn "Root key path not given - run may break at sshd hardening"
+        return 1
+    fi
+
+    rm -f -- "$key" "$key.pub"
+    if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
+        warn "Root key generation failed - run may break at sshd hardening"
+        return 1
+    fi
+
+    # The public key travels on stdin and is appended by the remote "cat", so no
+    # temp file is left behind in the VM. This call deliberately stays
+    # password-only: the key is not authorized until it succeeds.
+    if sshpass -p "$ROOT_PASSWORD" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost \
+        "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" \
+        < "$key.pub" >> "$LOGFILE" 2>&1; then
+        # IdentitiesOnly=yes makes ssh offer only this key. Without it, keys held
+        # by the caller's ssh-agent are offered before an -i key that is not in
+        # the agent, and enough of them can exhaust sshd's MaxAuthTries before
+        # the throwaway key is ever tried. The value is expanded unquoted at the
+        # use sites (like SSH_OPTS), so the key path must not contain whitespace.
+        SSH_KEY_OPT="-i $key -o IdentitiesOnly=yes"
+        export ROOT_SSH_KEY="$key"
+        success "Root SSH key authorized (survives sshd prohibit-password hardening)"
+        return 0
+    fi
+
+    warn "Root key authorization failed - run may break at sshd hardening"
+    return 1
+}
diff --git a/scripts/testing/run-test.sh b/scripts/testing/run-test.sh
index 314097a..90022d3 100755
--- a/scripts/testing/run-test.sh
+++ b/scripts/testing/run-test.sh
@@ -142,6 +142,13 @@ start_qemu "$DISK_PATH" "disk" "" "none" || fatal "Failed to start VM"
wait_for_ssh "$ROOT_PASSWORD" 120 || fatal "VM SSH not available"
stop_timer "boot"
+# Authorize a root key now, before archsetup runs. archsetup hardens sshd to
+# PermitRootLogin prohibit-password partway through, which breaks the harness's
+# root password SSH; key auth survives it. Without this, the run aborts mid-way
+# (before any validation) once the hardening step lands.
+inject_root_key "$TEST_RESULTS_DIR/root_key" || \
+ warn "Continuing without root key - run may break at the sshd hardening step"
+
# Run network diagnostics
if ! run_network_diagnostics; then
fatal "Network diagnostics failed - aborting test"
diff --git a/todo.org b/todo.org
index e0da8e6..8329094 100644
--- a/todo.org
+++ b/todo.org
@@ -542,6 +542,8 @@ Reviewed against the existing harness: =scripts/testing/lib/validation.sh= alrea
Built the Testinfra harness skeleton: =scripts/testing/tests/= (conftest.py with the attribution marker + report hook + =target_user= fixture; 3 parity checks — user exists/shell, ufw enabled, dotfiles stowed+readable), =scripts/testing/lib/testinfra.sh= (=run_testinfra_validation=: ephemeral-key injection, ssh-config, pytest-over-SSH; advisory + non-fatal, =RUN_TESTINFRA= toggle), wired into run-test.sh after the shell sweep, and added =python-pytest python-pytest-testinfra= to =make deps=. Verified on host: py_compile clean, =pytest --collect-only= green in a throwaway venv (4 tests, fixtures resolve), =bash -n= + shellcheck clean, unit suite still green. Integration (the pytest sweep actually running against a VM) is unverified here — needs a =make test= run. Decisions locked: inject test key; run both through parity; full expansion (P4) in this task after the P3 cutover.
*** 2026-06-25 Thu @ 01:12:09 -0400 P2 full parity port (88 tests)
Ported the whole shell sweep to pytest: test_users (exists/shell/15 groups parametrized), test_packages (yay+functional, pacman, terminus-font, emacs+config readable, git, 5 dev tools), test_services (required enabled/active, enabled-only, timers, optional skip-if-absent, DoT drop-in, fail2ban/nmcli responds, log-cleanup cron, syncthing lingering, DNS/mDNS/docker skips), test_desktop (Hyprland tools+configs+portal+socket gated on install/compositor, DWM suckless, autologin), test_boot (grub, mkinitcpio hooks branched on zfs_root, console-font-in-initramfs, nvme gated, zfs/sanoid), test_keyring (dir 700/owner/default=login), test_archsetup (log no Error:, ≥12 state markers). conftest fixtures: target_user/home/zfs_root/has_nvme/hyprland_installed/dwm_installed/compositor_running/on_slirp. 88 tests collected, py_compile clean. Correctness fix vs the shell sweep: check =awww= not the stale =swww=. Installed python-pytest-testinfra on velox so the harness gate passes. Next: VM run to diff pytest vs shell sweep for parity.
+*** 2026-06-25 Thu @ 01:24:11 -0400 Fixed: sshd hardening had silently broken =make test=
+VM run #1 aborted ~6 min in (Error 5), before any validation ran. Root cause (pre-existing, not the Testinfra work): the 2026-06-24 sshd hardening sets =PermitRootLogin prohibit-password= + reloads sshd mid-install, and the harness SSHes as root by *password* throughout — so every op after that step got "Permission denied" and run-test.sh fataled before validations. Fix: =inject_root_key= authorizes a throwaway root key right after first SSH (before archsetup runs) and all helpers (=wait_for_ssh=/=vm_exec=/=copy_to_vm=/=copy_from_vm=/=ssh_cmd=) gained =$SSH_KEY_OPT= so they use key auth, which =prohibit-password= still allows. testinfra.sh reuses that key. Additive (password stays as fallback). bash -n + shellcheck clean. Re-running the VM suite to confirm it now reaches the validation + pytest phases.
Create comprehensive integration tests using Testinfra (Python + pytest) to validate archsetup installations
Tests should cover: