3 files changed, 71 insertions, 39 deletions
diff --git a/scripts/testing/lib/testinfra.sh b/scripts/testing/lib/testinfra.sh
index bfcd43a..0822a9f 100644
--- a/scripts/testing/lib/testinfra.sh
+++ b/scripts/testing/lib/testinfra.sh
@@ -3,19 +3,40 @@
 #
 # Testinfra post-install validation sweep (runs on the host, over SSH).
 #
-# P1 status: advisory. This runs alongside the shell sweep (run_all_validations)
-# so a real VM run can diff the two and prove parity before pytest becomes the
-# primary validator (P3 cutover). It never sets the run's pass/fail here.
+# This is the primary post-install validator (it replaced the shell
+# run_all_validations sweep). It connects to the freshly-installed VM over SSH
+# and runs the pytest suite under scripts/testing/tests/. Its result drives the
+# run's pass/fail, and per-test failures are bucketed (archsetup / base_install
+# / unknown) into the same issue-attribution report the shell sweep produced.
 #
-# Auth: a throwaway ed25519 keypair is generated per run, its pubkey authorized
-# in the VM over the existing sshpass channel, and pytest/testinfra connects
-# key-only via a generated ssh-config. The keypair lives in the results dir and
-# is discarded with it.
+# Auth: reuse the root key the harness already authorized (inject_root_key),
+# which survives the sshd prohibit-password hardening; mint our own only if the
+# harness didn't (standalone use). pytest connects key-only via a generated
+# ssh-config. Key + config live in the results dir and are discarded with it.
 #
 # Uses globals from run-test.sh / vm-utils.sh: SCRIPT_DIR, VM_IP, SSH_PORT,
-# ROOT_PASSWORD, ARCHSETUP_VM_CONF. Toggle with RUN_TESTINFRA=false.
+# ROOT_PASSWORD, ROOT_SSH_KEY, ARCHSETUP_VM_CONF, plus the validation.sh
+# helpers attribute_issue / VALIDATION_*. Toggle with RUN_TESTINFRA=false.
+
+# Feed each pytest failure in attribution file $1 to validation.sh's
+# attribute_issue under its "[bucket]" header (default unknown); no file = no-op.
+_testinfra_record_attribution() {
+    local file="$1" bucket="unknown" line
+    [ -f "$file" ] || return 0
+    while IFS= read -r line || [ -n "$line" ]; do  # keep an unterminated last line
+        case "$line" in
+            "[archsetup]")    bucket=archsetup ;;
+            "[base_install]") bucket=base ;;
+            "[unknown]")      bucket=unknown ;;
+            "  "*)            attribute_issue "testinfra: ${line#  }" "$bucket" ;;
+        esac
+    done < "$file"
+}
 
 # run_testinfra_validation <results_dir>
+# Returns 0 only when the pytest sweep ran and passed. Returns non-zero when it
+# failed OR could not run (missing tooling / SSH setup) — a sweep that can't run
+# is not a pass. RUN_TESTINFRA=false is the one explicit opt-out (returns 0).
 run_testinfra_validation() {
     local results_dir="$1"
     local tests_dir="$SCRIPT_DIR/tests"
@@ -23,34 +44,33 @@ run_testinfra_validation() {
     local sshcfg="$results_dir/testinfra_ssh_config"
 
     if [ "${RUN_TESTINFRA:-true}" != "true" ]; then
+        warn "RUN_TESTINFRA=false - skipping the Testinfra validation sweep"
         return 0
     fi
     if ! command -v pytest >/dev/null 2>&1 || ! python3 -c 'import testinfra' >/dev/null 2>&1; then
-        warn "Testinfra/pytest not installed on host - skipping pytest sweep (run: make deps)"
-        return 0
+        error "Testinfra/pytest not installed on host - cannot validate (run: make deps)"
+        return 1
     fi
 
-    step "Running Testinfra validation sweep (advisory)"
+    section "Running Validation Checks (Testinfra)"
 
-    # Prefer the root key the harness already authorized (inject_root_key). It
-    # survives the sshd prohibit-password hardening, so reuse it rather than
-    # authorizing a second key. Fall back to minting our own for standalone use.
+    # Prefer the harness's already-authorized root key; mint one if absent.
     if [ -n "${ROOT_SSH_KEY:-}" ] && [ -f "${ROOT_SSH_KEY}" ]; then
         key="$ROOT_SSH_KEY"
     else
         rm -f "$key" "$key.pub"
         if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
-            warn "testinfra: ssh-keygen failed - skipping"
-            return 0
+            error "testinfra: ssh-keygen failed"
+            return 1
         fi
         if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then
-            warn "testinfra: pubkey copy failed - skipping"
-            return 0
+            error "testinfra: pubkey copy failed"
+            return 1
         fi
         if ! vm_exec "$ROOT_PASSWORD" \
             "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then
-            warn "testinfra: authorizing key in VM failed - skipping"
-            return 0
+            error "testinfra: authorizing key in VM failed"
+            return 1
         fi
     fi
 
@@ -71,17 +91,30 @@ EOF
     test_user=$(sed -n 's/^USERNAME=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1)
     : "${test_user:=cjennings}"
 
+    local logf="$results_dir/testinfra.log"
     ARCHSETUP_TEST_USER="$test_user" pytest "$tests_dir" \
         --hosts="ssh://testinfra-target" \
         --ssh-config="$sshcfg" \
         --attribution-file="$results_dir/testinfra-attribution.txt" \
-        -v >> "$results_dir/testinfra.log" 2>&1
+        -v >> "$logf" 2>&1
     local rc=$?
 
+    # Surface pytest's counts through the shared validation counters so the
+    # issue report summary is meaningful (the shell sweep no longer runs).
+    local summary
+    summary=$(grep -oE '[0-9]+ (passed|failed|error|errors|skipped)' "$logf" | tail -10)
+    VALIDATION_PASSED=$(echo "$summary" | awk '/passed/{print $1}' | tail -1); VALIDATION_PASSED=${VALIDATION_PASSED:-0}
+    VALIDATION_WARNINGS=$(echo "$summary" | awk '/skipped/{print $1}' | tail -1); VALIDATION_WARNINGS=${VALIDATION_WARNINGS:-0}
+    local nfail nerr
+    nfail=$(echo "$summary" | awk '/failed/{print $1}' | tail -1); nfail=${nfail:-0}
+    nerr=$(echo "$summary" | awk '/error/{print $1}' | tail -1); nerr=${nerr:-0}
+    VALIDATION_FAILED=$((nfail + nerr))
+
     if [ "$rc" -eq 0 ]; then
-        success "Testinfra sweep passed (advisory; see testinfra.log)"
+        success "Testinfra validation passed ($VALIDATION_PASSED passed, $VALIDATION_WARNINGS skipped)"
     else
-        warn "Testinfra sweep reported failures (advisory; see testinfra.log + testinfra-attribution.txt)"
+        error "Testinfra validation failed ($VALIDATION_FAILED failed/error; see testinfra.log)"
+        _testinfra_record_attribution "$results_dir/testinfra-attribution.txt"
     fi
-    return 0
+    return "$rc"
 }
diff --git a/scripts/testing/run-test.sh b/scripts/testing/run-test.sh
index 90022d3..6e51fc2 100755
--- a/scripts/testing/run-test.sh
+++ b/scripts/testing/run-test.sh
@@ -316,23 +316,17 @@ copy_from_vm "/var/log/archsetup-installed-packages.txt" "$TEST_RESULTS_DIR/" "$
 # Capture post-install state
 capture_post_install_state "$TEST_RESULTS_DIR"
 
-# Run comprehensive validation
-# This uses the validation.sh library for all checks.
+# Run comprehensive validation (Testinfra/pytest is the primary validator; the
+# old shell run_all_validations sweep was retired once pytest reached parity).
 #
 # From here to the end of the script, errexit is disabled on purpose: the
-# validation functions are designed to fail-and-count (see VALIDATION_FAILED)
-# rather than abort, and the analysis/report-generation steps below can also
-# legitimately return non-zero. With `set -e` active, a single failed check
-# would kill the run before the test report is written or the VM is cleaned
-# up. Pass/fail is signalled explicitly by the exit code at the bottom.
+# analysis/report-generation steps below can legitimately return non-zero, and
+# with `set -e` active a single failed check would kill the run before the test
+# report is written or the VM is cleaned up. Pass/fail is signalled explicitly
+# by the exit code at the bottom.
 set +e
-run_all_validations
-validate_all_services
-
-# Advisory Testinfra sweep alongside the shell sweep (P1). Compare the two on a
-# real run to confirm parity before pytest becomes primary. Does not affect
-# pass/fail yet.
 run_testinfra_validation "$TEST_RESULTS_DIR"
+testinfra_rc=$?
 
 # Analyze log differences (pre vs post install)
 analyze_log_diff "$TEST_RESULTS_DIR"
@@ -341,8 +335,8 @@ analyze_log_diff "$TEST_RESULTS_DIR"
 # If base install issues found and archzfs inbox exists, create issue file
 generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX"
 
-# Set validation result based on failure count
-if [ "$VALIDATION_FAILED" -eq 0 ]; then
+# The run passes only if the Testinfra sweep passed.
+if [ "$testinfra_rc" -eq 0 ]; then
     TEST_PASSED=true
 else
     TEST_PASSED=false
diff --git a/todo.org b/todo.org
index b0235d2..ea3734e 100644
--- a/todo.org
+++ b/todo.org
@@ -532,6 +532,9 @@ If modifications fail or are incorrect, difficult to recover - should backup fil
 
 Done 2026-06-25: added a =backup_system_file <path>= helper next to =safe_rm_rf= — it snapshots a pre-existing file to =<path>.archsetup.bak= before an in-place edit, idempotent (never clobbers an existing backup, so the pristine original survives repeated edits and re-runs), =cp -p= to preserve mode/ownership, no-op when the file is absent. Took the narrow scope (Craig's call): route only the in-place =sed -i= / append edits to *pre-existing* files through it — locale.gen, makepkg.conf, pacman.conf, sudoers, conf.d/wireless-regdom, geoclue.conf, conf.d/pacman-contrib, fstab, mkinitcpio.conf, vconsole.conf — and skip the brand-new drop-in files archsetup fully owns (nothing to back up; recovery is just deleting them). Tests: =tests/backup-system-file/= (7 Normal/Boundary/Error, incl. mode-preserved, existing-backup-not-overwritten, missing-target no-op, cp-failure). =make test-unit= green across all 5 suites; =bash -n= clean; only shellcheck note is the known SC2329 false positive (indirect STEPS dispatch). Integration verification is the next VM run.
 
+** TODO [#B] Migrate bare-metal test runner to Testinfra, then delete the shell sweep :test:
+=run-test.sh= (VM) now uses the Testinfra/pytest sweep as its authoritative validator, but =run-test-baremetal.sh= (lines ~243-244) still calls the old =run_all_validations= / =validate_all_services= from =scripts/testing/lib/validation.sh=. Migrate the bare-metal runner to =run_testinfra_validation= too (same key + ssh-config approach, adapted for a real host), then delete the now-dead shell-sweep functions from =validation.sh=. Keep the live helpers: =ssh_cmd=, =attribute_issue=, =capture_pre/post_install_state=, =analyze_log_diff=, =categorize_errors=, =generate_issue_report=, and the =VALIDATION_*= counters/arrays. Deferred from the Testinfra cutover because it needs a bare-metal test loop to validate, out of scope for the VM-only autonomous run.
+
 ** DOING [#B] Implement Testinfra test suite for archsetup
 :PROPERTIES:
 :LAST_REVIEWED: 2026-06-24
@@ -546,6 +549,8 @@ Ported the whole shell sweep to pytest: test_users (exists/shell/15 groups param
 VM run #1 aborted ~6 min in (Error 5), before any validation ran. Root cause (pre-existing, not the Testinfra work): the 2026-06-24 sshd hardening sets =PermitRootLogin prohibit-password= + reloads sshd mid-install, and the harness SSHes as root by *password* throughout — so every op after that step got "Permission denied" and run-test.sh fataled before validations. Fix: =inject_root_key= authorizes a throwaway root key right after first SSH (before archsetup runs) and all helpers (=wait_for_ssh=/=vm_exec=/=copy_to_vm=/=copy_from_vm=/=ssh_cmd=) gained =$SSH_KEY_OPT= so they use key auth, which =prohibit-password= still allows. testinfra.sh reuses that key. Additive (password stays as fallback). bash -n + shellcheck clean. Re-running the VM suite to confirm it now reaches the validation + pytest phases.
 *** 2026-06-25 Thu @ 03:33:33 -0400 Parity proven + P4 expansion validated on a live VM
 VM run #3 (=make test-keep=, kept VM up): pytest parity = 78 passed / 10 skipped / 0 fail / 0 err — matches & exceeds the shell sweep (53/0/0). Then built P4 expansion against the live VM (iterating in ~30s, no rebuild): test_hardening (sshd prohibit-password, sysctl printk, /etc/issue emptied, vconsole font, /efi fmask), test_config_applied (pacman ParallelDownloads/Color/multilib, makepkg MAKEFLAGS/OPTIONS, NM dns+wifi-privacy drop-ins, fail2ban jail, reflector), test_backups (=.archsetup.bak= present for pacman.conf/makepkg.conf/sudoers/mkinitcpio.conf — end-to-end proof of the backup feature). Full suite vs live VM: 95 passed / 10 skipped / 1 fail. The 1 fail = a REAL archsetup bug the tests caught: =ParallelDownloads= stayed at the Arch default 5 because the sed only matched a commented =#ParallelDownloads=, but current Arch ships it uncommented — fixed the sed to match both (=^#\?ParallelDownloads=). Also fixed a test bug (=grep -qx '[multilib]'= → =grep -Fxq=, the brackets were a regex char class). Remaining: P3 cutover (pytest authoritative) + P5 retire shell sweep, then a final fresh =make test=.
+*** 2026-06-25 Thu @ 03:38:28 -0400 P3 cutover: Testinfra is now the authoritative validator
+run-test.sh dropped the =run_all_validations= + =validate_all_services= shell-sweep calls; =run_testinfra_validation= now drives =TEST_PASSED= (returns pytest's rc; "couldn't run" = fail, not a silent pass). It surfaces pytest's pass/skip/fail counts through the shared =VALIDATION_*= counters and parses =testinfra-attribution.txt= into the issue arrays so =generate_issue_report= still buckets failures archsetup/base/unknown. Validated the failure path against the still-up VM: pytest rc=1, failure correctly bucketed to [archsetup]. P5 (physically delete the dead shell-sweep functions) is NOT done here — =run-test-baremetal.sh= still calls =run_all_validations=/=validate_all_services=, so deletion must wait until the bare-metal runner is migrated too (filed below). Final step: fresh =make test= to confirm the pass path (ParallelDownloads now 10) with pytest as the gate.
 Create comprehensive integration tests using Testinfra (Python + pytest) to validate archsetup installations
 
 Tests should cover: