aboutsummaryrefslogtreecommitdiff
path: root/scripts/testing/lib
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/testing/lib')
-rwxr-xr-xscripts/testing/lib/logging.sh1
-rw-r--r--scripts/testing/lib/network-diagnostics.sh1
-rw-r--r--scripts/testing/lib/testinfra.sh120
-rw-r--r--scripts/testing/lib/validation.sh845
-rwxr-xr-xscripts/testing/lib/vm-utils.sh69
5 files changed, 186 insertions, 850 deletions
diff --git a/scripts/testing/lib/logging.sh b/scripts/testing/lib/logging.sh
index ed20707..809d396 100755
--- a/scripts/testing/lib/logging.sh
+++ b/scripts/testing/lib/logging.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
# Logging utilities for archsetup testing
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
diff --git a/scripts/testing/lib/network-diagnostics.sh b/scripts/testing/lib/network-diagnostics.sh
index 674aeba..38788e5 100644
--- a/scripts/testing/lib/network-diagnostics.sh
+++ b/scripts/testing/lib/network-diagnostics.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
# Network diagnostics for VM testing
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
diff --git a/scripts/testing/lib/testinfra.sh b/scripts/testing/lib/testinfra.sh
new file mode 100644
index 0000000..0822a9f
--- /dev/null
+++ b/scripts/testing/lib/testinfra.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Testinfra post-install validation sweep (runs on the host, over SSH).
+#
+# This is the primary post-install validator (it replaced the shell
+# run_all_validations sweep). It connects to the freshly-installed VM over SSH
+# and runs the pytest suite under scripts/testing/tests/. Its result drives the
+# run's pass/fail, and per-test failures are bucketed (archsetup / base_install
+# / unknown) into the same issue-attribution report the shell sweep produced.
+#
+# Auth: reuse the root key the harness already authorized (inject_root_key),
+# which survives the sshd prohibit-password hardening; mint our own only if the
+# harness didn't (standalone use). pytest connects key-only via a generated
+# ssh-config. Key + config live in the results dir and are discarded with it.
+#
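+# Uses globals from run-test.sh / vm-utils.sh: SCRIPT_DIR, VM_IP, SSH_PORT,
+# ROOT_PASSWORD, ROOT_SSH_KEY, ARCHSETUP_VM_CONF, plus the validation.sh
+# helpers attribute_issue / VALIDATION_*. Also calls copy_to_vm / vm_exec and
+# the logging helpers (warn, error, section, success), which must already be
+# sourced. Toggle with RUN_TESTINFRA=false.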
+
+# Record each pytest failure from the attribution file into the issue arrays
+# (validation.sh's attribute_issue), so generate_issue_report covers them.
+#
+# Arguments: $1 - path to the attribution file produced by the pytest run.
+# File format, as parsed here: a "[archsetup]", "[base_install]" or
+# "[unknown]" header line selects the bucket (archsetup / base / unknown);
+# every following line that starts with a space is one issue for that bucket.
+# All other lines are ignored.
+# Returns 0 when the file does not exist.
+_testinfra_record_attribution() {
+    # "line" is local so the read loop does not clobber a caller's variable.
+    local file="$1" bucket="" line
+    [ -f "$file" ] || return 0
+    # The "|| [ -n ... ]" keeps a final entry that lacks a trailing newline.
+    while IFS= read -r line || [ -n "$line" ]; do
+        case "$line" in
+            "[archsetup]") bucket=archsetup ;;
+            "[base_install]") bucket=base ;;
+            "[unknown]") bucket=unknown ;;
+            " "*)
+                # An entry seen before any section header has no bucket yet;
+                # file it under "unknown" instead of passing an empty bucket.
+                attribute_issue "testinfra: ${line# }" "${bucket:-unknown}"
+                ;;
+        esac
+    done < "$file"
+}
+
+# _testinfra_run_summary <logfile> <lines_to_skip>
+# Print the "<n> passed|failed|error|errors|skipped" tokens that belong to the
+# current run only. The log is appended to, so the first <lines_to_skip> lines
+# (output of earlier writers) are ignored. pytest's final summary line
+# ("... in 1.23s") is preferred; if none is found, fall back to scanning the
+# whole of this run's output.
+_testinfra_run_summary() {
+    local logf="$1" first_line="$(( ${2:-0} + 1 ))" final_line
+    final_line=$(tail -n "+$first_line" "$logf" 2>/dev/null |
+        grep -E '[0-9]+ (passed|failed|error|errors|skipped).* in [0-9.]+ ?s' |
+        tail -n 1)
+    if [ -n "$final_line" ]; then
+        printf '%s\n' "$final_line"
+    else
+        tail -n "+$first_line" "$logf" 2>/dev/null
+    fi | grep -oE '[0-9]+ (passed|failed|error|errors|skipped)' | tail -n 10
+}
+
+# _testinfra_summary_count <summary> <word>
+# Print the count from the last "<n> <word>..." token in <summary>, or 0.
+_testinfra_summary_count() {
+    printf '%s\n' "$1" |
+        awk -v word="$2" 'index($0, word) { n = $1 } END { print n + 0 }'
+}
+
+# run_testinfra_validation <results_dir>
+# Returns 0 only when the pytest sweep ran and passed. Returns non-zero when it
+# failed OR could not run (missing tooling / SSH setup) — a sweep that can't run
+# is not a pass. RUN_TESTINFRA=false is the one explicit opt-out (returns 0).
+#
+# Arguments: $1 - results directory; receives the ssh-config, testinfra.log,
+#                 testinfra-attribution.txt and (when minted) the key pair.
+# Globals read: RUN_TESTINFRA, SCRIPT_DIR, ROOT_SSH_KEY, ROOT_PASSWORD, VM_IP,
+#               SSH_PORT, ARCHSETUP_VM_CONF.
+# Globals written: VALIDATION_PASSED, VALIDATION_WARNINGS (pytest "skipped"),
+#                  VALIDATION_FAILED (failed + errors, at least 1 when pytest
+#                  exits non-zero).
+run_testinfra_validation() {
+    local results_dir="${1:-}"
+    local tests_dir="$SCRIPT_DIR/tests"
+    local key="$results_dir/testinfra_key"
+    local sshcfg="$results_dir/testinfra_ssh_config"
+    local attribution="$results_dir/testinfra-attribution.txt"
+
+    if [ "${RUN_TESTINFRA:-true}" != "true" ]; then
+        warn "RUN_TESTINFRA=false - skipping the Testinfra validation sweep"
+        return 0
+    fi
+    if ! command -v pytest >/dev/null 2>&1 || ! python3 -c 'import testinfra' >/dev/null 2>&1; then
+        error "Testinfra/pytest not installed on host - cannot validate (run: make deps)"
+        return 1
+    fi
+    # Without a results dir every path above would resolve under "/".
+    if [ -z "$results_dir" ]; then
+        error "testinfra: no results directory given"
+        return 1
+    fi
+
+    section "Running Validation Checks (Testinfra)"
+
+    # Prefer the harness's already-authorized root key; mint one if absent.
+    if [ -n "${ROOT_SSH_KEY:-}" ] && [ -f "${ROOT_SSH_KEY}" ]; then
+        key="$ROOT_SSH_KEY"
+    else
+        rm -f "$key" "$key.pub"
+        if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
+            error "testinfra: ssh-keygen failed"
+            return 1
+        fi
+        if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then
+            error "testinfra: pubkey copy failed"
+            return 1
+        fi
+        if ! vm_exec "$ROOT_PASSWORD" \
+            "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then
+            error "testinfra: authorizing key in VM failed"
+            return 1
+        fi
+    fi
+
+    # ssh-config so testinfra connects key-only, no host-key prompt.
+    # IdentityFile is double-quoted so a key path containing spaces survives.
+    if ! cat > "$sshcfg" <<EOF
+Host testinfra-target
+    HostName ${VM_IP:-localhost}
+    Port ${SSH_PORT:-2222}
+    User root
+    IdentityFile "$key"
+    IdentitiesOnly yes
+    StrictHostKeyChecking no
+    UserKnownHostsFile /dev/null
+EOF
+    then
+        error "testinfra: writing ssh-config failed"
+        return 1
+    fi
+
+    # The account archsetup created, for the tests that need it.
+    local test_user
+    test_user=$(sed -n 's/^USERNAME=//p' "${ARCHSETUP_VM_CONF:-/dev/null}" 2>/dev/null | head -n1)
+    : "${test_user:=cjennings}"
+
+    # The log is appended to, so remember where this run's output starts;
+    # only those lines are parsed for counts below.
+    local logf="$results_dir/testinfra.log"
+    local skip_lines=0
+    if [ -f "$logf" ]; then
+        skip_lines=$(wc -l < "$logf")
+    fi
+    # Drop any attribution file left by an earlier run in this results dir, so
+    # a run that dies before writing one cannot replay stale failures.
+    rm -f "$attribution"
+
+    local rc
+    ARCHSETUP_TEST_USER="$test_user" pytest "$tests_dir" \
+        --hosts="ssh://testinfra-target" \
+        --ssh-config="$sshcfg" \
+        --attribution-file="$attribution" \
+        -v >> "$logf" 2>&1
+    rc=$?
+
+    # Surface pytest's counts through the shared validation counters so the
+    # issue report summary is meaningful (the shell sweep no longer runs).
+    local summary nfail nerr
+    summary=$(_testinfra_run_summary "$logf" "$skip_lines")
+    VALIDATION_PASSED=$(_testinfra_summary_count "$summary" passed)
+    VALIDATION_WARNINGS=$(_testinfra_summary_count "$summary" skipped)
+    nfail=$(_testinfra_summary_count "$summary" failed)
+    nerr=$(_testinfra_summary_count "$summary" error)
+    VALIDATION_FAILED=$((nfail + nerr))
+
+    # pytest can exit non-zero without printing any failed/error count (usage
+    # error, nothing collected, crash). Count that as one failure so the
+    # counters never report a clean sweep for a failing run.
+    if [ "$rc" -ne 0 ] && [ "$VALIDATION_FAILED" -eq 0 ]; then
+        VALIDATION_FAILED=1
+    fi
+
+    if [ "$rc" -eq 0 ]; then
+        success "Testinfra validation passed ($VALIDATION_PASSED passed, $VALIDATION_WARNINGS skipped)"
+    else
+        error "Testinfra validation failed ($VALIDATION_FAILED failed/error; see testinfra.log)"
+        _testinfra_record_attribution "$attribution"
+    fi
+    return "$rc"
+}
diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh
index 91270ef..fa7ddcc 100644
--- a/scripts/testing/lib/validation.sh
+++ b/scripts/testing/lib/validation.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
# Validation utilities for archsetup testing
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
@@ -20,38 +21,7 @@ declare -a UNKNOWN_ISSUES
# SSH helper (uses globals: VM_IP, ROOT_PASSWORD)
ssh_cmd() {
sshpass -p "$ROOT_PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
- -o ConnectTimeout=10 -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null
-}
-
-# Validation result helpers
-validation_pass() {
- local test_name="$1"
- success "$test_name"
- ((VALIDATION_PASSED++)) || true
-}
-
-validation_fail() {
- local test_name="$1"
- local details="${2:-}"
- error "$test_name"
- [ -n "$details" ] && info " Details: $details"
- ((VALIDATION_FAILED++)) || true
-}
-
-validation_warn() {
- local test_name="$1"
- local details="${2:-}"
- warn "$test_name"
- [ -n "$details" ] && info " Details: $details"
- ((VALIDATION_WARNINGS++)) || true
-}
-
-# A check whose precondition can't hold in this environment (headless VM,
-# slirp networking, pre-reboot state). Logged for the record, counted nowhere
-# — a warning that fires on every run trains readers to ignore warnings.
-validation_skip() {
- local test_name="$1"
- info "SKIP: $test_name"
+ -o ConnectTimeout=10 ${SSH_KEY_OPT:-} -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null
}
# Attribute an issue to archsetup or base install
@@ -264,802 +234,6 @@ categorize_errors() {
}
#=============================================================================
-# VALIDATION CHECKS
-#=============================================================================
-
-run_all_validations() {
- section "Running Validation Checks"
-
- # User & Authentication
- validate_user_created
- validate_user_shell
- validate_user_groups
-
- # Dotfiles
- validate_dotfiles
-
- # Package Managers
- validate_yay_installed
- validate_pacman_working
-
- # Window Manager (detects DWM or Hyprland automatically)
- validate_window_manager
-
- # Essential Services
- validate_firewall
- validate_dns_config
- validate_avahi
- validate_fail2ban
- validate_networkmanager
-
- # Developer Tools
- validate_emacs
- validate_git_config
- validate_dev_tools
-
- # System Configuration
- validate_zfs_config
- validate_boot_config
- validate_autologin_config
- validate_gnome_keyring_setup
-
- # Boot & Initramfs (critical for ZFS systems)
- validate_terminus_font
- validate_mkinitcpio_hooks
- validate_initramfs_consolefont
- validate_nvme_module
-
- # Archsetup Specific
- validate_archsetup_log
- validate_state_markers
-}
-
-#-----------------------------------------------------------------------------
-# User & Authentication Validations
-#-----------------------------------------------------------------------------
-
-validate_user_created() {
- step "Checking if user 'cjennings' exists"
- if ssh_cmd "id cjennings" &>> "$LOGFILE"; then
- validation_pass "User cjennings exists"
- else
- validation_fail "User cjennings not found"
- attribute_issue "User cjennings not created" "archsetup"
- fi
-}
-
-validate_user_shell() {
- step "Checking if ZSH is default shell"
- local shell=$(ssh_cmd "getent passwd cjennings | cut -d: -f7")
- if [ "$shell" = "/bin/zsh" ] || [ "$shell" = "/usr/bin/zsh" ]; then
- validation_pass "ZSH is default shell"
- else
- validation_fail "ZSH not default shell (got: $shell)"
- attribute_issue "ZSH not set as default shell" "archsetup"
- fi
-}
-
-validate_user_groups() {
- step "Checking user group memberships"
- # Groups added by archsetup:
- # - wheel (useradd -G wheel)
- # - sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users (usermod -aG)
- # - docker (gpasswd -a, added later in developer_workstation)
- local expected_groups="wheel sys adm network scanner power uucp audio lp rfkill video storage optical users docker"
- local missing_groups=""
-
- for group in $expected_groups; do
- if ! ssh_cmd "groups cjennings" | grep -q "\b$group\b"; then
- missing_groups="$missing_groups $group"
- fi
- done
-
- if [ -z "$missing_groups" ]; then
- validation_pass "User in all expected groups (15 groups)"
- else
- validation_fail "User missing groups:$missing_groups"
- attribute_issue "User missing groups:$missing_groups" "archsetup"
- fi
-}
-
-#-----------------------------------------------------------------------------
-# Dotfiles Validations
-#-----------------------------------------------------------------------------
-
-validate_dotfiles() {
- step "Checking dotfiles setup"
-
- # 1. Check if .zshrc is a symlink
- if ! ssh_cmd "test -L /home/cjennings/.zshrc"; then
- validation_fail "Dotfiles not stowed (.zshrc is not a symlink)"
- attribute_issue "Dotfiles stow failed" "archsetup"
- return 1
- fi
-
- # 2. Check symlink points to correct location. archsetup now clones the
- # dotfiles repo to ~/.dotfiles and stows from there (DOTFILES_DIR default).
- # Which tree owns .zshrc depends on DESKTOP_ENV: none stows the standalone
- # minimal/ tree; dwm and hyprland stow common/.
- local target=$(ssh_cmd "readlink /home/cjennings/.zshrc")
- local desktop_env=$(sed -n 's/^DESKTOP_ENV=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1)
- local expected_pattern=".dotfiles/common/.zshrc"
- [ "$desktop_env" = "none" ] && expected_pattern=".dotfiles/minimal/.zshrc"
-
- if ! echo "$target" | grep -q "$expected_pattern"; then
- validation_fail "Dotfiles symlink points to wrong location: $target"
- attribute_issue "Dotfiles symlink incorrect: $target" "archsetup"
- return 1
- fi
-
- # 3. Check the target file actually exists (not a broken symlink)
- if ! ssh_cmd "test -f /home/cjennings/.zshrc"; then
- validation_fail "Dotfiles symlink is broken (target doesn't exist)"
- ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1
- attribute_issue "Dotfiles symlink broken" "archsetup"
- return 1
- fi
-
- # 4. Check user can actually read the file (not just root)
- local result=$(ssh_cmd "sudo -u cjennings cat /home/cjennings/.zshrc > /dev/null 2>&1 && echo OK || echo FAIL")
- if [ "$result" != "OK" ]; then
- validation_fail "Dotfiles not readable by user (permission issue)"
- ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1
- attribute_issue "Dotfiles not readable by user" "archsetup"
- return 1
- fi
-
- validation_pass "Dotfiles configured correctly (symlink to $target, readable by user)"
-}
-
-#-----------------------------------------------------------------------------
-# Package Manager Validations
-#-----------------------------------------------------------------------------
-
-validate_yay_installed() {
- step "Checking if yay (AUR helper) is installed and functional"
-
- # Check binary exists
- if ! ssh_cmd "which yay" &>> "$LOGFILE"; then
- validation_fail "yay not found"
- attribute_issue "yay not installed" "archsetup"
- return 1
- fi
-
- # Check yay can query packages (functional test)
- if ssh_cmd "sudo -u cjennings yay -Qi yay" &>> "$LOGFILE"; then
- validation_pass "yay is installed and functional"
- else
- validation_fail "yay binary exists but query failed"
- attribute_issue "yay not functional" "archsetup"
- fi
-}
-
-validate_pacman_working() {
- step "Checking if pacman is functional"
- if ssh_cmd "pacman -Qi base" &>> "$LOGFILE"; then
- validation_pass "pacman is functional"
- else
- validation_fail "pacman query failed"
- attribute_issue "pacman not functional" "unknown"
- fi
-}
-
-#-----------------------------------------------------------------------------
-# Window Manager Validations
-#-----------------------------------------------------------------------------
-
-validate_suckless_tools() {
- step "Checking suckless tools (dwm, st, dmenu, slock)"
- local missing=""
-
- for tool in dwm st dmenu slock; do
- if ! ssh_cmd "test -f /usr/local/bin/$tool"; then
- missing="$missing $tool"
- fi
- done
-
- if [ -z "$missing" ]; then
- validation_pass "All suckless tools installed (dwm, st, dmenu, slock)"
- else
- validation_fail "Missing suckless tools:$missing"
- attribute_issue "Missing suckless tools:$missing" "archsetup"
- fi
-}
-
-validate_hyprland_tools() {
- step "Checking Hyprland tools"
- local missing=""
-
- # Check core Hyprland packages
- for pkg in hyprland hypridle hyprlock waybar fuzzel swww grim slurp gammastep foot; do
- if ! ssh_cmd "pacman -Q $pkg &>/dev/null"; then
- missing="$missing $pkg"
- fi
- done
-
- if [ -z "$missing" ]; then
- validation_pass "All Hyprland tools installed"
- else
- validation_fail "Missing Hyprland tools:$missing"
- attribute_issue "Missing Hyprland tools:$missing" "archsetup"
- fi
-}
-
-validate_hyprland_config() {
- step "Checking Hyprland configuration files"
- local missing=""
-
- for config in ".config/hypr/hyprland.conf" ".config/hypr/hypridle.conf" \
- ".config/hypr/hyprlock.conf" ".config/waybar/config" \
- ".config/fuzzel/fuzzel.ini" ".config/gammastep/config.ini"; do
- if ! ssh_cmd "test -f /home/cjennings/$config"; then
- missing="$missing $config"
- fi
- done
-
- if [ -z "$missing" ]; then
- validation_pass "All Hyprland config files present"
- else
- validation_fail "Missing Hyprland configs:$missing"
- attribute_issue "Missing Hyprland configs:$missing" "archsetup"
- fi
-}
-
-validate_hyprland_socket() {
- step "Checking Hyprland IPC socket"
- # The socket only exists while the compositor runs. In the headless test
- # VM nobody logs in graphically, so a missing socket with no Hyprland
- # process is the expected state, not a finding.
- if ssh_cmd "test -S /tmp/hypr/*/.socket.sock 2>/dev/null"; then
- validation_pass "Hyprland socket exists"
- elif ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then
- validation_skip "Hyprland not running (headless) — socket check not applicable"
- else
- validation_warn "Hyprland running but IPC socket not found"
- fi
-}
-
-validate_portal_dark_mode() {
- step "Checking Settings portal returns dark mode"
-
- # Check portals.conf exists and uses gtk for Settings
- local portals_conf="/home/cjennings/.config/xdg-desktop-portal/portals.conf"
- if ! ssh_cmd "test -f $portals_conf"; then
- validation_fail "portals.conf not found"
- attribute_issue "xdg-desktop-portal portals.conf missing" "archsetup"
- return 1
- fi
-
- local settings_backend=$(ssh_cmd "grep 'org.freedesktop.impl.portal.Settings' $portals_conf 2>/dev/null | cut -d= -f2")
- if [ "$settings_backend" = "none" ]; then
- validation_fail "Settings portal disabled (set to 'none')"
- attribute_issue "Settings portal disabled in portals.conf" "archsetup"
- return 1
- fi
-
- # Query the portal for color-scheme (requires portal services running)
- # Returns "v v u 1" for dark mode (1 = prefer-dark)
- local color_scheme=$(ssh_cmd "sudo -u cjennings busctl --user call org.freedesktop.portal.Desktop /org/freedesktop/portal/desktop org.freedesktop.portal.Settings Read 'ss' 'org.freedesktop.appearance' 'color-scheme' 2>/dev/null | grep -o 'u [0-9]' | cut -d' ' -f2")
-
- if [ "$color_scheme" = "1" ]; then
- validation_pass "Settings portal returns dark mode (color-scheme=1)"
- elif [ -z "$color_scheme" ] && ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then
- # No compositor → no graphical session bus to query. A socket-activated
- # xdg-desktop-portal process can exist even headless, so the compositor
- # is the real precondition (same condition as the socket check). The
- # conf-file checks above already validated what install controls.
- validation_skip "No compositor running (headless) — portal query not applicable"
- elif [ -z "$color_scheme" ]; then
- validation_warn "Could not query Settings portal (portal may not be running)"
- else
- validation_fail "Settings portal not returning dark mode (color-scheme=$color_scheme, expected 1)"
- attribute_issue "Settings portal not configured for dark mode" "archsetup"
- fi
-}
-
-validate_window_manager() {
- # Detect which desktop environment is installed and validate accordingly
- if ssh_cmd "pacman -Q hyprland &>/dev/null"; then
- section "Hyprland Desktop Environment"
- validate_hyprland_tools
- validate_hyprland_config
- validate_hyprland_socket
- validate_portal_dark_mode
- elif ssh_cmd "test -f /usr/local/bin/dwm"; then
- section "DWM Desktop Environment"
- validate_suckless_tools
- else
- validation_warn "No window manager detected (DESKTOP_ENV=none?)"
- fi
-}
-
-#-----------------------------------------------------------------------------
-# Essential Services Validations
-#-----------------------------------------------------------------------------
-
-validate_firewall() {
- step "Checking if firewall (ufw) is enabled"
- local status=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled")
- if [ "$status" = "enabled" ]; then
- validation_pass "UFW firewall is enabled"
- else
- validation_fail "UFW firewall not enabled"
- attribute_issue "UFW not enabled" "archsetup"
- fi
-}
-
-validate_dns_config() {
- step "Checking DNS-over-TLS configuration"
- if ssh_cmd "grep -q 'DNS=.*#' /etc/systemd/resolved.conf 2>/dev/null"; then
- validation_pass "DNS-over-TLS configured"
- else
- validation_warn "DNS-over-TLS may not be configured"
- fi
-}
-
-validate_avahi() {
- step "Checking avahi-daemon status"
- local status=$(ssh_cmd "systemctl is-enabled avahi-daemon.service 2>/dev/null || echo disabled")
- if [ "$status" = "enabled" ]; then
- validation_pass "avahi-daemon is enabled"
-
- # Full-stack mDNS test: ping hostname.local. QEMU user-mode (slirp,
- # 10.0.2.x) doesn't pass multicast, so mDNS genuinely can't resolve
- # there — only run the ping on real networking.
- if ssh_cmd "ip -4 addr show" 2>/dev/null | grep -q "10\.0\.2\."; then
- validation_skip "mDNS ping not possible on slirp networking (no multicast)"
- else
- local hostname=$(ssh_cmd "hostname")
- if ssh_cmd "ping -c 1 -W 2 ${hostname}.local" &>> "$LOGFILE"; then
- validation_pass "mDNS working (${hostname}.local responds to ping)"
- else
- validation_warn "mDNS ping failed (avahi may need time to propagate)"
- fi
- fi
- else
- # This might be OK if avahi was pre-installed
- validation_warn "avahi-daemon not enabled (may have been pre-configured)"
- fi
-}
-
-validate_fail2ban() {
- step "Checking fail2ban status"
- local status=$(ssh_cmd "systemctl is-enabled fail2ban.service 2>/dev/null || echo disabled")
- if [ "$status" = "enabled" ]; then
- validation_pass "fail2ban is enabled"
- else
- validation_fail "fail2ban not enabled"
- attribute_issue "fail2ban not enabled" "archsetup"
- fi
-}
-
-validate_networkmanager() {
- step "Checking NetworkManager status"
- local status=$(ssh_cmd "systemctl is-enabled NetworkManager.service 2>/dev/null || echo disabled")
- if [ "$status" = "enabled" ]; then
- validation_pass "NetworkManager is enabled"
- # Functional test
- if ssh_cmd "nmcli general status" &>> "$LOGFILE"; then
- validation_pass "NetworkManager is functional"
- else
- validation_warn "NetworkManager enabled but not responding"
- fi
- else
- validation_fail "NetworkManager not enabled"
- attribute_issue "NetworkManager not enabled" "archsetup"
- fi
-}
-
-#-----------------------------------------------------------------------------
-# Service-Specific Validations
-#-----------------------------------------------------------------------------
-
-validate_all_services() {
- section "Service Validations"
-
- # Core services (always expected)
- validate_service "sshd" "enabled" "active"
- validate_service "systemd-resolved" "enabled" "active"
- validate_service "ufw" "enabled" "" # VM lacks iptables modules, can't be active
- validate_service "fail2ban" "enabled" "active"
- validate_service "NetworkManager" "enabled" "active"
- validate_service "rngd" "enabled" "active"
- validate_service "cronie" "enabled" ""
- validate_service "atd" "enabled" ""
-
- # Cron job: log cleanup
- step "Checking log-cleanup cron job"
- local crontab_entry=$(ssh_cmd "sudo -u cjennings crontab -l 2>/dev/null | grep log-cleanup")
- if [ -n "$crontab_entry" ]; then
- validation_pass "log-cleanup cron job installed"
- else
- validation_fail "log-cleanup cron job not in crontab"
- attribute_issue "log-cleanup cron job missing from user crontab" "archsetup"
- fi
-
- # Timer services
- validate_service "reflector.timer" "enabled" ""
- validate_service "paccache.timer" "enabled" ""
-
- # Optional services (warn if missing, don't fail)
- validate_service_optional "avahi-daemon" "enabled"
- validate_service_optional "bluetooth" "enabled"
- validate_service_optional "cups" "enabled"
- validate_service_optional "docker" "enabled"
- validate_service_optional "tailscaled" "enabled"
- # Syncthing uses user service (not system), check lingering is enabled.
- # test -e, not ls: ls prints the path on success, so the old capture held
- # "path\nyes" and could never equal "yes" — the check warned on every
- # run, even with lingering correctly enabled.
- step "Checking user lingering for syncthing"
- local linger_enabled=$(ssh_cmd "test -e /var/lib/systemd/linger/cjennings && echo yes || echo no")
- if [ "$linger_enabled" = "yes" ]; then
- validation_pass "User lingering enabled for syncthing user service"
- else
- validation_warn "User lingering not enabled (syncthing may not autostart)"
- fi
-
- # Filesystem-specific
- validate_zfs_services
- validate_btrfs_services
-
- # Functional tests
- validate_service_functions
-}
-
-validate_service() {
- local service="$1"
- local expected_enabled="$2" # "enabled" or ""
- local expected_active="$3" # "active" or ""
-
- step "Checking $service"
-
- if [ -n "$expected_enabled" ]; then
- local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled")
- if [ "$enabled" = "enabled" ]; then
- validation_pass "$service is enabled"
- else
- validation_fail "$service not enabled (got: $enabled)"
- attribute_issue "$service not enabled" "archsetup"
- return 1
- fi
- fi
-
- if [ -n "$expected_active" ]; then
- local active=$(ssh_cmd "systemctl is-active $service 2>/dev/null || echo inactive")
- if [ "$active" = "active" ]; then
- validation_pass "$service is active"
- else
- validation_fail "$service not active (got: $active)"
- attribute_issue "$service not active" "archsetup"
- return 1
- fi
- fi
-
- return 0
-}
-
-validate_service_optional() {
- local service="$1"
- local expected_enabled="$2"
-
- step "Checking optional service: $service"
-
- local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled")
- if [ "$enabled" = "enabled" ]; then
- validation_pass "$service is enabled"
- else
- validation_warn "$service not enabled (optional)"
- fi
-}
-
-validate_zfs_services() {
- # Only check if ZFS is installed
- if ! ssh_cmd "which zfs" &>> "$LOGFILE"; then
- return 0
- fi
-
- step "Checking ZFS-specific services"
-
- validate_service_optional "sanoid.timer" "enabled"
-
- # Check for zfs-scrub timer (pool name varies)
- local scrub_enabled
- scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled" | tr -d '[:space:]')
- scrub_enabled=${scrub_enabled:-0}
- if [ "$scrub_enabled" -gt 0 ]; then
- validation_pass "ZFS scrub timer enabled"
- else
- validation_warn "ZFS scrub timer not found"
- fi
-}
-
-validate_btrfs_services() {
- # Only check if btrfs root
- if ! ssh_cmd "mount | grep 'on / ' | grep -q btrfs"; then
- return 0
- fi
-
- step "Checking btrfs-specific services"
- validate_service_optional "grub-btrfsd" "enabled"
-}
-
-validate_service_functions() {
- section "Service Functional Tests"
-
- # UFW functional test
- # NOTE: VM environment lacks iptables kernel modules, so UFW cannot activate.
- # We only verify it's enabled; active status requires real hardware.
- step "Testing UFW functionality"
- local ufw_enabled
- ufw_enabled=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled")
- if [ "$ufw_enabled" = "enabled" ]; then
- validation_pass "UFW is enabled (activation requires iptables kernel modules)"
- else
- validation_fail "UFW not enabled"
- attribute_issue "UFW not enabled" "archsetup"
- fi
-
- # fail2ban functional test
- step "Testing fail2ban functionality"
- if ssh_cmd "fail2ban-client status" &>> "$LOGFILE"; then
- validation_pass "fail2ban is responding"
- else
- validation_fail "fail2ban not responding"
- attribute_issue "fail2ban not functioning" "archsetup"
- fi
-
- # DNS resolution test
- step "Testing DNS resolution"
- if ssh_cmd "resolvectl query archlinux.org" &>> "$LOGFILE"; then
- validation_pass "DNS resolution working"
- else
- validation_warn "DNS resolution test failed (may be network issue)"
- fi
-
- # Docker functional test (if enabled)
- if ssh_cmd "systemctl is-enabled docker" &>> "$LOGFILE"; then
- step "Testing Docker functionality"
- if ssh_cmd "docker info" &>> "$LOGFILE"; then
- validation_pass "Docker is responding"
- elif ! ssh_cmd "systemctl is-active --quiet docker"; then
- # archsetup enables docker for next boot (enable, not enable --now,
- # by design — the daemon is heavy). Validation runs pre-reboot, so
- # enabled-but-not-started is the correct installed state.
- validation_skip "Docker enabled but not started (starts on boot by design)"
- else
- validation_warn "Docker active but not responding"
- fi
- fi
-}
-
-#-----------------------------------------------------------------------------
-# Developer Tools Validations
-#-----------------------------------------------------------------------------
-
-validate_emacs() {
- step "Checking if Emacs is installed"
- if ssh_cmd "which emacs" &>> "$LOGFILE"; then
- validation_pass "Emacs is installed"
-
- # Check if config exists
- if ssh_cmd "test -d /home/cjennings/.emacs.d"; then
- validation_pass "Emacs config directory exists"
-
- # Check user can access the directory
- local result
- result=$(ssh_cmd "sudo -u cjennings ls /home/cjennings/.emacs.d > /dev/null 2>&1 && echo OK || echo FAIL")
- if [ "$result" = "OK" ]; then
- validation_pass "Emacs config readable by user"
- else
- validation_fail "Emacs config not readable by user (permission issue)"
- attribute_issue "Emacs .emacs.d not readable by user" "archsetup"
- fi
- else
- validation_warn "Emacs config directory not found"
- fi
- else
- validation_fail "Emacs not found"
- attribute_issue "Emacs not installed" "archsetup"
- fi
-}
-
-validate_git_config() {
- step "Checking git installation"
- if ssh_cmd "which git" &>> "$LOGFILE"; then
- validation_pass "git is installed"
- else
- validation_fail "git not found"
- attribute_issue "git not installed" "archsetup"
- fi
-}
-
-validate_dev_tools() {
- step "Checking developer tools"
- local tools="python node npm go rustc"
- local missing=""
-
- for tool in $tools; do
- if ! ssh_cmd "which $tool" &>> "$LOGFILE"; then
- missing="$missing $tool"
- fi
- done
-
- if [ -z "$missing" ]; then
- validation_pass "Core dev tools installed"
- else
- validation_warn "Some dev tools missing:$missing"
- fi
-}
-
-#-----------------------------------------------------------------------------
-# System Configuration Validations
-#-----------------------------------------------------------------------------
-
-validate_zfs_config() {
- step "Checking ZFS configuration (if applicable)"
- if ssh_cmd "which zfs" &>> "$LOGFILE"; then
- # ZFS is installed, check for sanoid
- if ssh_cmd "which sanoid" &>> "$LOGFILE"; then
- validation_pass "ZFS with sanoid detected"
- else
- validation_warn "ZFS detected but sanoid not installed"
- fi
- else
- info "ZFS not installed (non-ZFS system)"
- fi
-}
-
-validate_boot_config() {
- step "Checking GRUB configuration"
- if ssh_cmd "test -f /boot/grub/grub.cfg" &>> "$LOGFILE"; then
- validation_pass "GRUB config exists"
- else
- validation_warn "GRUB config not found (may use different bootloader)"
- fi
-}
-
-validate_terminus_font() {
- step "Checking terminus-font installation"
- if ssh_cmd "pacman -Q terminus-font" &>> "$LOGFILE"; then
- validation_pass "terminus-font package installed"
- else
- validation_fail "terminus-font package not installed"
- attribute_issue "terminus-font not installed via pacman" "archsetup"
- fi
-}
-
-validate_mkinitcpio_hooks() {
- step "Checking mkinitcpio HOOKS configuration"
- local hooks=$(ssh_cmd "grep '^HOOKS=' /etc/mkinitcpio.conf")
- local is_zfs=$(ssh_cmd "findmnt -n -o FSTYPE / 2>/dev/null")
-
- if [ "$is_zfs" = "zfs" ]; then
- # ZFS system: must use udev, not systemd
- if echo "$hooks" | grep -q '\budev\b'; then
- validation_pass "ZFS system uses udev hook (correct)"
- elif echo "$hooks" | grep -q '\bsystemd\b'; then
- validation_fail "ZFS system uses systemd hook (will break boot)"
- attribute_issue "mkinitcpio uses systemd hook on ZFS system" "archsetup"
- else
- validation_warn "Could not determine init hook type"
- fi
- else
- # Non-ZFS: systemd hook is fine
- if echo "$hooks" | grep -q '\bsystemd\b'; then
- validation_pass "Non-ZFS system uses systemd hook"
- elif echo "$hooks" | grep -q '\budev\b'; then
- validation_pass "Non-ZFS system uses udev hook"
- fi
- fi
-}
-
-validate_initramfs_consolefont() {
- step "Checking console font in initramfs"
- local font_in_initramfs=$(ssh_cmd "lsinitcpio /boot/initramfs-linux*.img 2>/dev/null | grep -c 'consolefont.psf\\|ter-'")
-
- if [ "${font_in_initramfs:-0}" -gt 0 ]; then
- validation_pass "Console font included in initramfs"
- else
- validation_warn "Console font may not be in initramfs"
- fi
-}
-
-validate_nvme_module() {
- step "Checking NVMe module configuration"
- local has_nvme=$(ssh_cmd "ls /dev/nvme* 2>/dev/null | head -1")
-
- if [ -n "$has_nvme" ]; then
- # System has NVMe drives
- local modules=$(ssh_cmd "grep '^MODULES=' /etc/mkinitcpio.conf")
- if echo "$modules" | grep -q 'nvme'; then
- validation_pass "NVMe module in mkinitcpio MODULES"
- else
- validation_warn "NVMe system but nvme not in MODULES (may cause slow boot)"
- fi
- else
- info "No NVMe drives detected"
- fi
-}
-
-validate_autologin_config() {
- step "Checking autologin configuration"
- if ssh_cmd "test -f /etc/systemd/system/getty@tty1.service.d/autologin.conf" &>> "$LOGFILE"; then
- validation_pass "Autologin configured"
- else
- info "Autologin not configured (may be intentional)"
- fi
-}
-
-validate_gnome_keyring_setup() {
- step "Checking gnome-keyring pre-configuration"
- local keyring_dir="/home/cjennings/.local/share/keyrings"
-
- # Check directory exists
- if ! ssh_cmd "test -d $keyring_dir"; then
- validation_fail "Keyring directory not created"
- attribute_issue "gnome-keyring directory not pre-created" "archsetup"
- return 1
- fi
-
- # Check directory permissions (should be 700)
- local perms=$(ssh_cmd "stat -c '%a' $keyring_dir")
- if [ "$perms" != "700" ]; then
- validation_fail "Keyring directory has wrong permissions: $perms (expected 700)"
- attribute_issue "gnome-keyring directory wrong permissions" "archsetup"
- return 1
- fi
-
- # Check ownership
- local owner=$(ssh_cmd "stat -c '%U' $keyring_dir")
- if [ "$owner" != "cjennings" ]; then
- validation_fail "Keyring directory owned by $owner (expected cjennings)"
- attribute_issue "gnome-keyring directory wrong ownership" "archsetup"
- return 1
- fi
-
- # Check default file exists and contains "login"
- local default_keyring=$(ssh_cmd "cat $keyring_dir/default 2>/dev/null")
- if [ "$default_keyring" != "login" ]; then
- validation_fail "Default keyring not set to 'login' (got: '$default_keyring')"
- attribute_issue "gnome-keyring default not set to login" "archsetup"
- return 1
- fi
-
- validation_pass "gnome-keyring pre-configured (default=login, dir=700)"
-}
-
-#-----------------------------------------------------------------------------
-# Archsetup-Specific Validations
-#-----------------------------------------------------------------------------
-
-validate_archsetup_log() {
- step "Checking archsetup log for errors"
- local error_count
- # Use grep -h to suppress filenames, then wc -l to count total matches
- error_count=$(ssh_cmd "grep -h '^Error:' /var/log/archsetup-*.log 2>/dev/null | wc -l" | tr -d '[:space:]')
- error_count=${error_count:-0}
-
- if [ "$error_count" = "0" ]; then
- validation_pass "No errors in archsetup log"
- else
- validation_fail "Found $error_count errors in archsetup log"
- attribute_issue "Errors in archsetup log: $error_count" "archsetup"
- fi
-}
-
-validate_state_markers() {
- step "Checking archsetup state markers"
- local state_count=$(ssh_cmd "ls /var/lib/archsetup/state/ 2>/dev/null | wc -l")
-
- if [ "$state_count" -ge 12 ]; then
- validation_pass "All 12 installation steps completed"
- else
- validation_warn "Only $state_count/12 steps completed"
- fi
-}
-
-#=============================================================================
# ISSUE REPORTING
#=============================================================================
@@ -1138,18 +312,3 @@ EOF
info "Issue report saved: $report_file"
}
-#=============================================================================
-# MAIN VALIDATION ENTRY POINT
-#=============================================================================
-
-run_full_validation() {
- local output_dir="$1"
- local archzfs_inbox="${2:-}"
-
- run_all_validations
- analyze_log_diff "$output_dir"
- generate_issue_report "$output_dir" "$archzfs_inbox"
-
- # Return success if no failures
- [ $VALIDATION_FAILED -eq 0 ]
-}
diff --git a/scripts/testing/lib/vm-utils.sh b/scripts/testing/lib/vm-utils.sh
index a8736a3..b85e773 100755
--- a/scripts/testing/lib/vm-utils.sh
+++ b/scripts/testing/lib/vm-utils.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
# VM management utilities for archsetup testing (direct QEMU)
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
@@ -10,13 +11,26 @@
# VM configuration defaults
VM_CPUS="${VM_CPUS:-4}"
-VM_RAM="${VM_RAM:-4096}" # MB
+# 8 GiB headroom for AUR builds: makepkg runs -j$VM_CPUS, and parallel cc1plus
+# (~700 MB each on heavy C++ packages) OOM-killed under the old 4 GiB default.
+VM_RAM="${VM_RAM:-8192}" # MB
VM_DISK_SIZE="${VM_DISK_SIZE:-50}" # GB
+# Filesystem profile: selects which base image + archangel config the harness
+# targets. "btrfs" is the historical default (its image name stays unsuffixed
+# so existing base images keep working); "zfs" gets its own image, since the
+# two on-disk layouts can't share a disk. Honoured by init_vm_paths below.
+FS_PROFILE="${FS_PROFILE:-btrfs}"
+
# SSH configuration
SSH_PORT="${SSH_PORT:-2222}"
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
ROOT_PASSWORD="${ROOT_PASSWORD:-archsetup}"
+# Set by inject_root_key once a root key is authorized in the VM. When set, the
+# ssh/scp helpers add "-i <key>" so they keep working after archsetup hardens
+# sshd to PermitRootLogin prohibit-password (which kills root *password* login
+# but still allows key auth). Expanded unquoted at use sites, like SSH_OPTS, so
+# it word-splits into "-i" and the path; the key path must not contain spaces.
+SSH_KEY_OPT="${SSH_KEY_OPT:-}"
# OVMF firmware paths
OVMF_CODE="/usr/share/edk2/x64/OVMF_CODE.4m.fd"
@@ -36,9 +50,22 @@ init_vm_paths() {
local images_dir="${1:-$VM_IMAGES_DIR}"
[ -z "$images_dir" ] && fatal "VM_IMAGES_DIR not set"
+ case "$FS_PROFILE" in
+ btrfs|zfs) ;;
+ *) fatal "Invalid FS_PROFILE: $FS_PROFILE (must be 'btrfs' or 'zfs')" ;;
+ esac
+
VM_IMAGES_DIR="$images_dir"
- DISK_PATH="$VM_IMAGES_DIR/archsetup-base.qcow2"
- OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS.fd"
+ # btrfs keeps the legacy unsuffixed name; other profiles get a suffix so
+ # their images sit side by side without clobbering each other.
+ local img_suffix=""
+ [ "$FS_PROFILE" != "btrfs" ] && img_suffix="-$FS_PROFILE"
+ DISK_PATH="$VM_IMAGES_DIR/archsetup-base${img_suffix}.qcow2"
+ # Per-profile NVRAM: UEFI boot entries live here, outside the qcow2, so a
+ # disk-snapshot revert can't restore them. Sharing one file across profiles
+ # let a zfs run's ZFSBootMenu entries clobber the btrfs GRUB entry, leaving
+ # the btrfs base unbootable (no removable ESP fallback to recover from).
+ OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS${img_suffix}.fd"
PID_FILE="$VM_IMAGES_DIR/qemu.pid"
MONITOR_SOCK="$VM_IMAGES_DIR/qemu-monitor.sock"
SERIAL_LOG="$VM_IMAGES_DIR/qemu-serial.log"
@@ -350,7 +377,7 @@ wait_for_ssh() {
progress "Waiting for SSH on localhost:$SSH_PORT..."
while [ "$elapsed" -lt "$timeout" ]; do
- if sshpass -p "$password" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost true 2>/dev/null; then
+ if sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT -p "$SSH_PORT" root@localhost true 2>/dev/null; then
success "SSH is available"
return 0
fi
@@ -366,7 +393,7 @@ wait_for_ssh() {
vm_exec() {
local password="${1:-$ROOT_PASSWORD}"
shift
- sshpass -p "$password" ssh $SSH_OPTS \
+ sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT \
-o ServerAliveInterval=30 -o ServerAliveCountMax=10 \
-p "$SSH_PORT" root@localhost "$@" 2>> "$LOGFILE"
}
@@ -378,7 +405,7 @@ copy_to_vm() {
local password="${3:-$ROOT_PASSWORD}"
step "Copying $(basename "$local_file") to VM:$remote_path"
- if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \
+ if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \
"$local_file" "root@localhost:$remote_path" >> "$LOGFILE" 2>&1; then
success "File copied to VM"
return 0
@@ -395,7 +422,7 @@ copy_from_vm() {
local password="${3:-$ROOT_PASSWORD}"
step "Copying $remote_file from VM"
- if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \
+ if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \
"root@localhost:$remote_file" "$local_path" >> "$LOGFILE" 2>&1; then
success "File copied from VM"
return 0
@@ -404,3 +431,31 @@ copy_from_vm() {
return 1
fi
}
+
+# inject_root_key <key_path>
+# Authorize a throwaway root key over the initial password session and switch
+# all the helpers above to key auth (sets SSH_KEY_OPT + ROOT_SSH_KEY). Call once,
+# right after wait_for_ssh and before running archsetup: archsetup sets
+# PermitRootLogin prohibit-password and reloads sshd partway through, which kills
+# root *password* login. Without a key in place first, every SSH after that step
+# fails and the run aborts before any validation. Key auth survives the hardening.
+# Targets root@$VM_IP on $SSH_PORT so it works for both the local VM runner
+# (VM_IP unset or localhost, port 2222) and the bare-metal runner (VM_IP=host,
+# port 22).
+#
+# Arguments: $1 - path for the private key; "<path>.pub" holds the public half.
+#                 Any existing pair at that path is deleted and regenerated.
+# Globals:   reads ROOT_PASSWORD, SSH_OPTS, SSH_PORT, VM_IP, LOGFILE;
+#            writes SSH_KEY_OPT and exports ROOT_SSH_KEY on success only.
+# Returns:   0 once the key is authorized; 1 (after a warning) if the path is
+#            missing, key generation fails, or the remote authorization fails.
+inject_root_key() {
+    local key="${1:-}"
+    # Guard: with an empty path the rm below would act on "" and ".pub" in the
+    # current directory instead of a key pair.
+    if [ -z "$key" ]; then
+        warn "Root key path not given - run may break at sshd hardening"
+        return 1
+    fi
+    # Drop any stale pair so a fresh key is generated at this path; "--" keeps a
+    # path beginning with "-" from being parsed as an rm option.
+    rm -f -- "$key" "$key.pub"
+    if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
+        warn "Root key generation failed - run may break at sshd hardening"
+        return 1
+    fi
+    # The public key is streamed over stdin and appended remotely, so an
+    # existing authorized_keys file in the VM is preserved.
+    if sshpass -p "$ROOT_PASSWORD" ssh $SSH_OPTS -p "$SSH_PORT" "root@${VM_IP:-localhost}" \
+        "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" \
+        < "$key.pub" >> "$LOGFILE" 2>&1; then
+        # Stored as one string and expanded unquoted by the helpers, so it
+        # splits into "-i" and the key path.
+        SSH_KEY_OPT="-i $key"
+        export ROOT_SSH_KEY="$key"
+        success "Root SSH key authorized (survives sshd prohibit-password hardening)"
+        return 0
+    fi
+    warn "Root key authorization failed - run may break at sshd hardening"
+    return 1
+}