diff options
Diffstat (limited to 'scripts/testing/lib')
| -rwxr-xr-x | scripts/testing/lib/logging.sh | 1 | ||||
| -rw-r--r-- | scripts/testing/lib/network-diagnostics.sh | 1 | ||||
| -rw-r--r-- | scripts/testing/lib/testinfra.sh | 120 | ||||
| -rw-r--r-- | scripts/testing/lib/validation.sh | 845 | ||||
| -rwxr-xr-x | scripts/testing/lib/vm-utils.sh | 69 |
5 files changed, 186 insertions, 850 deletions
diff --git a/scripts/testing/lib/logging.sh b/scripts/testing/lib/logging.sh index ed20707..809d396 100755 --- a/scripts/testing/lib/logging.sh +++ b/scripts/testing/lib/logging.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Logging utilities for archsetup testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/lib/network-diagnostics.sh b/scripts/testing/lib/network-diagnostics.sh index 674aeba..38788e5 100644 --- a/scripts/testing/lib/network-diagnostics.sh +++ b/scripts/testing/lib/network-diagnostics.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Network diagnostics for VM testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/lib/testinfra.sh b/scripts/testing/lib/testinfra.sh new file mode 100644 index 0000000..0822a9f --- /dev/null +++ b/scripts/testing/lib/testinfra.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Testinfra post-install validation sweep (runs on the host, over SSH). +# +# This is the primary post-install validator (it replaced the shell +# run_all_validations sweep). It connects to the freshly-installed VM over SSH +# and runs the pytest suite under scripts/testing/tests/. Its result drives the +# run's pass/fail, and per-test failures are bucketed (archsetup / base_install +# / unknown) into the same issue-attribution report the shell sweep produced. +# +# Auth: reuse the root key the harness already authorized (inject_root_key), +# which survives the sshd prohibit-password hardening; mint our own only if the +# harness didn't (standalone use). pytest connects key-only via a generated +# ssh-config. Key + config live in the results dir and are discarded with it. 
+#
+# Uses globals from run-test.sh / vm-utils.sh: SCRIPT_DIR, VM_IP, SSH_PORT,
+# ROOT_PASSWORD, ROOT_SSH_KEY, ARCHSETUP_VM_CONF, plus the validation.sh
+# helpers attribute_issue / VALIDATION_*. Toggle with RUN_TESTINFRA=false.
+
+# Record each pytest failure from the attribution file ($1) into the issue
+# arrays (validation.sh's attribute_issue), so generate_issue_report covers them.
+_testinfra_record_attribution() {
+    local file="$1" bucket="" line   # 'line' is local so the read loop cannot clobber a caller's variable
+    [ -f "$file" ] || return 0
+    while IFS= read -r line || [ -n "$line" ]; do   # '|| [ -n ]' keeps a final entry that has no trailing newline
+        case "$line" in
+            "[archsetup]") bucket=archsetup ;;
+            "[base_install]") bucket=base ;;
+            "[unknown]") bucket=unknown ;;
+            " "*) attribute_issue "testinfra: ${line# }" "$bucket" ;;
+        esac
+    done < "$file"
+}
+
+# run_testinfra_validation <results_dir>
+# Returns 0 only when the pytest sweep ran and passed. Returns non-zero when it
+# failed OR could not run (missing tooling / SSH setup) — a sweep that can't run
+# is not a pass. RUN_TESTINFRA=false is the one explicit opt-out (returns 0).
+run_testinfra_validation() {
+    local results_dir="$1"
+    local tests_dir="$SCRIPT_DIR/tests"
+    local key="$results_dir/testinfra_key"
+    local sshcfg="$results_dir/testinfra_ssh_config"
+
+    if [ "${RUN_TESTINFRA:-true}" != "true" ]; then
+        warn "RUN_TESTINFRA=false - skipping the Testinfra validation sweep"
+        return 0
+    fi
+    if ! command -v pytest >/dev/null 2>&1 || ! python3 -c 'import testinfra' >/dev/null 2>&1; then
+        error "Testinfra/pytest not installed on host - cannot validate (run: make deps)"
+        return 1
+    fi
+
+    section "Running Validation Checks (Testinfra)"
+
+    # Prefer the harness's already-authorized root key; mint one if absent.
+    if [ -n "${ROOT_SSH_KEY:-}" ] && [ -f "${ROOT_SSH_KEY}" ]; then
+        key="$ROOT_SSH_KEY"
+    else
+        rm -f "$key" "$key.pub"
+        if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then
+            error "testinfra: ssh-keygen failed"
+            return 1
+        fi
+        if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then
+            error "testinfra: pubkey copy failed"
+            return 1
+        fi
+        if ! vm_exec "$ROOT_PASSWORD" \
+            "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then
+            error "testinfra: authorizing key in VM failed"
+            return 1
+        fi
+    fi
+
+    # ssh-config so testinfra connects key-only, no host-key prompt (key path quoted for spaces).
+    cat > "$sshcfg" <<EOF
+Host testinfra-target
+    HostName ${VM_IP:-localhost}
+    Port ${SSH_PORT:-2222}
+    User root
+    IdentityFile "$key"
+    IdentitiesOnly yes
+    StrictHostKeyChecking no
+    UserKnownHostsFile /dev/null
+EOF
+
+    # The account archsetup created, for the tests that need it.
+    local test_user
+    test_user=$(sed -n 's/^USERNAME=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1)
+    : "${test_user:=cjennings}"
+
+    local logf="$results_dir/testinfra.log"
+    ARCHSETUP_TEST_USER="$test_user" pytest "$tests_dir" \
+        --hosts="ssh://testinfra-target" \
+        --ssh-config="$sshcfg" \
+        --attribution-file="$results_dir/testinfra-attribution.txt" \
+        -v >> "$logf" 2>&1
+    local rc=$?
+
+    # Surface pytest's counts through the shared validation counters so the issue
+    # report summary is meaningful; a failed run with no counted failure records 1.
+    local summary
+    summary=$(grep -oE '[0-9]+ (passed|failed|error|errors|skipped)' "$logf" | tail -10)
+    VALIDATION_PASSED=$(echo "$summary" | awk '/passed/{print $1}' | tail -1); VALIDATION_PASSED=${VALIDATION_PASSED:-0}
+    VALIDATION_WARNINGS=$(echo "$summary" | awk '/skipped/{print $1}' | tail -1); VALIDATION_WARNINGS=${VALIDATION_WARNINGS:-0}
+    local nfail nerr
+    nfail=$(echo "$summary" | awk '/failed/{print $1}' | tail -1); nfail=${nfail:-0}
+    nerr=$(echo "$summary" | awk '/error/{print $1}' | tail -1); nerr=${nerr:-0}
+    VALIDATION_FAILED=$(( (nfail + nerr > 0 || rc == 0) ? nfail + nerr : 1 ))
+
+    if [ "$rc" -eq 0 ]; then
+        success "Testinfra validation passed ($VALIDATION_PASSED passed, $VALIDATION_WARNINGS skipped)"
+    else
+        error "Testinfra validation failed ($VALIDATION_FAILED failed/error; see testinfra.log)"
+        _testinfra_record_attribution "$results_dir/testinfra-attribution.txt"
+    fi
+    return "$rc"
+}
diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh index 91270ef..fa7ddcc 100644 --- a/scripts/testing/lib/validation.sh +++ b/scripts/testing/lib/validation.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Validation utilities for archsetup testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -20,38 +21,7 @@ declare -a UNKNOWN_ISSUES # SSH helper (uses globals: VM_IP, ROOT_PASSWORD) ssh_cmd() { sshpass -p "$ROOT_PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - -o ConnectTimeout=10 -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null -} - -# Validation result helpers -validation_pass() { - local test_name="$1" - success "$test_name" - ((VALIDATION_PASSED++)) || true -} - -validation_fail() { - local test_name="$1" - local details="${2:-}" - error "$test_name" - [ -n "$details" ] && info " Details: $details" - ((VALIDATION_FAILED++)) || true -} - -validation_warn() { - local test_name="$1" - local details="${2:-}" - warn "$test_name" - [ -n "$details" ] && info " Details: 
$details" - ((VALIDATION_WARNINGS++)) || true -} - -# A check whose precondition can't hold in this environment (headless VM, -# slirp networking, pre-reboot state). Logged for the record, counted nowhere -# — a warning that fires on every run trains readers to ignore warnings. -validation_skip() { - local test_name="$1" - info "SKIP: $test_name" + -o ConnectTimeout=10 ${SSH_KEY_OPT:-} -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null } # Attribute an issue to archsetup or base install @@ -264,802 +234,6 @@ categorize_errors() { } #============================================================================= -# VALIDATION CHECKS -#============================================================================= - -run_all_validations() { - section "Running Validation Checks" - - # User & Authentication - validate_user_created - validate_user_shell - validate_user_groups - - # Dotfiles - validate_dotfiles - - # Package Managers - validate_yay_installed - validate_pacman_working - - # Window Manager (detects DWM or Hyprland automatically) - validate_window_manager - - # Essential Services - validate_firewall - validate_dns_config - validate_avahi - validate_fail2ban - validate_networkmanager - - # Developer Tools - validate_emacs - validate_git_config - validate_dev_tools - - # System Configuration - validate_zfs_config - validate_boot_config - validate_autologin_config - validate_gnome_keyring_setup - - # Boot & Initramfs (critical for ZFS systems) - validate_terminus_font - validate_mkinitcpio_hooks - validate_initramfs_consolefont - validate_nvme_module - - # Archsetup Specific - validate_archsetup_log - validate_state_markers -} - -#----------------------------------------------------------------------------- -# User & Authentication Validations -#----------------------------------------------------------------------------- - -validate_user_created() { - step "Checking if user 'cjennings' exists" - if ssh_cmd "id cjennings" &>> "$LOGFILE"; then - validation_pass 
"User cjennings exists" - else - validation_fail "User cjennings not found" - attribute_issue "User cjennings not created" "archsetup" - fi -} - -validate_user_shell() { - step "Checking if ZSH is default shell" - local shell=$(ssh_cmd "getent passwd cjennings | cut -d: -f7") - if [ "$shell" = "/bin/zsh" ] || [ "$shell" = "/usr/bin/zsh" ]; then - validation_pass "ZSH is default shell" - else - validation_fail "ZSH not default shell (got: $shell)" - attribute_issue "ZSH not set as default shell" "archsetup" - fi -} - -validate_user_groups() { - step "Checking user group memberships" - # Groups added by archsetup: - # - wheel (useradd -G wheel) - # - sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users (usermod -aG) - # - docker (gpasswd -a, added later in developer_workstation) - local expected_groups="wheel sys adm network scanner power uucp audio lp rfkill video storage optical users docker" - local missing_groups="" - - for group in $expected_groups; do - if ! ssh_cmd "groups cjennings" | grep -q "\b$group\b"; then - missing_groups="$missing_groups $group" - fi - done - - if [ -z "$missing_groups" ]; then - validation_pass "User in all expected groups (15 groups)" - else - validation_fail "User missing groups:$missing_groups" - attribute_issue "User missing groups:$missing_groups" "archsetup" - fi -} - -#----------------------------------------------------------------------------- -# Dotfiles Validations -#----------------------------------------------------------------------------- - -validate_dotfiles() { - step "Checking dotfiles setup" - - # 1. Check if .zshrc is a symlink - if ! ssh_cmd "test -L /home/cjennings/.zshrc"; then - validation_fail "Dotfiles not stowed (.zshrc is not a symlink)" - attribute_issue "Dotfiles stow failed" "archsetup" - return 1 - fi - - # 2. Check symlink points to correct location. archsetup now clones the - # dotfiles repo to ~/.dotfiles and stows from there (DOTFILES_DIR default). 
- # Which tree owns .zshrc depends on DESKTOP_ENV: none stows the standalone - # minimal/ tree; dwm and hyprland stow common/. - local target=$(ssh_cmd "readlink /home/cjennings/.zshrc") - local desktop_env=$(sed -n 's/^DESKTOP_ENV=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1) - local expected_pattern=".dotfiles/common/.zshrc" - [ "$desktop_env" = "none" ] && expected_pattern=".dotfiles/minimal/.zshrc" - - if ! echo "$target" | grep -q "$expected_pattern"; then - validation_fail "Dotfiles symlink points to wrong location: $target" - attribute_issue "Dotfiles symlink incorrect: $target" "archsetup" - return 1 - fi - - # 3. Check the target file actually exists (not a broken symlink) - if ! ssh_cmd "test -f /home/cjennings/.zshrc"; then - validation_fail "Dotfiles symlink is broken (target doesn't exist)" - ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1 - attribute_issue "Dotfiles symlink broken" "archsetup" - return 1 - fi - - # 4. Check user can actually read the file (not just root) - local result=$(ssh_cmd "sudo -u cjennings cat /home/cjennings/.zshrc > /dev/null 2>&1 && echo OK || echo FAIL") - if [ "$result" != "OK" ]; then - validation_fail "Dotfiles not readable by user (permission issue)" - ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1 - attribute_issue "Dotfiles not readable by user" "archsetup" - return 1 - fi - - validation_pass "Dotfiles configured correctly (symlink to $target, readable by user)" -} - -#----------------------------------------------------------------------------- -# Package Manager Validations -#----------------------------------------------------------------------------- - -validate_yay_installed() { - step "Checking if yay (AUR helper) is installed and functional" - - # Check binary exists - if ! 
ssh_cmd "which yay" &>> "$LOGFILE"; then - validation_fail "yay not found" - attribute_issue "yay not installed" "archsetup" - return 1 - fi - - # Check yay can query packages (functional test) - if ssh_cmd "sudo -u cjennings yay -Qi yay" &>> "$LOGFILE"; then - validation_pass "yay is installed and functional" - else - validation_fail "yay binary exists but query failed" - attribute_issue "yay not functional" "archsetup" - fi -} - -validate_pacman_working() { - step "Checking if pacman is functional" - if ssh_cmd "pacman -Qi base" &>> "$LOGFILE"; then - validation_pass "pacman is functional" - else - validation_fail "pacman query failed" - attribute_issue "pacman not functional" "unknown" - fi -} - -#----------------------------------------------------------------------------- -# Window Manager Validations -#----------------------------------------------------------------------------- - -validate_suckless_tools() { - step "Checking suckless tools (dwm, st, dmenu, slock)" - local missing="" - - for tool in dwm st dmenu slock; do - if ! ssh_cmd "test -f /usr/local/bin/$tool"; then - missing="$missing $tool" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All suckless tools installed (dwm, st, dmenu, slock)" - else - validation_fail "Missing suckless tools:$missing" - attribute_issue "Missing suckless tools:$missing" "archsetup" - fi -} - -validate_hyprland_tools() { - step "Checking Hyprland tools" - local missing="" - - # Check core Hyprland packages - for pkg in hyprland hypridle hyprlock waybar fuzzel swww grim slurp gammastep foot; do - if ! 
ssh_cmd "pacman -Q $pkg &>/dev/null"; then - missing="$missing $pkg" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All Hyprland tools installed" - else - validation_fail "Missing Hyprland tools:$missing" - attribute_issue "Missing Hyprland tools:$missing" "archsetup" - fi -} - -validate_hyprland_config() { - step "Checking Hyprland configuration files" - local missing="" - - for config in ".config/hypr/hyprland.conf" ".config/hypr/hypridle.conf" \ - ".config/hypr/hyprlock.conf" ".config/waybar/config" \ - ".config/fuzzel/fuzzel.ini" ".config/gammastep/config.ini"; do - if ! ssh_cmd "test -f /home/cjennings/$config"; then - missing="$missing $config" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All Hyprland config files present" - else - validation_fail "Missing Hyprland configs:$missing" - attribute_issue "Missing Hyprland configs:$missing" "archsetup" - fi -} - -validate_hyprland_socket() { - step "Checking Hyprland IPC socket" - # The socket only exists while the compositor runs. In the headless test - # VM nobody logs in graphically, so a missing socket with no Hyprland - # process is the expected state, not a finding. - if ssh_cmd "test -S /tmp/hypr/*/.socket.sock 2>/dev/null"; then - validation_pass "Hyprland socket exists" - elif ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then - validation_skip "Hyprland not running (headless) — socket check not applicable" - else - validation_warn "Hyprland running but IPC socket not found" - fi -} - -validate_portal_dark_mode() { - step "Checking Settings portal returns dark mode" - - # Check portals.conf exists and uses gtk for Settings - local portals_conf="/home/cjennings/.config/xdg-desktop-portal/portals.conf" - if ! 
ssh_cmd "test -f $portals_conf"; then - validation_fail "portals.conf not found" - attribute_issue "xdg-desktop-portal portals.conf missing" "archsetup" - return 1 - fi - - local settings_backend=$(ssh_cmd "grep 'org.freedesktop.impl.portal.Settings' $portals_conf 2>/dev/null | cut -d= -f2") - if [ "$settings_backend" = "none" ]; then - validation_fail "Settings portal disabled (set to 'none')" - attribute_issue "Settings portal disabled in portals.conf" "archsetup" - return 1 - fi - - # Query the portal for color-scheme (requires portal services running) - # Returns "v v u 1" for dark mode (1 = prefer-dark) - local color_scheme=$(ssh_cmd "sudo -u cjennings busctl --user call org.freedesktop.portal.Desktop /org/freedesktop/portal/desktop org.freedesktop.portal.Settings Read 'ss' 'org.freedesktop.appearance' 'color-scheme' 2>/dev/null | grep -o 'u [0-9]' | cut -d' ' -f2") - - if [ "$color_scheme" = "1" ]; then - validation_pass "Settings portal returns dark mode (color-scheme=1)" - elif [ -z "$color_scheme" ] && ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then - # No compositor → no graphical session bus to query. A socket-activated - # xdg-desktop-portal process can exist even headless, so the compositor - # is the real precondition (same condition as the socket check). The - # conf-file checks above already validated what install controls. 
- validation_skip "No compositor running (headless) — portal query not applicable" - elif [ -z "$color_scheme" ]; then - validation_warn "Could not query Settings portal (portal may not be running)" - else - validation_fail "Settings portal not returning dark mode (color-scheme=$color_scheme, expected 1)" - attribute_issue "Settings portal not configured for dark mode" "archsetup" - fi -} - -validate_window_manager() { - # Detect which desktop environment is installed and validate accordingly - if ssh_cmd "pacman -Q hyprland &>/dev/null"; then - section "Hyprland Desktop Environment" - validate_hyprland_tools - validate_hyprland_config - validate_hyprland_socket - validate_portal_dark_mode - elif ssh_cmd "test -f /usr/local/bin/dwm"; then - section "DWM Desktop Environment" - validate_suckless_tools - else - validation_warn "No window manager detected (DESKTOP_ENV=none?)" - fi -} - -#----------------------------------------------------------------------------- -# Essential Services Validations -#----------------------------------------------------------------------------- - -validate_firewall() { - step "Checking if firewall (ufw) is enabled" - local status=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "UFW firewall is enabled" - else - validation_fail "UFW firewall not enabled" - attribute_issue "UFW not enabled" "archsetup" - fi -} - -validate_dns_config() { - step "Checking DNS-over-TLS configuration" - if ssh_cmd "grep -q 'DNS=.*#' /etc/systemd/resolved.conf 2>/dev/null"; then - validation_pass "DNS-over-TLS configured" - else - validation_warn "DNS-over-TLS may not be configured" - fi -} - -validate_avahi() { - step "Checking avahi-daemon status" - local status=$(ssh_cmd "systemctl is-enabled avahi-daemon.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "avahi-daemon is enabled" - - # Full-stack mDNS test: ping hostname.local. 
QEMU user-mode (slirp, - # 10.0.2.x) doesn't pass multicast, so mDNS genuinely can't resolve - # there — only run the ping on real networking. - if ssh_cmd "ip -4 addr show" 2>/dev/null | grep -q "10\.0\.2\."; then - validation_skip "mDNS ping not possible on slirp networking (no multicast)" - else - local hostname=$(ssh_cmd "hostname") - if ssh_cmd "ping -c 1 -W 2 ${hostname}.local" &>> "$LOGFILE"; then - validation_pass "mDNS working (${hostname}.local responds to ping)" - else - validation_warn "mDNS ping failed (avahi may need time to propagate)" - fi - fi - else - # This might be OK if avahi was pre-installed - validation_warn "avahi-daemon not enabled (may have been pre-configured)" - fi -} - -validate_fail2ban() { - step "Checking fail2ban status" - local status=$(ssh_cmd "systemctl is-enabled fail2ban.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "fail2ban is enabled" - else - validation_fail "fail2ban not enabled" - attribute_issue "fail2ban not enabled" "archsetup" - fi -} - -validate_networkmanager() { - step "Checking NetworkManager status" - local status=$(ssh_cmd "systemctl is-enabled NetworkManager.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "NetworkManager is enabled" - # Functional test - if ssh_cmd "nmcli general status" &>> "$LOGFILE"; then - validation_pass "NetworkManager is functional" - else - validation_warn "NetworkManager enabled but not responding" - fi - else - validation_fail "NetworkManager not enabled" - attribute_issue "NetworkManager not enabled" "archsetup" - fi -} - -#----------------------------------------------------------------------------- -# Service-Specific Validations -#----------------------------------------------------------------------------- - -validate_all_services() { - section "Service Validations" - - # Core services (always expected) - validate_service "sshd" "enabled" "active" - validate_service "systemd-resolved" 
"enabled" "active" - validate_service "ufw" "enabled" "" # VM lacks iptables modules, can't be active - validate_service "fail2ban" "enabled" "active" - validate_service "NetworkManager" "enabled" "active" - validate_service "rngd" "enabled" "active" - validate_service "cronie" "enabled" "" - validate_service "atd" "enabled" "" - - # Cron job: log cleanup - step "Checking log-cleanup cron job" - local crontab_entry=$(ssh_cmd "sudo -u cjennings crontab -l 2>/dev/null | grep log-cleanup") - if [ -n "$crontab_entry" ]; then - validation_pass "log-cleanup cron job installed" - else - validation_fail "log-cleanup cron job not in crontab" - attribute_issue "log-cleanup cron job missing from user crontab" "archsetup" - fi - - # Timer services - validate_service "reflector.timer" "enabled" "" - validate_service "paccache.timer" "enabled" "" - - # Optional services (warn if missing, don't fail) - validate_service_optional "avahi-daemon" "enabled" - validate_service_optional "bluetooth" "enabled" - validate_service_optional "cups" "enabled" - validate_service_optional "docker" "enabled" - validate_service_optional "tailscaled" "enabled" - # Syncthing uses user service (not system), check lingering is enabled. - # test -e, not ls: ls prints the path on success, so the old capture held - # "path\nyes" and could never equal "yes" — the check warned on every - # run, even with lingering correctly enabled. 
- step "Checking user lingering for syncthing" - local linger_enabled=$(ssh_cmd "test -e /var/lib/systemd/linger/cjennings && echo yes || echo no") - if [ "$linger_enabled" = "yes" ]; then - validation_pass "User lingering enabled for syncthing user service" - else - validation_warn "User lingering not enabled (syncthing may not autostart)" - fi - - # Filesystem-specific - validate_zfs_services - validate_btrfs_services - - # Functional tests - validate_service_functions -} - -validate_service() { - local service="$1" - local expected_enabled="$2" # "enabled" or "" - local expected_active="$3" # "active" or "" - - step "Checking $service" - - if [ -n "$expected_enabled" ]; then - local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled") - if [ "$enabled" = "enabled" ]; then - validation_pass "$service is enabled" - else - validation_fail "$service not enabled (got: $enabled)" - attribute_issue "$service not enabled" "archsetup" - return 1 - fi - fi - - if [ -n "$expected_active" ]; then - local active=$(ssh_cmd "systemctl is-active $service 2>/dev/null || echo inactive") - if [ "$active" = "active" ]; then - validation_pass "$service is active" - else - validation_fail "$service not active (got: $active)" - attribute_issue "$service not active" "archsetup" - return 1 - fi - fi - - return 0 -} - -validate_service_optional() { - local service="$1" - local expected_enabled="$2" - - step "Checking optional service: $service" - - local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled") - if [ "$enabled" = "enabled" ]; then - validation_pass "$service is enabled" - else - validation_warn "$service not enabled (optional)" - fi -} - -validate_zfs_services() { - # Only check if ZFS is installed - if ! 
ssh_cmd "which zfs" &>> "$LOGFILE"; then - return 0 - fi - - step "Checking ZFS-specific services" - - validate_service_optional "sanoid.timer" "enabled" - - # Check for zfs-scrub timer (pool name varies) - local scrub_enabled - scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled" | tr -d '[:space:]') - scrub_enabled=${scrub_enabled:-0} - if [ "$scrub_enabled" -gt 0 ]; then - validation_pass "ZFS scrub timer enabled" - else - validation_warn "ZFS scrub timer not found" - fi -} - -validate_btrfs_services() { - # Only check if btrfs root - if ! ssh_cmd "mount | grep 'on / ' | grep -q btrfs"; then - return 0 - fi - - step "Checking btrfs-specific services" - validate_service_optional "grub-btrfsd" "enabled" -} - -validate_service_functions() { - section "Service Functional Tests" - - # UFW functional test - # NOTE: VM environment lacks iptables kernel modules, so UFW cannot activate. - # We only verify it's enabled; active status requires real hardware. 
- step "Testing UFW functionality" - local ufw_enabled - ufw_enabled=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled") - if [ "$ufw_enabled" = "enabled" ]; then - validation_pass "UFW is enabled (activation requires iptables kernel modules)" - else - validation_fail "UFW not enabled" - attribute_issue "UFW not enabled" "archsetup" - fi - - # fail2ban functional test - step "Testing fail2ban functionality" - if ssh_cmd "fail2ban-client status" &>> "$LOGFILE"; then - validation_pass "fail2ban is responding" - else - validation_fail "fail2ban not responding" - attribute_issue "fail2ban not functioning" "archsetup" - fi - - # DNS resolution test - step "Testing DNS resolution" - if ssh_cmd "resolvectl query archlinux.org" &>> "$LOGFILE"; then - validation_pass "DNS resolution working" - else - validation_warn "DNS resolution test failed (may be network issue)" - fi - - # Docker functional test (if enabled) - if ssh_cmd "systemctl is-enabled docker" &>> "$LOGFILE"; then - step "Testing Docker functionality" - if ssh_cmd "docker info" &>> "$LOGFILE"; then - validation_pass "Docker is responding" - elif ! ssh_cmd "systemctl is-active --quiet docker"; then - # archsetup enables docker for next boot (enable, not enable --now, - # by design — the daemon is heavy). Validation runs pre-reboot, so - # enabled-but-not-started is the correct installed state. 
- validation_skip "Docker enabled but not started (starts on boot by design)" - else - validation_warn "Docker active but not responding" - fi - fi -} - -#----------------------------------------------------------------------------- -# Developer Tools Validations -#----------------------------------------------------------------------------- - -validate_emacs() { - step "Checking if Emacs is installed" - if ssh_cmd "which emacs" &>> "$LOGFILE"; then - validation_pass "Emacs is installed" - - # Check if config exists - if ssh_cmd "test -d /home/cjennings/.emacs.d"; then - validation_pass "Emacs config directory exists" - - # Check user can access the directory - local result - result=$(ssh_cmd "sudo -u cjennings ls /home/cjennings/.emacs.d > /dev/null 2>&1 && echo OK || echo FAIL") - if [ "$result" = "OK" ]; then - validation_pass "Emacs config readable by user" - else - validation_fail "Emacs config not readable by user (permission issue)" - attribute_issue "Emacs .emacs.d not readable by user" "archsetup" - fi - else - validation_warn "Emacs config directory not found" - fi - else - validation_fail "Emacs not found" - attribute_issue "Emacs not installed" "archsetup" - fi -} - -validate_git_config() { - step "Checking git installation" - if ssh_cmd "which git" &>> "$LOGFILE"; then - validation_pass "git is installed" - else - validation_fail "git not found" - attribute_issue "git not installed" "archsetup" - fi -} - -validate_dev_tools() { - step "Checking developer tools" - local tools="python node npm go rustc" - local missing="" - - for tool in $tools; do - if ! 
ssh_cmd "which $tool" &>> "$LOGFILE"; then - missing="$missing $tool" - fi - done - - if [ -z "$missing" ]; then - validation_pass "Core dev tools installed" - else - validation_warn "Some dev tools missing:$missing" - fi -} - -#----------------------------------------------------------------------------- -# System Configuration Validations -#----------------------------------------------------------------------------- - -validate_zfs_config() { - step "Checking ZFS configuration (if applicable)" - if ssh_cmd "which zfs" &>> "$LOGFILE"; then - # ZFS is installed, check for sanoid - if ssh_cmd "which sanoid" &>> "$LOGFILE"; then - validation_pass "ZFS with sanoid detected" - else - validation_warn "ZFS detected but sanoid not installed" - fi - else - info "ZFS not installed (non-ZFS system)" - fi -} - -validate_boot_config() { - step "Checking GRUB configuration" - if ssh_cmd "test -f /boot/grub/grub.cfg" &>> "$LOGFILE"; then - validation_pass "GRUB config exists" - else - validation_warn "GRUB config not found (may use different bootloader)" - fi -} - -validate_terminus_font() { - step "Checking terminus-font installation" - if ssh_cmd "pacman -Q terminus-font" &>> "$LOGFILE"; then - validation_pass "terminus-font package installed" - else - validation_fail "terminus-font package not installed" - attribute_issue "terminus-font not installed via pacman" "archsetup" - fi -} - -validate_mkinitcpio_hooks() { - step "Checking mkinitcpio HOOKS configuration" - local hooks=$(ssh_cmd "grep '^HOOKS=' /etc/mkinitcpio.conf") - local is_zfs=$(ssh_cmd "findmnt -n -o FSTYPE / 2>/dev/null") - - if [ "$is_zfs" = "zfs" ]; then - # ZFS system: must use udev, not systemd - if echo "$hooks" | grep -q '\budev\b'; then - validation_pass "ZFS system uses udev hook (correct)" - elif echo "$hooks" | grep -q '\bsystemd\b'; then - validation_fail "ZFS system uses systemd hook (will break boot)" - attribute_issue "mkinitcpio uses systemd hook on ZFS system" "archsetup" - else - 
validation_warn "Could not determine init hook type" - fi - else - # Non-ZFS: systemd hook is fine - if echo "$hooks" | grep -q '\bsystemd\b'; then - validation_pass "Non-ZFS system uses systemd hook" - elif echo "$hooks" | grep -q '\budev\b'; then - validation_pass "Non-ZFS system uses udev hook" - fi - fi -} - -validate_initramfs_consolefont() { - step "Checking console font in initramfs" - local font_in_initramfs=$(ssh_cmd "lsinitcpio /boot/initramfs-linux*.img 2>/dev/null | grep -c 'consolefont.psf\\|ter-'") - - if [ "${font_in_initramfs:-0}" -gt 0 ]; then - validation_pass "Console font included in initramfs" - else - validation_warn "Console font may not be in initramfs" - fi -} - -validate_nvme_module() { - step "Checking NVMe module configuration" - local has_nvme=$(ssh_cmd "ls /dev/nvme* 2>/dev/null | head -1") - - if [ -n "$has_nvme" ]; then - # System has NVMe drives - local modules=$(ssh_cmd "grep '^MODULES=' /etc/mkinitcpio.conf") - if echo "$modules" | grep -q 'nvme'; then - validation_pass "NVMe module in mkinitcpio MODULES" - else - validation_warn "NVMe system but nvme not in MODULES (may cause slow boot)" - fi - else - info "No NVMe drives detected" - fi -} - -validate_autologin_config() { - step "Checking autologin configuration" - if ssh_cmd "test -f /etc/systemd/system/getty@tty1.service.d/autologin.conf" &>> "$LOGFILE"; then - validation_pass "Autologin configured" - else - info "Autologin not configured (may be intentional)" - fi -} - -validate_gnome_keyring_setup() { - step "Checking gnome-keyring pre-configuration" - local keyring_dir="/home/cjennings/.local/share/keyrings" - - # Check directory exists - if ! 
ssh_cmd "test -d $keyring_dir"; then - validation_fail "Keyring directory not created" - attribute_issue "gnome-keyring directory not pre-created" "archsetup" - return 1 - fi - - # Check directory permissions (should be 700) - local perms=$(ssh_cmd "stat -c '%a' $keyring_dir") - if [ "$perms" != "700" ]; then - validation_fail "Keyring directory has wrong permissions: $perms (expected 700)" - attribute_issue "gnome-keyring directory wrong permissions" "archsetup" - return 1 - fi - - # Check ownership - local owner=$(ssh_cmd "stat -c '%U' $keyring_dir") - if [ "$owner" != "cjennings" ]; then - validation_fail "Keyring directory owned by $owner (expected cjennings)" - attribute_issue "gnome-keyring directory wrong ownership" "archsetup" - return 1 - fi - - # Check default file exists and contains "login" - local default_keyring=$(ssh_cmd "cat $keyring_dir/default 2>/dev/null") - if [ "$default_keyring" != "login" ]; then - validation_fail "Default keyring not set to 'login' (got: '$default_keyring')" - attribute_issue "gnome-keyring default not set to login" "archsetup" - return 1 - fi - - validation_pass "gnome-keyring pre-configured (default=login, dir=700)" -} - -#----------------------------------------------------------------------------- -# Archsetup-Specific Validations -#----------------------------------------------------------------------------- - -validate_archsetup_log() { - step "Checking archsetup log for errors" - local error_count - # Use grep -h to suppress filenames, then wc -l to count total matches - error_count=$(ssh_cmd "grep -h '^Error:' /var/log/archsetup-*.log 2>/dev/null | wc -l" | tr -d '[:space:]') - error_count=${error_count:-0} - - if [ "$error_count" = "0" ]; then - validation_pass "No errors in archsetup log" - else - validation_fail "Found $error_count errors in archsetup log" - attribute_issue "Errors in archsetup log: $error_count" "archsetup" - fi -} - -validate_state_markers() { - step "Checking archsetup state markers" - local 
state_count=$(ssh_cmd "ls /var/lib/archsetup/state/ 2>/dev/null | wc -l") - - if [ "$state_count" -ge 12 ]; then - validation_pass "All 12 installation steps completed" - else - validation_warn "Only $state_count/12 steps completed" - fi -} - -#============================================================================= # ISSUE REPORTING #============================================================================= @@ -1138,18 +312,3 @@ EOF info "Issue report saved: $report_file" } -#============================================================================= -# MAIN VALIDATION ENTRY POINT -#============================================================================= - -run_full_validation() { - local output_dir="$1" - local archzfs_inbox="${2:-}" - - run_all_validations - analyze_log_diff "$output_dir" - generate_issue_report "$output_dir" "$archzfs_inbox" - - # Return success if no failures - [ $VALIDATION_FAILED -eq 0 ] -} diff --git a/scripts/testing/lib/vm-utils.sh b/scripts/testing/lib/vm-utils.sh index a8736a3..b85e773 100755 --- a/scripts/testing/lib/vm-utils.sh +++ b/scripts/testing/lib/vm-utils.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # VM management utilities for archsetup testing (direct QEMU) # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -10,13 +11,26 @@ # VM configuration defaults VM_CPUS="${VM_CPUS:-4}" -VM_RAM="${VM_RAM:-4096}" # MB +# 8 GiB headroom for AUR builds: makepkg runs -j$VM_CPUS, and parallel cc1plus +# (~700 MB each on heavy C++ packages) OOM-killed under the old 4 GiB default. +VM_RAM="${VM_RAM:-8192}" # MB VM_DISK_SIZE="${VM_DISK_SIZE:-50}" # GB +# Filesystem profile: selects which base image + archangel config the harness +# targets. "btrfs" is the historical default (its image name stays unsuffixed +# so existing base images keep working); "zfs" gets its own image, since the +# two on-disk layouts can't share a disk. Honoured by init_vm_paths below. 
+FS_PROFILE="${FS_PROFILE:-btrfs}" + # SSH configuration SSH_PORT="${SSH_PORT:-2222}" SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10" ROOT_PASSWORD="${ROOT_PASSWORD:-archsetup}" +# Set by inject_root_key once a root key is authorized in the VM. When set, the +# ssh/scp helpers add "-i <key>" so they keep working after archsetup hardens +# sshd to PermitRootLogin prohibit-password (which kills root *password* login +# but still allows key auth). Left unquoted at use sites, like SSH_OPTS. +SSH_KEY_OPT="${SSH_KEY_OPT:-}" # OVMF firmware paths OVMF_CODE="/usr/share/edk2/x64/OVMF_CODE.4m.fd" @@ -36,9 +50,22 @@ init_vm_paths() { local images_dir="${1:-$VM_IMAGES_DIR}" [ -z "$images_dir" ] && fatal "VM_IMAGES_DIR not set" + case "$FS_PROFILE" in + btrfs|zfs) ;; + *) fatal "Invalid FS_PROFILE: $FS_PROFILE (must be 'btrfs' or 'zfs')" ;; + esac + VM_IMAGES_DIR="$images_dir" - DISK_PATH="$VM_IMAGES_DIR/archsetup-base.qcow2" - OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS.fd" + # btrfs keeps the legacy unsuffixed name; other profiles get a suffix so + # their images sit side by side without clobbering each other. + local img_suffix="" + [ "$FS_PROFILE" != "btrfs" ] && img_suffix="-$FS_PROFILE" + DISK_PATH="$VM_IMAGES_DIR/archsetup-base${img_suffix}.qcow2" + # Per-profile NVRAM: UEFI boot entries live here, outside the qcow2, so a + # disk-snapshot revert can't restore them. Sharing one file across profiles + # let a zfs run's ZFSBootMenu entries clobber the btrfs GRUB entry, leaving + # the btrfs base unbootable (no removable ESP fallback to recover from). + OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS${img_suffix}.fd" PID_FILE="$VM_IMAGES_DIR/qemu.pid" MONITOR_SOCK="$VM_IMAGES_DIR/qemu-monitor.sock" SERIAL_LOG="$VM_IMAGES_DIR/qemu-serial.log" @@ -350,7 +377,7 @@ wait_for_ssh() { progress "Waiting for SSH on localhost:$SSH_PORT..." 
while [ "$elapsed" -lt "$timeout" ]; do - if sshpass -p "$password" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost true 2>/dev/null; then + if sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT -p "$SSH_PORT" root@localhost true 2>/dev/null; then success "SSH is available" return 0 fi @@ -366,7 +393,7 @@ wait_for_ssh() { vm_exec() { local password="${1:-$ROOT_PASSWORD}" shift - sshpass -p "$password" ssh $SSH_OPTS \ + sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT \ -o ServerAliveInterval=30 -o ServerAliveCountMax=10 \ -p "$SSH_PORT" root@localhost "$@" 2>> "$LOGFILE" } @@ -378,7 +405,7 @@ copy_to_vm() { local password="${3:-$ROOT_PASSWORD}" step "Copying $(basename "$local_file") to VM:$remote_path" - if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \ + if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \ "$local_file" "root@localhost:$remote_path" >> "$LOGFILE" 2>&1; then success "File copied to VM" return 0 @@ -395,7 +422,7 @@ copy_from_vm() { local password="${3:-$ROOT_PASSWORD}" step "Copying $remote_file from VM" - if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \ + if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \ "root@localhost:$remote_file" "$local_path" >> "$LOGFILE" 2>&1; then success "File copied from VM" return 0 @@ -404,3 +431,31 @@ copy_from_vm() { return 1 fi } + +# inject_root_key <key_path> +# Authorize a throwaway root key over the initial password session and switch +# all the helpers above to key auth (sets SSH_KEY_OPT + ROOT_SSH_KEY). Call once, +# right after wait_for_ssh and before running archsetup: archsetup sets +# PermitRootLogin prohibit-password and reloads sshd partway through, which kills +# root *password* login. Without a key in place first, every SSH after that step +# fails and the run aborts before any validation. Key auth survives the hardening. 
+# Targets root@$VM_IP on $SSH_PORT so it works for both the local VM runner +# (VM_IP=localhost, port 2222) and the bare-metal runner (VM_IP=host, port 22). +inject_root_key() { + local key="$1" + rm -f "$key" "$key.pub" + if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then + warn "Root key generation failed - run may break at sshd hardening" + return 1 + fi + if sshpass -p "$ROOT_PASSWORD" ssh $SSH_OPTS -p "$SSH_PORT" "root@${VM_IP:-localhost}" \ + "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" \ + < "$key.pub" >> "$LOGFILE" 2>&1; then + SSH_KEY_OPT="-i $key" + export ROOT_SSH_KEY="$key" + success "Root SSH key authorized (survives sshd prohibit-password hardening)" + return 0 + fi + warn "Root key authorization failed - run may break at sshd hardening" + return 1 +} |
