diff options
Diffstat (limited to 'scripts')
35 files changed, 1090 insertions, 882 deletions
diff --git a/scripts/arch-distrobox b/scripts/arch-distrobox index 4afe3d1..99c295d 100755 --- a/scripts/arch-distrobox +++ b/scripts/arch-distrobox @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-3.0-or-later # ArchDistrobox - Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/audit-packages.sh b/scripts/audit-packages.sh index f7af19f..e41b79c 100755 --- a/scripts/audit-packages.sh +++ b/scripts/audit-packages.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # audit-packages.sh — verify every package archsetup installs still exists # at its declared source, and flag packages that moved between the official # repos and the AUR. diff --git a/scripts/cmail-setup-finish.sh b/scripts/cmail-setup-finish.sh index 704b707..7f9d3fc 100755 --- a/scripts/cmail-setup-finish.sh +++ b/scripts/cmail-setup-finish.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later # cmail-setup-finish.sh — finish Proton Mail Bridge + cmail-action setup after # Bridge first-run. Idempotent; safe to re-run after a Bridge cert rotation or # a claude-templates re-clone. diff --git a/scripts/games.sh b/scripts/games.sh index de6a476..2ccdcb4 100755 --- a/scripts/games.sh +++ b/scripts/games.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # games installations via flatpak set -uo pipefail diff --git a/scripts/hypr-live-update-guard b/scripts/hypr-live-update-guard new file mode 100755 index 0000000..4f561ae --- /dev/null +++ b/scripts/hypr-live-update-guard @@ -0,0 +1,70 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-3.0-or-later +# hypr-live-update-guard - abort a live GPU/compositor library upgrade. +# +# Installed as a pacman PreTransaction hook. When an upgrade transaction +# includes GPU/compositor runtime libraries (mesa, hyprland, wayland, GPU +# drivers, ...) AND a Hyprland session is running, this aborts the +# transaction BEFORE any package is swapped. Replacing those libraries out +# from under a live compositor makes the next GPU-lib call hit a now +# "(deleted)" file and SIGABRT, taking the Wayland clients down with it +# (hit on ratio 2026-06-07: mesa + hyprland upgraded live, Hyprland crashed +# and took awww/insync/emacs with it). Aborting at PreTransaction is the +# safe point: nothing has been replaced yet, so the running session is +# untouched and the user can re-run the upgrade from a TTY. +# +# Pacman feeds the matched package names on stdin (NeedsTargets). +# +# Test seams / overrides (env): +# HYPR_GUARD_RUNNING 1/0 forces the running check (default: pgrep Hyprland) +# HYPR_ALLOW_LIVE_UPDATE 1 proceeds anyway (skip the guard) +# HYPR_GUARD_SENTINEL path whose existence also proceeds anyway +# (default /run/archsetup-allow-live-gpu-update, +# cleared on reboot since /run is tmpfs) + +set -u + +sentinel="${HYPR_GUARD_SENTINEL:-/run/archsetup-allow-live-gpu-update}" + +# Explicit override: the user knows what they're doing. +if [ "${HYPR_ALLOW_LIVE_UPDATE:-0}" = "1" ] || [ -e "$sentinel" ]; then + exit 0 +fi + +hyprland_running() { + if [ -n "${HYPR_GUARD_RUNNING:-}" ]; then + [ "$HYPR_GUARD_RUNNING" = "1" ] + return + fi + pgrep -x Hyprland >/dev/null 2>&1 +} + +# No live session means no live swap to worry about. Let the upgrade run -- +# this is exactly the from-a-TTY-after-logout path the warning points to. +hyprland_running || exit 0 + +# Collect the triggering packages (stdin from NeedsTargets) for the message. +pkgs=$(cat 2>/dev/null | sort -u | tr '\n' ' ') + +cat >&2 <<EOF + +========================================================================== + BLOCKED: live GPU/compositor library upgrade while Hyprland is running +========================================================================== + Packages in this upgrade can crash the running compositor if swapped now: + ${pkgs:-(GPU/compositor runtime libraries)} + + Replacing these out from under a live Hyprland session makes the next + GPU-lib call hit a deleted library and SIGABRT, taking your Wayland apps + down with it (and risking an unclean shutdown). + + Do it safely instead -- from a TTY with Hyprland stopped: + 1. Log out of Hyprland, or switch to a console (Ctrl+Alt+F2) and log in. + 2. Re-run the upgrade there: sudo pacman -Syu + + To override and proceed anyway (not recommended while Hyprland runs): + sudo touch $sentinel && sudo pacman -Syu +========================================================================== + +EOF +exit 1 diff --git a/scripts/normalize-notify-sounds.sh b/scripts/normalize-notify-sounds.sh index 52c1d36..72c4c33 100755 --- a/scripts/normalize-notify-sounds.sh +++ b/scripts/normalize-notify-sounds.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Normalize notify sound files to a uniform RMS loudness so every notification # plays at the same perceived level. Re-encodes each file in place (ogg -> ogg). # Run once after adding or changing a sound in the notify set. diff --git a/scripts/package-inventory b/scripts/package-inventory index 2dda44b..0a4acf7 100755 --- a/scripts/package-inventory +++ b/scripts/package-inventory @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # package-inventory - Compare archsetup packages vs live system # Shows: packages in archsetup but missing from system, # packages on system but not in archsetup diff --git a/scripts/post-install.sh b/scripts/post-install.sh index 9045398..f7dd206 100755 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-3.0-or-later logfile="$HOME/post-install.log" touch "$logfile" diff --git a/scripts/setup-chess.sh b/scripts/setup-chess.sh index 6ac8749..648eea9 100755 --- a/scripts/setup-chess.sh +++ b/scripts/setup-chess.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later set -euo pipefail # En Croissant + lc0 + Maia + Stockfish setup script for Arch Linux. diff --git a/scripts/testing/archsetup-test-zfs.conf b/scripts/testing/archsetup-test-zfs.conf new file mode 100644 index 0000000..a5459cf --- /dev/null +++ b/scripts/testing/archsetup-test-zfs.conf @@ -0,0 +1,21 @@ +# archsetup-test-zfs.conf - Archangel config for archsetup ZFS test VMs +# Used by create-base-vm.sh (FS_PROFILE=zfs) for fully automated base VM creation +# +# Usage: archangel --config-file /root/archsetup-test.conf +# +# Note: User creation is handled by archsetup, not archangel. +# See archsetup-vm.conf for archsetup configuration (shared across profiles - +# archsetup detects ZFS from the live root, so it needs no filesystem setting). +# +# Unencrypted ZFS root: encryption isn't what the harness validates, and +# NO_ENCRYPT=yes skips the passphrase prompt for a fully unattended install. + +FILESYSTEM=zfs +HOSTNAME=archsetup-test +TIMEZONE=America/Chicago +LOCALE=en_US.UTF-8 +KEYMAP=us +DISKS=/dev/vda +NO_ENCRYPT=yes +ROOT_PASSWORD=archsetup +ENABLE_SSH=yes diff --git a/scripts/testing/cleanup-tests.sh b/scripts/testing/cleanup-tests.sh index 5c0153b..390d7e5 100755 --- a/scripts/testing/cleanup-tests.sh +++ b/scripts/testing/cleanup-tests.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Clean up old test VMs and artifacts # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/create-base-vm.sh b/scripts/testing/create-base-vm.sh index 4ecf4d6..e626813 100755 --- a/scripts/testing/create-base-vm.sh +++ b/scripts/testing/create-base-vm.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Create base VM for archsetup testing - Automated via Archangel ISO # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -19,10 +20,19 @@ source "$SCRIPT_DIR/lib/vm-utils.sh" # Configuration VM_IMAGES_DIR="$PROJECT_ROOT/vm-images" -CONFIG_FILE="$SCRIPT_DIR/archsetup-test.conf" LIVE_ISO_PASSWORD="archangel" SNAPSHOT_NAME="clean-install" +# FS_PROFILE (btrfs default / zfs) picks the archangel base-install config. +# btrfs -> archsetup-test.conf, zfs -> archsetup-test-zfs.conf. The matching +# base image name is derived from FS_PROFILE by init_vm_paths. +FS_PROFILE="${FS_PROFILE:-btrfs}" +if [ "$FS_PROFILE" = "btrfs" ]; then + CONFIG_FILE="$SCRIPT_DIR/archsetup-test.conf" +else + CONFIG_FILE="$SCRIPT_DIR/archsetup-test-${FS_PROFILE}.conf" +fi + # Initialize logging mkdir -p "$PROJECT_ROOT/test-results" LOGFILE="$PROJECT_ROOT/test-results/create-base-vm-$(date +'%Y%m%d-%H%M%S').log" diff --git a/scripts/testing/debug-vm.sh b/scripts/testing/debug-vm.sh index 32f377c..b0fa2b9 100755 --- a/scripts/testing/debug-vm.sh +++ b/scripts/testing/debug-vm.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Launch VM for interactive debugging # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/lib/logging.sh b/scripts/testing/lib/logging.sh index ed20707..809d396 100755 --- a/scripts/testing/lib/logging.sh +++ b/scripts/testing/lib/logging.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Logging utilities for archsetup testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/lib/network-diagnostics.sh b/scripts/testing/lib/network-diagnostics.sh index 674aeba..38788e5 100644 --- a/scripts/testing/lib/network-diagnostics.sh +++ b/scripts/testing/lib/network-diagnostics.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Network diagnostics for VM testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/lib/testinfra.sh b/scripts/testing/lib/testinfra.sh new file mode 100644 index 0000000..0822a9f --- /dev/null +++ b/scripts/testing/lib/testinfra.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Testinfra post-install validation sweep (runs on the host, over SSH). +# +# This is the primary post-install validator (it replaced the shell +# run_all_validations sweep). It connects to the freshly-installed VM over SSH +# and runs the pytest suite under scripts/testing/tests/. Its result drives the +# run's pass/fail, and per-test failures are bucketed (archsetup / base_install +# / unknown) into the same issue-attribution report the shell sweep produced. +# +# Auth: reuse the root key the harness already authorized (inject_root_key), +# which survives the sshd prohibit-password hardening; mint our own only if the +# harness didn't (standalone use). pytest connects key-only via a generated +# ssh-config. Key + config live in the results dir and are discarded with it. +# +# Uses globals from run-test.sh / vm-utils.sh: SCRIPT_DIR, VM_IP, SSH_PORT, +# ROOT_PASSWORD, ROOT_SSH_KEY, ARCHSETUP_VM_CONF, plus the validation.sh +# helpers attribute_issue / VALIDATION_*. Toggle with RUN_TESTINFRA=false. + +# Record each pytest failure from the attribution file into the issue arrays +# (validation.sh's attribute_issue), so generate_issue_report covers them. +_testinfra_record_attribution() { + local file="$1" bucket="" + [ -f "$file" ] || return 0 + while IFS= read -r line; do + case "$line" in + "[archsetup]") bucket=archsetup ;; + "[base_install]") bucket=base ;; + "[unknown]") bucket=unknown ;; + " "*) attribute_issue "testinfra: ${line# }" "$bucket" ;; + esac + done < "$file" +} + +# run_testinfra_validation <results_dir> +# Returns 0 only when the pytest sweep ran and passed. Returns non-zero when it +# failed OR could not run (missing tooling / SSH setup) — a sweep that can't run +# is not a pass. RUN_TESTINFRA=false is the one explicit opt-out (returns 0). +run_testinfra_validation() { + local results_dir="$1" + local tests_dir="$SCRIPT_DIR/tests" + local key="$results_dir/testinfra_key" + local sshcfg="$results_dir/testinfra_ssh_config" + + if [ "${RUN_TESTINFRA:-true}" != "true" ]; then + warn "RUN_TESTINFRA=false - skipping the Testinfra validation sweep" + return 0 + fi + if ! command -v pytest >/dev/null 2>&1 || ! python3 -c 'import testinfra' >/dev/null 2>&1; then + error "Testinfra/pytest not installed on host - cannot validate (run: make deps)" + return 1 + fi + + section "Running Validation Checks (Testinfra)" + + # Prefer the harness's already-authorized root key; mint one if absent. + if [ -n "${ROOT_SSH_KEY:-}" ] && [ -f "${ROOT_SSH_KEY}" ]; then + key="$ROOT_SSH_KEY" + else + rm -f "$key" "$key.pub" + if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then + error "testinfra: ssh-keygen failed" + return 1 + fi + if ! copy_to_vm "$key.pub" "/tmp/testinfra_key.pub" "$ROOT_PASSWORD"; then + error "testinfra: pubkey copy failed" + return 1 + fi + if ! vm_exec "$ROOT_PASSWORD" \ + "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat /tmp/testinfra_key.pub >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys"; then + error "testinfra: authorizing key in VM failed" + return 1 + fi + fi + + # ssh-config so testinfra connects key-only, no host-key prompt. + cat > "$sshcfg" <<EOF +Host testinfra-target + HostName ${VM_IP:-localhost} + Port ${SSH_PORT:-2222} + User root + IdentityFile $key + IdentitiesOnly yes + StrictHostKeyChecking no + UserKnownHostsFile /dev/null +EOF + + # The account archsetup created, for the tests that need it. + local test_user + test_user=$(sed -n 's/^USERNAME=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1) + : "${test_user:=cjennings}" + + local logf="$results_dir/testinfra.log" + ARCHSETUP_TEST_USER="$test_user" pytest "$tests_dir" \ + --hosts="ssh://testinfra-target" \ + --ssh-config="$sshcfg" \ + --attribution-file="$results_dir/testinfra-attribution.txt" \ + -v >> "$logf" 2>&1 + local rc=$? + + # Surface pytest's counts through the shared validation counters so the + # issue report summary is meaningful (the shell sweep no longer runs). + local summary + summary=$(grep -oE '[0-9]+ (passed|failed|error|errors|skipped)' "$logf" | tail -10) + VALIDATION_PASSED=$(echo "$summary" | awk '/passed/{print $1}' | tail -1); VALIDATION_PASSED=${VALIDATION_PASSED:-0} + VALIDATION_WARNINGS=$(echo "$summary" | awk '/skipped/{print $1}' | tail -1); VALIDATION_WARNINGS=${VALIDATION_WARNINGS:-0} + local nfail nerr + nfail=$(echo "$summary" | awk '/failed/{print $1}' | tail -1); nfail=${nfail:-0} + nerr=$(echo "$summary" | awk '/error/{print $1}' | tail -1); nerr=${nerr:-0} + VALIDATION_FAILED=$((nfail + nerr)) + + if [ "$rc" -eq 0 ]; then + success "Testinfra validation passed ($VALIDATION_PASSED passed, $VALIDATION_WARNINGS skipped)" + else + error "Testinfra validation failed ($VALIDATION_FAILED failed/error; see testinfra.log)" + _testinfra_record_attribution "$results_dir/testinfra-attribution.txt" + fi + return "$rc" +} diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh index 91270ef..fa7ddcc 100644 --- a/scripts/testing/lib/validation.sh +++ b/scripts/testing/lib/validation.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Validation utilities for archsetup testing # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -20,38 +21,7 @@ declare -a UNKNOWN_ISSUES # SSH helper (uses globals: VM_IP, ROOT_PASSWORD) ssh_cmd() { sshpass -p "$ROOT_PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - -o ConnectTimeout=10 -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null -} - -# Validation result helpers -validation_pass() { - local test_name="$1" - success "$test_name" - ((VALIDATION_PASSED++)) || true -} - -validation_fail() { - local test_name="$1" - local details="${2:-}" - error "$test_name" - [ -n "$details" ] && info " Details: $details" - ((VALIDATION_FAILED++)) || true -} - -validation_warn() { - local test_name="$1" - local details="${2:-}" - warn "$test_name" - [ -n "$details" ] && info " Details: $details" - ((VALIDATION_WARNINGS++)) || true -} - -# A check whose precondition can't hold in this environment (headless VM, -# slirp networking, pre-reboot state). Logged for the record, counted nowhere -# — a warning that fires on every run trains readers to ignore warnings. -validation_skip() { - local test_name="$1" - info "SKIP: $test_name" + -o ConnectTimeout=10 ${SSH_KEY_OPT:-} -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null } # Attribute an issue to archsetup or base install @@ -264,802 +234,6 @@ categorize_errors() { } #============================================================================= -# VALIDATION CHECKS -#============================================================================= - -run_all_validations() { - section "Running Validation Checks" - - # User & Authentication - validate_user_created - validate_user_shell - validate_user_groups - - # Dotfiles - validate_dotfiles - - # Package Managers - validate_yay_installed - validate_pacman_working - - # Window Manager (detects DWM or Hyprland automatically) - validate_window_manager - - # Essential Services - validate_firewall - validate_dns_config - validate_avahi - validate_fail2ban - validate_networkmanager - - # Developer Tools - validate_emacs - validate_git_config - validate_dev_tools - - # System Configuration - validate_zfs_config - validate_boot_config - validate_autologin_config - validate_gnome_keyring_setup - - # Boot & Initramfs (critical for ZFS systems) - validate_terminus_font - validate_mkinitcpio_hooks - validate_initramfs_consolefont - validate_nvme_module - - # Archsetup Specific - validate_archsetup_log - validate_state_markers -} - -#----------------------------------------------------------------------------- -# User & Authentication Validations -#----------------------------------------------------------------------------- - -validate_user_created() { - step "Checking if user 'cjennings' exists" - if ssh_cmd "id cjennings" &>> "$LOGFILE"; then - validation_pass "User cjennings exists" - else - validation_fail "User cjennings not found" - attribute_issue "User cjennings not created" "archsetup" - fi -} - -validate_user_shell() { - step "Checking if ZSH is default shell" - local shell=$(ssh_cmd "getent passwd cjennings | cut -d: -f7") - if [ "$shell" = "/bin/zsh" ] || [ "$shell" = "/usr/bin/zsh" ]; then - validation_pass "ZSH is default shell" - else - validation_fail "ZSH not default shell (got: $shell)" - attribute_issue "ZSH not set as default shell" "archsetup" - fi -} - -validate_user_groups() { - step "Checking user group memberships" - # Groups added by archsetup: - # - wheel (useradd -G wheel) - # - sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users (usermod -aG) - # - docker (gpasswd -a, added later in developer_workstation) - local expected_groups="wheel sys adm network scanner power uucp audio lp rfkill video storage optical users docker" - local missing_groups="" - - for group in $expected_groups; do - if ! ssh_cmd "groups cjennings" | grep -q "\b$group\b"; then - missing_groups="$missing_groups $group" - fi - done - - if [ -z "$missing_groups" ]; then - validation_pass "User in all expected groups (15 groups)" - else - validation_fail "User missing groups:$missing_groups" - attribute_issue "User missing groups:$missing_groups" "archsetup" - fi -} - -#----------------------------------------------------------------------------- -# Dotfiles Validations -#----------------------------------------------------------------------------- - -validate_dotfiles() { - step "Checking dotfiles setup" - - # 1. Check if .zshrc is a symlink - if ! ssh_cmd "test -L /home/cjennings/.zshrc"; then - validation_fail "Dotfiles not stowed (.zshrc is not a symlink)" - attribute_issue "Dotfiles stow failed" "archsetup" - return 1 - fi - - # 2. Check symlink points to correct location. archsetup now clones the - # dotfiles repo to ~/.dotfiles and stows from there (DOTFILES_DIR default). - # Which tree owns .zshrc depends on DESKTOP_ENV: none stows the standalone - # minimal/ tree; dwm and hyprland stow common/. - local target=$(ssh_cmd "readlink /home/cjennings/.zshrc") - local desktop_env=$(sed -n 's/^DESKTOP_ENV=//p' "$ARCHSETUP_VM_CONF" 2>/dev/null | head -n1) - local expected_pattern=".dotfiles/common/.zshrc" - [ "$desktop_env" = "none" ] && expected_pattern=".dotfiles/minimal/.zshrc" - - if ! echo "$target" | grep -q "$expected_pattern"; then - validation_fail "Dotfiles symlink points to wrong location: $target" - attribute_issue "Dotfiles symlink incorrect: $target" "archsetup" - return 1 - fi - - # 3. Check the target file actually exists (not a broken symlink) - if ! ssh_cmd "test -f /home/cjennings/.zshrc"; then - validation_fail "Dotfiles symlink is broken (target doesn't exist)" - ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1 - attribute_issue "Dotfiles symlink broken" "archsetup" - return 1 - fi - - # 4. Check user can actually read the file (not just root) - local result=$(ssh_cmd "sudo -u cjennings cat /home/cjennings/.zshrc > /dev/null 2>&1 && echo OK || echo FAIL") - if [ "$result" != "OK" ]; then - validation_fail "Dotfiles not readable by user (permission issue)" - ssh_cmd "ls -la /home/cjennings/.zshrc" >> "$LOGFILE" 2>&1 - attribute_issue "Dotfiles not readable by user" "archsetup" - return 1 - fi - - validation_pass "Dotfiles configured correctly (symlink to $target, readable by user)" -} - -#----------------------------------------------------------------------------- -# Package Manager Validations -#----------------------------------------------------------------------------- - -validate_yay_installed() { - step "Checking if yay (AUR helper) is installed and functional" - - # Check binary exists - if ! ssh_cmd "which yay" &>> "$LOGFILE"; then - validation_fail "yay not found" - attribute_issue "yay not installed" "archsetup" - return 1 - fi - - # Check yay can query packages (functional test) - if ssh_cmd "sudo -u cjennings yay -Qi yay" &>> "$LOGFILE"; then - validation_pass "yay is installed and functional" - else - validation_fail "yay binary exists but query failed" - attribute_issue "yay not functional" "archsetup" - fi -} - -validate_pacman_working() { - step "Checking if pacman is functional" - if ssh_cmd "pacman -Qi base" &>> "$LOGFILE"; then - validation_pass "pacman is functional" - else - validation_fail "pacman query failed" - attribute_issue "pacman not functional" "unknown" - fi -} - -#----------------------------------------------------------------------------- -# Window Manager Validations -#----------------------------------------------------------------------------- - -validate_suckless_tools() { - step "Checking suckless tools (dwm, st, dmenu, slock)" - local missing="" - - for tool in dwm st dmenu slock; do - if ! ssh_cmd "test -f /usr/local/bin/$tool"; then - missing="$missing $tool" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All suckless tools installed (dwm, st, dmenu, slock)" - else - validation_fail "Missing suckless tools:$missing" - attribute_issue "Missing suckless tools:$missing" "archsetup" - fi -} - -validate_hyprland_tools() { - step "Checking Hyprland tools" - local missing="" - - # Check core Hyprland packages - for pkg in hyprland hypridle hyprlock waybar fuzzel swww grim slurp gammastep foot; do - if ! ssh_cmd "pacman -Q $pkg &>/dev/null"; then - missing="$missing $pkg" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All Hyprland tools installed" - else - validation_fail "Missing Hyprland tools:$missing" - attribute_issue "Missing Hyprland tools:$missing" "archsetup" - fi -} - -validate_hyprland_config() { - step "Checking Hyprland configuration files" - local missing="" - - for config in ".config/hypr/hyprland.conf" ".config/hypr/hypridle.conf" \ - ".config/hypr/hyprlock.conf" ".config/waybar/config" \ - ".config/fuzzel/fuzzel.ini" ".config/gammastep/config.ini"; do - if ! ssh_cmd "test -f /home/cjennings/$config"; then - missing="$missing $config" - fi - done - - if [ -z "$missing" ]; then - validation_pass "All Hyprland config files present" - else - validation_fail "Missing Hyprland configs:$missing" - attribute_issue "Missing Hyprland configs:$missing" "archsetup" - fi -} - -validate_hyprland_socket() { - step "Checking Hyprland IPC socket" - # The socket only exists while the compositor runs. In the headless test - # VM nobody logs in graphically, so a missing socket with no Hyprland - # process is the expected state, not a finding. - if ssh_cmd "test -S /tmp/hypr/*/.socket.sock 2>/dev/null"; then - validation_pass "Hyprland socket exists" - elif ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then - validation_skip "Hyprland not running (headless) — socket check not applicable" - else - validation_warn "Hyprland running but IPC socket not found" - fi -} - -validate_portal_dark_mode() { - step "Checking Settings portal returns dark mode" - - # Check portals.conf exists and uses gtk for Settings - local portals_conf="/home/cjennings/.config/xdg-desktop-portal/portals.conf" - if ! ssh_cmd "test -f $portals_conf"; then - validation_fail "portals.conf not found" - attribute_issue "xdg-desktop-portal portals.conf missing" "archsetup" - return 1 - fi - - local settings_backend=$(ssh_cmd "grep 'org.freedesktop.impl.portal.Settings' $portals_conf 2>/dev/null | cut -d= -f2") - if [ "$settings_backend" = "none" ]; then - validation_fail "Settings portal disabled (set to 'none')" - attribute_issue "Settings portal disabled in portals.conf" "archsetup" - return 1 - fi - - # Query the portal for color-scheme (requires portal services running) - # Returns "v v u 1" for dark mode (1 = prefer-dark) - local color_scheme=$(ssh_cmd "sudo -u cjennings busctl --user call org.freedesktop.portal.Desktop /org/freedesktop/portal/desktop org.freedesktop.portal.Settings Read 'ss' 'org.freedesktop.appearance' 'color-scheme' 2>/dev/null | grep -o 'u [0-9]' | cut -d' ' -f2") - - if [ "$color_scheme" = "1" ]; then - validation_pass "Settings portal returns dark mode (color-scheme=1)" - elif [ -z "$color_scheme" ] && ! ssh_cmd "pgrep -x Hyprland >/dev/null 2>&1"; then - # No compositor → no graphical session bus to query. A socket-activated - # xdg-desktop-portal process can exist even headless, so the compositor - # is the real precondition (same condition as the socket check). The - # conf-file checks above already validated what install controls. - validation_skip "No compositor running (headless) — portal query not applicable" - elif [ -z "$color_scheme" ]; then - validation_warn "Could not query Settings portal (portal may not be running)" - else - validation_fail "Settings portal not returning dark mode (color-scheme=$color_scheme, expected 1)" - attribute_issue "Settings portal not configured for dark mode" "archsetup" - fi -} - -validate_window_manager() { - # Detect which desktop environment is installed and validate accordingly - if ssh_cmd "pacman -Q hyprland &>/dev/null"; then - section "Hyprland Desktop Environment" - validate_hyprland_tools - validate_hyprland_config - validate_hyprland_socket - validate_portal_dark_mode - elif ssh_cmd "test -f /usr/local/bin/dwm"; then - section "DWM Desktop Environment" - validate_suckless_tools - else - validation_warn "No window manager detected (DESKTOP_ENV=none?)" - fi -} - -#----------------------------------------------------------------------------- -# Essential Services Validations -#----------------------------------------------------------------------------- - -validate_firewall() { - step "Checking if firewall (ufw) is enabled" - local status=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "UFW firewall is enabled" - else - validation_fail "UFW firewall not enabled" - attribute_issue "UFW not enabled" "archsetup" - fi -} - -validate_dns_config() { - step "Checking DNS-over-TLS configuration" - if ssh_cmd "grep -q 'DNS=.*#' /etc/systemd/resolved.conf 2>/dev/null"; then - validation_pass "DNS-over-TLS configured" - else - validation_warn "DNS-over-TLS may not be configured" - fi -} - -validate_avahi() { - step "Checking avahi-daemon status" - local status=$(ssh_cmd "systemctl is-enabled avahi-daemon.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "avahi-daemon is enabled" - - # Full-stack mDNS test: ping hostname.local. QEMU user-mode (slirp, - # 10.0.2.x) doesn't pass multicast, so mDNS genuinely can't resolve - # there — only run the ping on real networking. - if ssh_cmd "ip -4 addr show" 2>/dev/null | grep -q "10\.0\.2\."; then - validation_skip "mDNS ping not possible on slirp networking (no multicast)" - else - local hostname=$(ssh_cmd "hostname") - if ssh_cmd "ping -c 1 -W 2 ${hostname}.local" &>> "$LOGFILE"; then - validation_pass "mDNS working (${hostname}.local responds to ping)" - else - validation_warn "mDNS ping failed (avahi may need time to propagate)" - fi - fi - else - # This might be OK if avahi was pre-installed - validation_warn "avahi-daemon not enabled (may have been pre-configured)" - fi -} - -validate_fail2ban() { - step "Checking fail2ban status" - local status=$(ssh_cmd "systemctl is-enabled fail2ban.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "fail2ban is enabled" - else - validation_fail "fail2ban not enabled" - attribute_issue "fail2ban not enabled" "archsetup" - fi -} - -validate_networkmanager() { - step "Checking NetworkManager status" - local status=$(ssh_cmd "systemctl is-enabled NetworkManager.service 2>/dev/null || echo disabled") - if [ "$status" = "enabled" ]; then - validation_pass "NetworkManager is enabled" - # Functional test - if ssh_cmd "nmcli general status" &>> "$LOGFILE"; then - validation_pass "NetworkManager is functional" - else - validation_warn "NetworkManager enabled but not responding" - fi - else - validation_fail "NetworkManager not enabled" - attribute_issue "NetworkManager not enabled" "archsetup" - fi -} - -#----------------------------------------------------------------------------- -# Service-Specific Validations -#----------------------------------------------------------------------------- - -validate_all_services() { - section "Service Validations" - - # Core services (always expected) - validate_service "sshd" "enabled" "active" - validate_service "systemd-resolved" "enabled" "active" - validate_service "ufw" "enabled" "" # VM lacks iptables modules, can't be active - validate_service "fail2ban" "enabled" "active" - validate_service "NetworkManager" "enabled" "active" - validate_service "rngd" "enabled" "active" - validate_service "cronie" "enabled" "" - validate_service "atd" "enabled" "" - - # Cron job: log cleanup - step "Checking log-cleanup cron job" - local crontab_entry=$(ssh_cmd "sudo -u cjennings crontab -l 2>/dev/null | grep log-cleanup") - if [ -n "$crontab_entry" ]; then - validation_pass "log-cleanup cron job installed" - else - validation_fail "log-cleanup cron job not in crontab" - attribute_issue "log-cleanup cron job missing from user crontab" "archsetup" - fi - - # Timer services - validate_service "reflector.timer" "enabled" "" - validate_service "paccache.timer" "enabled" "" - - # Optional services (warn if missing, don't fail) - validate_service_optional "avahi-daemon" "enabled" - validate_service_optional "bluetooth" "enabled" - validate_service_optional "cups" "enabled" - validate_service_optional "docker" "enabled" - validate_service_optional "tailscaled" "enabled" - # Syncthing uses user service (not system), check lingering is enabled. - # test -e, not ls: ls prints the path on success, so the old capture held - # "path\nyes" and could never equal "yes" — the check warned on every - # run, even with lingering correctly enabled. - step "Checking user lingering for syncthing" - local linger_enabled=$(ssh_cmd "test -e /var/lib/systemd/linger/cjennings && echo yes || echo no") - if [ "$linger_enabled" = "yes" ]; then - validation_pass "User lingering enabled for syncthing user service" - else - validation_warn "User lingering not enabled (syncthing may not autostart)" - fi - - # Filesystem-specific - validate_zfs_services - validate_btrfs_services - - # Functional tests - validate_service_functions -} - -validate_service() { - local service="$1" - local expected_enabled="$2" # "enabled" or "" - local expected_active="$3" # "active" or "" - - step "Checking $service" - - if [ -n "$expected_enabled" ]; then - local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled") - if [ "$enabled" = "enabled" ]; then - validation_pass "$service is enabled" - else - validation_fail "$service not enabled (got: $enabled)" - attribute_issue "$service not enabled" "archsetup" - return 1 - fi - fi - - if [ -n "$expected_active" ]; then - local active=$(ssh_cmd "systemctl is-active $service 2>/dev/null || echo inactive") - if [ "$active" = "active" ]; then - validation_pass "$service is active" - else - validation_fail "$service not active (got: $active)" - attribute_issue "$service not active" "archsetup" - return 1 - fi - fi - - return 0 -} - -validate_service_optional() { - local service="$1" - local expected_enabled="$2" - - step "Checking optional service: $service" - - local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled") - if [ "$enabled" = "enabled" ]; then - validation_pass "$service is enabled" - else - validation_warn "$service not enabled (optional)" - fi -} - -validate_zfs_services() { - # Only check if ZFS is installed - if ! ssh_cmd "which zfs" &>> "$LOGFILE"; then - return 0 - fi - - step "Checking ZFS-specific services" - - validate_service_optional "sanoid.timer" "enabled" - - # Check for zfs-scrub timer (pool name varies) - local scrub_enabled - scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled" | tr -d '[:space:]') - scrub_enabled=${scrub_enabled:-0} - if [ "$scrub_enabled" -gt 0 ]; then - validation_pass "ZFS scrub timer enabled" - else - validation_warn "ZFS scrub timer not found" - fi -} - -validate_btrfs_services() { - # Only check if btrfs root - if ! ssh_cmd "mount | grep 'on / ' | grep -q btrfs"; then - return 0 - fi - - step "Checking btrfs-specific services" - validate_service_optional "grub-btrfsd" "enabled" -} - -validate_service_functions() { - section "Service Functional Tests" - - # UFW functional test - # NOTE: VM environment lacks iptables kernel modules, so UFW cannot activate. - # We only verify it's enabled; active status requires real hardware. - step "Testing UFW functionality" - local ufw_enabled - ufw_enabled=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled") - if [ "$ufw_enabled" = "enabled" ]; then - validation_pass "UFW is enabled (activation requires iptables kernel modules)" - else - validation_fail "UFW not enabled" - attribute_issue "UFW not enabled" "archsetup" - fi - - # fail2ban functional test - step "Testing fail2ban functionality" - if ssh_cmd "fail2ban-client status" &>> "$LOGFILE"; then - validation_pass "fail2ban is responding" - else - validation_fail "fail2ban not responding" - attribute_issue "fail2ban not functioning" "archsetup" - fi - - # DNS resolution test - step "Testing DNS resolution" - if ssh_cmd "resolvectl query archlinux.org" &>> "$LOGFILE"; then - validation_pass "DNS resolution working" - else - validation_warn "DNS resolution test failed (may be network issue)" - fi - - # Docker functional test (if enabled) - if ssh_cmd "systemctl is-enabled docker" &>> "$LOGFILE"; then - step "Testing Docker functionality" - if ssh_cmd "docker info" &>> "$LOGFILE"; then - validation_pass "Docker is responding" - elif ! ssh_cmd "systemctl is-active --quiet docker"; then - # archsetup enables docker for next boot (enable, not enable --now, - # by design — the daemon is heavy). Validation runs pre-reboot, so - # enabled-but-not-started is the correct installed state. - validation_skip "Docker enabled but not started (starts on boot by design)" - else - validation_warn "Docker active but not responding" - fi - fi -} - -#----------------------------------------------------------------------------- -# Developer Tools Validations -#----------------------------------------------------------------------------- - -validate_emacs() { - step "Checking if Emacs is installed" - if ssh_cmd "which emacs" &>> "$LOGFILE"; then - validation_pass "Emacs is installed" - - # Check if config exists - if ssh_cmd "test -d /home/cjennings/.emacs.d"; then - validation_pass "Emacs config directory exists" - - # Check user can access the directory - local result - result=$(ssh_cmd "sudo -u cjennings ls /home/cjennings/.emacs.d > /dev/null 2>&1 && echo OK || echo FAIL") - if [ "$result" = "OK" ]; then - validation_pass "Emacs config readable by user" - else - validation_fail "Emacs config not readable by user (permission issue)" - attribute_issue "Emacs .emacs.d not readable by user" "archsetup" - fi - else - validation_warn "Emacs config directory not found" - fi - else - validation_fail "Emacs not found" - attribute_issue "Emacs not installed" "archsetup" - fi -} - -validate_git_config() { - step "Checking git installation" - if ssh_cmd "which git" &>> "$LOGFILE"; then - validation_pass "git is installed" - else - validation_fail "git not found" - attribute_issue "git not installed" "archsetup" - fi -} - -validate_dev_tools() { - step "Checking developer tools" - local tools="python node npm go rustc" - local missing="" - - for tool in $tools; do - if ! ssh_cmd "which $tool" &>> "$LOGFILE"; then - missing="$missing $tool" - fi - done - - if [ -z "$missing" ]; then - validation_pass "Core dev tools installed" - else - validation_warn "Some dev tools missing:$missing" - fi -} - -#----------------------------------------------------------------------------- -# System Configuration Validations -#----------------------------------------------------------------------------- - -validate_zfs_config() { - step "Checking ZFS configuration (if applicable)" - if ssh_cmd "which zfs" &>> "$LOGFILE"; then - # ZFS is installed, check for sanoid - if ssh_cmd "which sanoid" &>> "$LOGFILE"; then - validation_pass "ZFS with sanoid detected" - else - validation_warn "ZFS detected but sanoid not installed" - fi - else - info "ZFS not installed (non-ZFS system)" - fi -} - -validate_boot_config() { - step "Checking GRUB configuration" - if ssh_cmd "test -f /boot/grub/grub.cfg" &>> "$LOGFILE"; then - validation_pass "GRUB config exists" - else - validation_warn "GRUB config not found (may use different bootloader)" - fi -} - -validate_terminus_font() { - step "Checking terminus-font installation" - if ssh_cmd "pacman -Q terminus-font" &>> "$LOGFILE"; then - validation_pass "terminus-font package installed" - else - validation_fail "terminus-font package not installed" - attribute_issue "terminus-font not installed via pacman" "archsetup" - fi -} - -validate_mkinitcpio_hooks() { - step "Checking mkinitcpio HOOKS configuration" - local hooks=$(ssh_cmd "grep '^HOOKS=' /etc/mkinitcpio.conf") - local is_zfs=$(ssh_cmd "findmnt -n -o FSTYPE / 2>/dev/null") - - if [ "$is_zfs" = "zfs" ]; then - # ZFS system: must use udev, not systemd - if echo "$hooks" | grep -q '\budev\b'; then - validation_pass "ZFS system uses udev hook (correct)" - elif echo "$hooks" | grep -q '\bsystemd\b'; then - validation_fail "ZFS system uses systemd hook (will break boot)" - attribute_issue "mkinitcpio uses systemd hook on ZFS system" "archsetup" - else - validation_warn "Could not determine init hook type" - fi - else - # Non-ZFS: systemd hook is fine - if echo "$hooks" | grep -q '\bsystemd\b'; then - validation_pass "Non-ZFS system uses systemd hook" - elif echo "$hooks" | grep -q '\budev\b'; then - validation_pass "Non-ZFS system uses udev hook" - fi - fi -} - -validate_initramfs_consolefont() { - step "Checking console font in initramfs" - local font_in_initramfs=$(ssh_cmd "lsinitcpio /boot/initramfs-linux*.img 2>/dev/null | grep -c 'consolefont.psf\\|ter-'") - - if [ "${font_in_initramfs:-0}" -gt 0 ]; then - validation_pass "Console font included in initramfs" - else - validation_warn "Console font may not be in initramfs" - fi -} - -validate_nvme_module() { - step "Checking NVMe module configuration" - local has_nvme=$(ssh_cmd "ls /dev/nvme* 2>/dev/null | head -1") - - if [ -n "$has_nvme" ]; then - # System has NVMe drives - local modules=$(ssh_cmd "grep '^MODULES=' /etc/mkinitcpio.conf") - if echo "$modules" | grep -q 'nvme'; then - validation_pass "NVMe module in mkinitcpio MODULES" - else - validation_warn "NVMe system but nvme not in MODULES (may cause slow boot)" - fi - else - info "No NVMe drives detected" - fi -} - -validate_autologin_config() { - step "Checking autologin configuration" - if ssh_cmd "test -f /etc/systemd/system/getty@tty1.service.d/autologin.conf" &>> "$LOGFILE"; then - validation_pass "Autologin configured" - else - info "Autologin not configured (may be intentional)" - fi -} - -validate_gnome_keyring_setup() { - step "Checking gnome-keyring pre-configuration" - local keyring_dir="/home/cjennings/.local/share/keyrings" - - # Check directory exists - if ! ssh_cmd "test -d $keyring_dir"; then - validation_fail "Keyring directory not created" - attribute_issue "gnome-keyring directory not pre-created" "archsetup" - return 1 - fi - - # Check directory permissions (should be 700) - local perms=$(ssh_cmd "stat -c '%a' $keyring_dir") - if [ "$perms" != "700" ]; then - validation_fail "Keyring directory has wrong permissions: $perms (expected 700)" - attribute_issue "gnome-keyring directory wrong permissions" "archsetup" - return 1 - fi - - # Check ownership - local owner=$(ssh_cmd "stat -c '%U' $keyring_dir") - if [ "$owner" != "cjennings" ]; then - validation_fail "Keyring directory owned by $owner (expected cjennings)" - attribute_issue "gnome-keyring directory wrong ownership" "archsetup" - return 1 - fi - - # Check default file exists and contains "login" - local default_keyring=$(ssh_cmd "cat $keyring_dir/default 2>/dev/null") - if [ "$default_keyring" != "login" ]; then - validation_fail "Default keyring not set to 'login' (got: '$default_keyring')" - attribute_issue "gnome-keyring default not set to login" "archsetup" - return 1 - fi - - validation_pass "gnome-keyring pre-configured (default=login, dir=700)" -} - -#----------------------------------------------------------------------------- -# Archsetup-Specific Validations -#----------------------------------------------------------------------------- - -validate_archsetup_log() { - step "Checking archsetup log for errors" - local error_count - # Use grep -h to suppress filenames, then wc -l to count total matches - error_count=$(ssh_cmd "grep -h '^Error:' /var/log/archsetup-*.log 2>/dev/null | wc -l" | tr -d '[:space:]') - error_count=${error_count:-0} - - if [ "$error_count" = "0" ]; then - validation_pass "No errors in archsetup log" - else - validation_fail "Found $error_count errors in archsetup log" - attribute_issue "Errors in archsetup log: $error_count" "archsetup" - fi -} - -validate_state_markers() { - step "Checking archsetup state markers" - local state_count=$(ssh_cmd "ls /var/lib/archsetup/state/ 2>/dev/null | wc -l") - - if [ "$state_count" -ge 12 ]; then - validation_pass "All 12 installation steps completed" - else - validation_warn "Only $state_count/12 steps completed" - fi -} - -#============================================================================= # ISSUE REPORTING #============================================================================= @@ -1138,18 +312,3 @@ EOF info "Issue report saved: $report_file" } -#============================================================================= -# MAIN VALIDATION ENTRY POINT -#============================================================================= - -run_full_validation() { - local output_dir="$1" - local archzfs_inbox="${2:-}" - - run_all_validations - analyze_log_diff "$output_dir" - generate_issue_report "$output_dir" "$archzfs_inbox" - - # Return success if no failures - [ $VALIDATION_FAILED -eq 0 ] -} diff --git a/scripts/testing/lib/vm-utils.sh b/scripts/testing/lib/vm-utils.sh index a8736a3..b85e773 100755 --- a/scripts/testing/lib/vm-utils.sh +++ b/scripts/testing/lib/vm-utils.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # VM management utilities for archsetup testing (direct QEMU) # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -10,13 +11,26 @@ # VM configuration defaults VM_CPUS="${VM_CPUS:-4}" -VM_RAM="${VM_RAM:-4096}" # MB +# 8 GiB headroom for AUR builds: makepkg runs -j$VM_CPUS, and parallel cc1plus +# (~700 MB each on heavy C++ packages) OOM-killed under the old 4 GiB default. +VM_RAM="${VM_RAM:-8192}" # MB VM_DISK_SIZE="${VM_DISK_SIZE:-50}" # GB +# Filesystem profile: selects which base image + archangel config the harness +# targets. "btrfs" is the historical default (its image name stays unsuffixed +# so existing base images keep working); "zfs" gets its own image, since the +# two on-disk layouts can't share a disk. Honoured by init_vm_paths below. +FS_PROFILE="${FS_PROFILE:-btrfs}" + # SSH configuration SSH_PORT="${SSH_PORT:-2222}" SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10" ROOT_PASSWORD="${ROOT_PASSWORD:-archsetup}" +# Set by inject_root_key once a root key is authorized in the VM. When set, the +# ssh/scp helpers add "-i <key>" so they keep working after archsetup hardens +# sshd to PermitRootLogin prohibit-password (which kills root *password* login +# but still allows key auth). Left unquoted at use sites, like SSH_OPTS. +SSH_KEY_OPT="${SSH_KEY_OPT:-}" # OVMF firmware paths OVMF_CODE="/usr/share/edk2/x64/OVMF_CODE.4m.fd" @@ -36,9 +50,22 @@ init_vm_paths() { local images_dir="${1:-$VM_IMAGES_DIR}" [ -z "$images_dir" ] && fatal "VM_IMAGES_DIR not set" + case "$FS_PROFILE" in + btrfs|zfs) ;; + *) fatal "Invalid FS_PROFILE: $FS_PROFILE (must be 'btrfs' or 'zfs')" ;; + esac + VM_IMAGES_DIR="$images_dir" - DISK_PATH="$VM_IMAGES_DIR/archsetup-base.qcow2" - OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS.fd" + # btrfs keeps the legacy unsuffixed name; other profiles get a suffix so + # their images sit side by side without clobbering each other. + local img_suffix="" + [ "$FS_PROFILE" != "btrfs" ] && img_suffix="-$FS_PROFILE" + DISK_PATH="$VM_IMAGES_DIR/archsetup-base${img_suffix}.qcow2" + # Per-profile NVRAM: UEFI boot entries live here, outside the qcow2, so a + # disk-snapshot revert can't restore them. Sharing one file across profiles + # let a zfs run's ZFSBootMenu entries clobber the btrfs GRUB entry, leaving + # the btrfs base unbootable (no removable ESP fallback to recover from). + OVMF_VARS="$VM_IMAGES_DIR/OVMF_VARS${img_suffix}.fd" PID_FILE="$VM_IMAGES_DIR/qemu.pid" MONITOR_SOCK="$VM_IMAGES_DIR/qemu-monitor.sock" SERIAL_LOG="$VM_IMAGES_DIR/qemu-serial.log" @@ -350,7 +377,7 @@ wait_for_ssh() { progress "Waiting for SSH on localhost:$SSH_PORT..." while [ "$elapsed" -lt "$timeout" ]; do - if sshpass -p "$password" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost true 2>/dev/null; then + if sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT -p "$SSH_PORT" root@localhost true 2>/dev/null; then success "SSH is available" return 0 fi @@ -366,7 +393,7 @@ wait_for_ssh() { vm_exec() { local password="${1:-$ROOT_PASSWORD}" shift - sshpass -p "$password" ssh $SSH_OPTS \ + sshpass -p "$password" ssh $SSH_OPTS $SSH_KEY_OPT \ -o ServerAliveInterval=30 -o ServerAliveCountMax=10 \ -p "$SSH_PORT" root@localhost "$@" 2>> "$LOGFILE" } @@ -378,7 +405,7 @@ copy_to_vm() { local password="${3:-$ROOT_PASSWORD}" step "Copying $(basename "$local_file") to VM:$remote_path" - if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \ + if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \ "$local_file" "root@localhost:$remote_path" >> "$LOGFILE" 2>&1; then success "File copied to VM" return 0 @@ -395,7 +422,7 @@ copy_from_vm() { local password="${3:-$ROOT_PASSWORD}" step "Copying $remote_file from VM" - if sshpass -p "$password" scp $SSH_OPTS -P "$SSH_PORT" \ + if sshpass -p "$password" scp $SSH_OPTS $SSH_KEY_OPT -P "$SSH_PORT" \ "root@localhost:$remote_file" "$local_path" >> "$LOGFILE" 2>&1; then success "File copied from VM" return 0 @@ -404,3 +431,31 @@ copy_from_vm() { return 1 fi } + +# inject_root_key <key_path> +# Authorize a throwaway root key over the initial password session and switch +# all the helpers above to key auth (sets SSH_KEY_OPT + ROOT_SSH_KEY). Call once, +# right after wait_for_ssh and before running archsetup: archsetup sets +# PermitRootLogin prohibit-password and reloads sshd partway through, which kills +# root *password* login. Without a key in place first, every SSH after that step +# fails and the run aborts before any validation. Key auth survives the hardening. +# Targets root@$VM_IP on $SSH_PORT so it works for both the local VM runner +# (VM_IP=localhost, port 2222) and the bare-metal runner (VM_IP=host, port 22). +inject_root_key() { + local key="$1" + rm -f "$key" "$key.pub" + if ! ssh-keygen -t ed25519 -N "" -q -f "$key"; then + warn "Root key generation failed - run may break at sshd hardening" + return 1 + fi + if sshpass -p "$ROOT_PASSWORD" ssh $SSH_OPTS -p "$SSH_PORT" "root@${VM_IP:-localhost}" \ + "mkdir -p /root/.ssh && chmod 700 /root/.ssh && cat >> /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys" \ + < "$key.pub" >> "$LOGFILE" 2>&1; then + SSH_KEY_OPT="-i $key" + export ROOT_SSH_KEY="$key" + success "Root SSH key authorized (survives sshd prohibit-password hardening)" + return 0 + fi + warn "Root key authorization failed - run may break at sshd hardening" + return 1 +} diff --git a/scripts/testing/run-test-baremetal.sh b/scripts/testing/run-test-baremetal.sh index b6d1ab1..d22c424 100755 --- a/scripts/testing/run-test-baremetal.sh +++ b/scripts/testing/run-test-baremetal.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Run archsetup test on bare metal ZFS system # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -19,13 +20,16 @@ PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" # Source utilities source "$SCRIPT_DIR/lib/logging.sh" -source "$SCRIPT_DIR/lib/validation.sh" +source "$SCRIPT_DIR/lib/validation.sh" # live helpers: ssh_cmd, capture_*_state, analyze_log_diff, generate_issue_report +source "$SCRIPT_DIR/lib/vm-utils.sh" # inject_root_key + SSH_OPTS/SSH_KEY_OPT for key auth +source "$SCRIPT_DIR/lib/testinfra.sh" # run_testinfra_validation (authoritative validator) # Parse arguments ROLLBACK_FIRST=false ROLLBACK_AFTER=false TARGET_HOST="" ROOT_PASSWORD="" +PORT="22" usage() { echo "Usage: $0 --host <hostname> --password <root_password> [options]" @@ -35,6 +39,7 @@ usage() { echo " --password <password> Root password for SSH" echo "" echo "Options:" + echo " --port <port> SSH port (default 22; use 2222 to target a test VM)" echo " --rollback-first Roll back to genesis snapshots before running" echo " --rollback-after Roll back to genesis snapshots after test (cleanup)" echo " --validate-only Skip archsetup, only run validation checks" @@ -54,6 +59,10 @@ while [[ $# -gt 0 ]]; do ROOT_PASSWORD="${2:?--password requires a value}" shift 2 ;; + --port) + PORT="${2:?--port requires a value}" + shift 2 + ;; --rollback-first) ROLLBACK_FIRST=true shift @@ -93,9 +102,16 @@ cleanup_baremetal() { } trap cleanup_baremetal EXIT -# Override VM_IP for validation.sh ssh_cmd function -# shellcheck disable=SC2034 # consumed by the sourced validation.sh +# Connection globals consumed by ssh_cmd (validation.sh), inject_root_key +# (vm-utils.sh), and run_testinfra_validation (testinfra.sh). +# shellcheck disable=SC2034 # consumed by the sourced libraries VM_IP="$TARGET_HOST" +# shellcheck disable=SC2034 +SSH_PORT="$PORT" +# Test-user source for testinfra (reads USERNAME); the bare-metal user is the +# archsetup default, cjennings, same as the VM conf. +# shellcheck disable=SC2034 +ARCHSETUP_VM_CONF="$SCRIPT_DIR/archsetup-vm.conf" # Initialize logging mkdir -p "$TEST_RESULTS_DIR" @@ -108,8 +124,8 @@ info "Target: $TARGET_HOST" # Test SSH connectivity step "Testing SSH connectivity to $TARGET_HOST" if ! sshpass -p "$ROOT_PASSWORD" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - -o ConnectTimeout=10 "root@$TARGET_HOST" "echo connected" &>/dev/null; then - fatal "Cannot connect to $TARGET_HOST via SSH" + -o ConnectTimeout=10 -p "$PORT" "root@$TARGET_HOST" "echo connected" &>/dev/null; then + fatal "Cannot connect to $TARGET_HOST:$PORT via SSH" fi success "SSH connection OK" @@ -146,6 +162,15 @@ if $ROLLBACK_FIRST; then success "Reconnected" fi +# Authorize a throwaway root key before archsetup hardens sshd. archsetup sets +# PermitRootLogin prohibit-password and reloads sshd partway through, which kills +# root *password* SSH; key auth survives it, so every later ssh_cmd and the +# Testinfra sweep keep working. Placed after any genesis rollback so the key +# isn't rolled away. Best-effort: a failure only risks the post-hardening steps. +step "Authorizing throwaway root key (survives sshd hardening)" +inject_root_key "$TEST_RESULTS_DIR/root_key" || \ + warn "Continuing without an injected root key — SSH may fail after archsetup hardens sshd" + if ! $VALIDATE_ONLY; then # Capture pre-install state capture_pre_install_state "$TEST_RESULTS_DIR" @@ -160,7 +185,7 @@ if ! $VALIDATE_ONLY; then step "Transferring to $TARGET_HOST" ssh_cmd "rm -rf /tmp/archsetup-test && mkdir -p /tmp/archsetup-test" sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1 + ${SSH_KEY_OPT:-} -P "$PORT" "$BUNDLE_FILE" "root@$TARGET_HOST:/tmp/archsetup.bundle" >> "$LOGFILE" 2>&1 step "Extracting on target" ssh_cmd "cd /tmp && git clone /tmp/archsetup.bundle archsetup-test && rm /tmp/archsetup.bundle" >> "$LOGFILE" 2>&1 @@ -222,12 +247,12 @@ if ! $VALIDATE_ONLY; then step "Copying archsetup log" sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \ + ${SSH_KEY_OPT:-} -P "$PORT" "root@$TARGET_HOST:/var/log/archsetup-*.log" "$TEST_RESULTS_DIR/" 2>> "$LOGFILE" || \ warn "Could not copy archsetup log" step "Copying archsetup output" sshpass -p "$ROOT_PASSWORD" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \ + ${SSH_KEY_OPT:-} -P "$PORT" "root@$TARGET_HOST:$REMOTE_LOG" "$TEST_RESULTS_DIR/archsetup-output.log" 2>> "$LOGFILE" || \ warn "Could not copy output log" # Capture post-install state @@ -238,13 +263,14 @@ else mkdir -p "$TEST_RESULTS_DIR/pre-install" "$TEST_RESULTS_DIR/post-install" fi -# Run validations -run_all_validations -validate_all_services - -# Additional ZFS-specific validations -section "ZFS-Specific Validations" -validate_zfs_services +# Run validations. Testinfra is the authoritative validator (same as the VM +# runner); its ZFS-conditional pytest checks cover what validate_zfs_services +# used to, and it connects over the key authorized above. set +e because it +# returns pytest's rc (non-zero on failures) and the report + summary below must +# still run; the verdict is carried by testinfra_rc and the exit code at the end. +set +e +run_testinfra_validation "$TEST_RESULTS_DIR" +testinfra_rc=$? # Analyze logs if we ran archsetup if ! $VALIDATE_ONLY; then @@ -254,8 +280,8 @@ fi # Generate reports generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX" -# Set validation result (TEST_PASSED is the boolean; VALIDATION_PASSED stays the counter) -if [ "$VALIDATION_FAILED" -eq 0 ]; then +# The run passes only if the Testinfra sweep passed. +if [ "$testinfra_rc" -eq 0 ]; then TEST_PASSED=true else TEST_PASSED=false diff --git a/scripts/testing/run-test.sh b/scripts/testing/run-test.sh index 5830ed9..f962df3 100755 --- a/scripts/testing/run-test.sh +++ b/scripts/testing/run-test.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Run archsetup test in a VM using snapshots # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 @@ -23,6 +24,7 @@ source "$SCRIPT_DIR/lib/logging.sh" source "$SCRIPT_DIR/lib/vm-utils.sh" source "$SCRIPT_DIR/lib/network-diagnostics.sh" source "$SCRIPT_DIR/lib/validation.sh" +source "$SCRIPT_DIR/lib/testinfra.sh" # Parse arguments KEEP_VM=false @@ -48,6 +50,9 @@ while [[ $# -gt 0 ]]; do echo " --keep Keep VM in post-test state (for debugging)" echo " --script Specify custom archsetup script to test" echo " --snapshot Snapshot name to revert to (default: clean-install)" + echo "" + echo "Env: FS_PROFILE=btrfs|zfs (default btrfs) selects the base image" + echo " built by create-base-vm.sh. e.g. FS_PROFILE=zfs $0" exit 1 ;; esac @@ -98,6 +103,7 @@ init_logging "$LOGFILE" init_vm_paths "$VM_IMAGES_DIR" section "ArchSetup Test Run: $TIMESTAMP" +info "Filesystem profile: $FS_PROFILE (image: $(basename "$DISK_PATH"))" # Verify archsetup script exists if [ ! -f "$ARCHSETUP_SCRIPT" ]; then @@ -106,7 +112,11 @@ fi # Check disk exists if [ ! -f "$DISK_PATH" ]; then - info "Create it first: ./scripts/testing/create-base-vm.sh" + if [ "$FS_PROFILE" = "btrfs" ]; then + info "Create it first: ./scripts/testing/create-base-vm.sh" + else + info "Create it first: FS_PROFILE=$FS_PROFILE ./scripts/testing/create-base-vm.sh" + fi fatal "Base disk not found: $DISK_PATH" fi @@ -140,6 +150,13 @@ start_qemu "$DISK_PATH" "disk" "" "none" || fatal "Failed to start VM" wait_for_ssh "$ROOT_PASSWORD" 120 || fatal "VM SSH not available" stop_timer "boot" +# Authorize a root key now, before archsetup runs. archsetup hardens sshd to +# PermitRootLogin prohibit-password partway through, which breaks the harness's +# root password SSH; key auth survives it. Without this, the run aborts mid-way +# (before any validation) once the hardening step lands. +inject_root_key "$TEST_RESULTS_DIR/root_key" || \ + warn "Continuing without root key - run may break at the sshd hardening step" + # Run network diagnostics if ! run_network_diagnostics; then fatal "Network diagnostics failed - aborting test" @@ -240,7 +257,8 @@ fi # Poll for completion step "Monitoring archsetup progress (polling every 30 seconds)..." POLL_COUNT=0 -MAX_POLLS=180 # 90 minutes max (180 * 30 seconds) +MAX_POLLS=300 # 150 minutes max (300 * 30 seconds); a full install with heavy + # AUR builds (e.g. vagrant) can exceed 90 min on a slow mirror while [ $POLL_COUNT -lt $MAX_POLLS ]; do # Check if archsetup process is still running @@ -261,7 +279,7 @@ while [ $POLL_COUNT -lt $MAX_POLLS ]; do done if [ $POLL_COUNT -ge $MAX_POLLS ]; then - error "ArchSetup timed out after 90 minutes" + error "ArchSetup timed out after 150 minutes" ARCHSETUP_EXIT_CODE=124 else # Get exit code from the remote log @@ -307,18 +325,17 @@ copy_from_vm "/var/log/archsetup-installed-packages.txt" "$TEST_RESULTS_DIR/" "$ # Capture post-install state capture_post_install_state "$TEST_RESULTS_DIR" -# Run comprehensive validation -# This uses the validation.sh library for all checks. +# Run comprehensive validation (Testinfra/pytest is the primary validator; the +# old shell run_all_validations sweep was retired once pytest reached parity). # # From here to the end of the script, errexit is disabled on purpose: the -# validation functions are designed to fail-and-count (see VALIDATION_FAILED) -# rather than abort, and the analysis/report-generation steps below can also -# legitimately return non-zero. With `set -e` active, a single failed check -# would kill the run before the test report is written or the VM is cleaned -# up. Pass/fail is signalled explicitly by the exit code at the bottom. +# analysis/report-generation steps below can legitimately return non-zero, and +# with `set -e` active a single failed check would kill the run before the test +# report is written or the VM is cleaned up. Pass/fail is signalled explicitly +# by the exit code at the bottom. set +e -run_all_validations -validate_all_services +run_testinfra_validation "$TEST_RESULTS_DIR" +testinfra_rc=$? # Analyze log differences (pre vs post install) analyze_log_diff "$TEST_RESULTS_DIR" @@ -327,8 +344,8 @@ analyze_log_diff "$TEST_RESULTS_DIR" # If base install issues found and archzfs inbox exists, create issue file generate_issue_report "$TEST_RESULTS_DIR" "$ARCHZFS_INBOX" -# Set validation result based on failure count -if [ "$VALIDATION_FAILED" -eq 0 ]; then +# The run passes only if the Testinfra sweep passed. +if [ "$testinfra_rc" -eq 0 ]; then TEST_PASSED=true else TEST_PASSED=false diff --git a/scripts/testing/setup-testing-env.sh b/scripts/testing/setup-testing-env.sh index fb0628b..b5b584f 100755 --- a/scripts/testing/setup-testing-env.sh +++ b/scripts/testing/setup-testing-env.sh @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # Setup testing environment for archsetup # Author: Craig Jennings <craigmartinjennings@gmail.com> # License: GNU GPLv3 diff --git a/scripts/testing/tests/conftest.py b/scripts/testing/tests/conftest.py new file mode 100644 index 0000000..680c967 --- /dev/null +++ b/scripts/testing/tests/conftest.py @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Pytest + Testinfra config for archsetup post-install validation. + +These tests run on the *host* and connect to the freshly-installed VM over SSH +(Testinfra provides the `host` fixture, parametrized from --hosts). This file +adds two things the bespoke shell harness had that Testinfra does not: + + - Failure attribution. Each check is marked with the layer that owns a + failure (archsetup | base_install | unknown), mirroring validation.sh's + attribute_issue. Failures are bucketed and written to --attribution-file + so run-test.sh can route base-install issues to the archzfs inbox as before. + - Tiering markers (smoke | integration) so `pytest -m smoke` is a fast gate. + +The `target_user` fixture supplies the account archsetup created; it reads +ARCHSETUP_TEST_USER (set by run-test.sh from the VM conf) and defaults to the +historical "cjennings". +""" + +import os + +import pytest + + +_ATTRIBUTION_BUCKETS = ("archsetup", "base_install", "unknown") +_failures = {bucket: [] for bucket in _ATTRIBUTION_BUCKETS} + + +def pytest_addoption(parser): + parser.addoption( + "--attribution-file", + action="store", + default=None, + help="write the failure attribution report (archsetup/base_install/unknown) here", + ) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", + "attribution(bucket): layer that owns a failure — archsetup, base_install, or unknown", + ) + config.addinivalue_line("markers", "smoke: fast subset (user, key packages, dotfiles present)") + config.addinivalue_line("markers", "integration: full post-install checks") + + +@pytest.hookimpl(wrapper=True) +def pytest_runtest_makereport(item, call): + report = yield + if report.when == "call" and report.failed: + marker = item.get_closest_marker("attribution") + bucket = marker.args[0] if (marker and marker.args) else "archsetup" + if bucket not in _failures: + bucket = "unknown" + _failures[bucket].append(item.nodeid) + return report + + +def pytest_sessionfinish(session, exitstatus): + path = session.config.getoption("--attribution-file") + if not path: + return + with open(path, "w") as fh: + for bucket in _ATTRIBUTION_BUCKETS: + fh.write("[%s]\n" % bucket) + for nodeid in _failures[bucket]: + fh.write(" %s\n" % nodeid) + + +@pytest.fixture(scope="session") +def target_user(): + """The account archsetup created in the VM under test.""" + return os.environ.get("ARCHSETUP_TEST_USER", "cjennings") + + +@pytest.fixture(scope="session") +def home(target_user): + return "/home/%s" % target_user + + +@pytest.fixture(scope="module") +def zfs_root(host): + """True when the VM's root filesystem is ZFS (gates ZFS-specific checks).""" + return host.run("findmnt -n -o FSTYPE /").stdout.strip() == "zfs" + + +@pytest.fixture(scope="module") +def has_nvme(host): + """True when the VM exposes an NVMe device.""" + return host.run("ls /dev/nvme0n1 2>/dev/null").rc == 0 + + +@pytest.fixture(scope="module") +def hyprland_installed(host): + return host.package("hyprland").is_installed + + +@pytest.fixture(scope="module") +def dwm_installed(host): + return host.file("/usr/local/bin/dwm").exists + + +@pytest.fixture(scope="module") +def compositor_running(host): + """A graphical session is live (gates socket/portal checks that need one).""" + return host.run("pgrep -x Hyprland").rc == 0 + + +@pytest.fixture(scope="module") +def on_slirp(host): + """QEMU user-mode networking (10.0.2.x) — no multicast, so mDNS can't work.""" + return "10.0.2." in host.run("ip -4 addr show").stdout diff --git a/scripts/testing/tests/test_archsetup.py b/scripts/testing/tests/test_archsetup.py new file mode 100644 index 0000000..52fe3f7 --- /dev/null +++ b/scripts/testing/tests/test_archsetup.py @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: archsetup's own log and state markers. + +Parity port of validate_archsetup_log and validate_state_markers. +""" + +import pytest + + +EXPECTED_STATE_STEPS = 12 + + +@pytest.mark.attribution("archsetup") +def test_no_errors_in_archsetup_log(host): + out = host.run("grep -h '^Error:' /var/log/archsetup-*.log 2>/dev/null | wc -l") + count = int((out.stdout.strip() or "0")) + assert count == 0, "archsetup log reported %d Error: lines" % count + + +@pytest.mark.attribution("archsetup") +def test_all_install_steps_completed(host): + out = host.run("ls /var/lib/archsetup/state/ 2>/dev/null | wc -l") + count = int((out.stdout.strip() or "0")) + assert count >= EXPECTED_STATE_STEPS, ( + "only %d/%d install steps completed" % (count, EXPECTED_STATE_STEPS) + ) diff --git a/scripts/testing/tests/test_backups.py b/scripts/testing/tests/test_backups.py new file mode 100644 index 0000000..07da5ec --- /dev/null +++ b/scripts/testing/tests/test_backups.py @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: backup_system_file ran during a real install. + +Expansion coverage (P4). The unit suite (tests/backup-system-file/) covers the +helper's logic; this confirms it actually fires end-to-end — archsetup leaves a +<file>.archsetup.bak next to each pre-existing file it edits in place. + +These targets are edited unconditionally on every run (pacman.conf/makepkg.conf +always sed'd, sudoers always appended), so their backups must exist. +mkinitcpio.conf is edited only conditionally (the systemd-hook switch on +non-ZFS, or the nvme module on NVMe systems), so it gets its own fixture-gated +check below. Conditionally-edited files (locale.gen, geoclue, fstab) aren't +asserted here since their edits depend on the base image. +""" + +import pytest + + +ALWAYS_BACKED_UP = [ + "/etc/pacman.conf", + "/etc/makepkg.conf", + "/etc/sudoers", +] + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("path", ALWAYS_BACKED_UP) +def test_backup_created_for_edited_file(host, path): + bak = host.file(path + ".archsetup.bak") + assert bak.exists, "%s.archsetup.bak missing — backup_system_file did not fire" % path + assert bak.is_file + + +@pytest.mark.attribution("archsetup") +def test_backup_created_for_mkinitcpio(host, zfs_root, has_nvme): + # archsetup edits /etc/mkinitcpio.conf only when it has something to change: + # the systemd-hook switch (non-ZFS only) or adding the nvme module (NVMe + # systems). A ZFS root with no NVMe touches neither, so there's no backup. + if zfs_root and not has_nvme: + pytest.skip("ZFS root + no NVMe: archsetup doesn't edit mkinitcpio.conf") + bak = host.file("/etc/mkinitcpio.conf.archsetup.bak") + assert bak.exists, \ + "/etc/mkinitcpio.conf.archsetup.bak missing — backup_system_file did not fire" + assert bak.is_file diff --git a/scripts/testing/tests/test_boot.py b/scripts/testing/tests/test_boot.py new file mode 100644 index 0000000..78b4404 --- /dev/null +++ b/scripts/testing/tests/test_boot.py @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: boot, initramfs, and filesystem config. + +Parity port of validate_zfs_config, validate_boot_config, +validate_mkinitcpio_hooks, validate_initramfs_consolefont, validate_nvme_module. +Filesystem/hardware-specific checks are gated on fixtures. +""" + +import pytest + + +@pytest.mark.attribution("archsetup") +def test_bootloader_installed(host, zfs_root): + # A ZFS root boots via ZFSBootMenu (archangel installs the EFI binary under + # /efi/EFI/ZBM), so there is no GRUB; a non-ZFS root uses GRUB. + if zfs_root: + assert host.file("/efi/EFI/ZBM/zfsbootmenu.efi").exists, \ + "ZFS root must have the ZFSBootMenu EFI binary" + else: + assert host.file("/boot/grub/grub.cfg").exists, \ + "non-ZFS root must have a GRUB config" + + +@pytest.mark.attribution("archsetup") +def test_mkinitcpio_hooks(host, zfs_root): + hooks = host.run("grep '^HOOKS=' /etc/mkinitcpio.conf").stdout + if zfs_root: + # ZFS must use the udev hook; the systemd hook breaks a ZFS boot. + assert " udev" in hooks or "(udev" in hooks, "ZFS root must use the udev hook" + assert "systemd" not in hooks, "ZFS root must not use the systemd hook" + else: + # Non-ZFS: either hook is acceptable. + assert ("systemd" in hooks) or ("udev" in hooks) + + +@pytest.mark.attribution("archsetup") +def test_console_font_configured(host, zfs_root): + # archsetup sets FONT=ter-132n in /etc/vconsole.conf on every run. + assert host.file("/etc/vconsole.conf").contains("^FONT=ter-132n"), \ + "archsetup should set FONT=ter-132n in /etc/vconsole.conf" + # On non-ZFS it also rebuilds the initramfs (mkinitcpio -P) so the font is + # baked in for early boot. On ZFS that rebuild is skipped (the busybox ZFS + # hook is incompatible with the systemd-hook switch), so the font applies at + # the vconsole layer once userspace starts, not inside the initramfs. + if zfs_root: + return + # Pick the main initramfs (this fleet runs linux-lts, so the name is + # initramfs-linux-lts.img, not initramfs-linux.img); skip the fallback image. + img = host.run( + "ls /boot/initramfs-*.img 2>/dev/null | grep -v fallback | head -1" + ).stdout.strip() + assert img, "no initramfs image found under /boot" + out = host.run("lsinitcpio %s 2>/dev/null | grep -cE 'consolefont.psf|ter-'" % img) + assert int((out.stdout.strip() or "0")) > 0, "console font not found in %s" % img + + +def test_nvme_module_when_nvme_present(host, has_nvme): + if not has_nvme: + pytest.skip("no NVMe device present") + modules = host.run("grep '^MODULES=' /etc/mkinitcpio.conf").stdout + assert "nvme" in modules, "NVMe system should list nvme in mkinitcpio MODULES" + + +def test_zfs_has_sanoid(host): + if not host.exists("zfs"): + pytest.skip("ZFS not installed (non-ZFS system)") + assert host.exists("sanoid"), "ZFS system should have sanoid installed" diff --git a/scripts/testing/tests/test_config_applied.py b/scripts/testing/tests/test_config_applied.py new file mode 100644 index 0000000..00c410e --- /dev/null +++ b/scripts/testing/tests/test_config_applied.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: archsetup's in-place config edits actually took effect. + +Expansion coverage (P4). These assert the *content* archsetup writes, not just +that a service is enabled — catching cases where a sed silently no-ops (e.g. +ParallelDownloads, which current Arch ships uncommented so a "^#"-only match +left it at 5). +""" + +import pytest + + +@pytest.mark.attribution("archsetup") +def test_pacman_parallel_downloads(host): + line = host.run("grep -E '^ParallelDownloads' /etc/pacman.conf").stdout + assert "ParallelDownloads = 10" in line, "ParallelDownloads not set to 10 (got: %r)" % line + + +@pytest.mark.attribution("archsetup") +def test_pacman_color_enabled(host): + assert host.run("grep -qx Color /etc/pacman.conf").rc == 0 + + +@pytest.mark.attribution("archsetup") +def test_pacman_multilib_enabled(host): + # -F: [multilib] is a literal section header, not a regex character class. + assert host.run("grep -Fxq '[multilib]' /etc/pacman.conf").rc == 0 + + +@pytest.mark.attribution("archsetup") +def test_makepkg_parallel_make(host): + line = host.run("grep -E '^MAKEFLAGS' /etc/makepkg.conf").stdout + assert "-j" in line, "MAKEFLAGS not configured for parallel make (got: %r)" % line + + +@pytest.mark.attribution("archsetup") +def test_makepkg_options_trimmed(host): + opts = host.run("grep -E '^OPTIONS' /etc/makepkg.conf").stdout + assert "!debug" in opts and "purge" in opts, "makepkg OPTIONS not customized" + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("rel", ["dns.conf", "wifi-privacy.conf"]) +def test_networkmanager_dropin(host, rel): + assert host.file("/etc/NetworkManager/conf.d/%s" % rel).exists + + +@pytest.mark.attribution("archsetup") +def test_fail2ban_jail_local(host): + assert host.file("/etc/fail2ban/jail.local").exists + + +@pytest.mark.attribution("archsetup") +def test_reflector_config(host): + assert host.file("/etc/xdg/reflector/reflector.conf").exists diff --git a/scripts/testing/tests/test_desktop.py b/scripts/testing/tests/test_desktop.py new file mode 100644 index 0000000..c02d2b6 --- /dev/null +++ b/scripts/testing/tests/test_desktop.py @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: window manager + desktop integration. + +Parity port of validate_window_manager and its Hyprland/DWM branches, plus +validate_autologin_config. Hyprland and DWM checks are gated on which DE the +run installed; socket/portal-query checks are gated on a live compositor (the +headless test VM has none). + +Note: validate_hyprland_tools historically checked `swww`, but archsetup now +installs `awww` (swww successor) and `pacman -Q swww` no longer matches — so +this checks awww. That divergence from the shell sweep is a correctness fix. +""" + +import pytest + + +HYPRLAND_TOOLS = [ + "hyprland", "hypridle", "hyprlock", "waybar", "fuzzel", + "awww", "grim", "slurp", "gammastep", "foot", +] + +HYPRLAND_CONFIGS = [ + ".config/hypr/hyprland.conf", + ".config/hypr/hypridle.conf", + ".config/hypr/hyprlock.conf", + ".config/waybar/config", + ".config/fuzzel/fuzzel.ini", + ".config/gammastep/config.ini", +] + +SUCKLESS_TOOLS = ["dwm", "st", "dmenu", "slock"] + +PORTALS_CONF = ".config/xdg-desktop-portal/portals.conf" + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("pkg", HYPRLAND_TOOLS) +def test_hyprland_tool_installed(host, hyprland_installed, pkg): + if not hyprland_installed: + pytest.skip("Hyprland not installed (DESKTOP_ENV != hyprland)") + assert host.package(pkg).is_installed, "%s missing" % pkg + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("rel", HYPRLAND_CONFIGS) +def test_hyprland_config_present(host, hyprland_installed, home, rel): + if not hyprland_installed: + pytest.skip("Hyprland not installed (DESKTOP_ENV != hyprland)") + assert host.file("%s/%s" % (home, rel)).exists, "%s missing" % rel + + +@pytest.mark.attribution("archsetup") +def test_live_update_guard_installed(host, hyprland_installed): + if not hyprland_installed: + pytest.skip("Hyprland not installed (DESKTOP_ENV != hyprland)") + guard = host.file("/usr/local/bin/hypr-live-update-guard") + assert guard.exists, "live-update guard script missing" + assert guard.mode & 0o111, "live-update guard not executable" + hook = host.file("/etc/pacman.d/hooks/hypr-live-update-guard.hook") + assert hook.exists, "live-update guard pacman hook missing" + assert "hypr-live-update-guard" in hook.content_string, \ + "hook does not invoke the guard script" + + +@pytest.mark.attribution("archsetup") +def test_portal_settings_backend_not_disabled(host, hyprland_installed, home): + if not hyprland_installed: + pytest.skip("Hyprland not installed") + conf = host.file("%s/%s" % (home, PORTALS_CONF)) + assert conf.exists, "portals.conf missing" + line = host.run( + "grep org.freedesktop.impl.portal.Settings %s" % conf.path + ).stdout + assert "=none" not in line.replace(" ", ""), "Settings portal disabled (=none)" + + +def test_portal_returns_dark_mode(host, hyprland_installed, compositor_running, target_user): + if not hyprland_installed: + pytest.skip("Hyprland not installed") + if not compositor_running: + pytest.skip("no compositor running (headless) — portal query not applicable") + cmd = ( + "sudo -u %s busctl --user call org.freedesktop.portal.Desktop " + "/org/freedesktop/portal/desktop org.freedesktop.portal.Settings Read " + "'ss' 'org.freedesktop.appearance' 'color-scheme'" % target_user + ) + out = host.run(cmd).stdout + assert "u 1" in out, "Settings portal should report color-scheme=1 (dark)" + + +def test_hyprland_socket(host, hyprland_installed, compositor_running): + if not hyprland_installed: + pytest.skip("Hyprland not installed") + if not compositor_running: + pytest.skip("Hyprland not running (headless) — socket check not applicable") + assert host.run("test -S /tmp/hypr/*/.socket.sock").rc == 0 + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("tool", SUCKLESS_TOOLS) +def test_suckless_tool_installed(host, dwm_installed, tool): + if not dwm_installed: + pytest.skip("DWM not installed (DESKTOP_ENV != dwm)") + assert host.file("/usr/local/bin/%s" % tool).exists, "%s missing" % tool + + +def test_autologin_configured(host): + conf = host.file("/etc/systemd/system/getty@tty1.service.d/autologin.conf") + if not conf.exists: + pytest.skip("autologin not configured (AUTOLOGIN=no, may be intentional)") + assert conf.exists diff --git a/scripts/testing/tests/test_dotfiles.py b/scripts/testing/tests/test_dotfiles.py new file mode 100644 index 0000000..cd6e474 --- /dev/null +++ b/scripts/testing/tests/test_dotfiles.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: dotfiles stowed for the user. + +Parity port of validate_dotfiles from validation.sh: .zshrc must be a symlink +into the ~/.dotfiles stow tree, not broken, and readable by the user (not just +root). +""" + +import pytest + + +@pytest.mark.attribution("archsetup") +def test_zshrc_stowed_and_readable(host, target_user): + zshrc = host.file("/home/%s/.zshrc" % target_user) + assert zshrc.is_symlink, ".zshrc should be a stow symlink" + assert ".dotfiles/" in zshrc.linked_to, "symlink should point into ~/.dotfiles" + assert zshrc.exists, "symlink target must exist (not broken)" + # Readable by the user, not only root. + assert host.run("sudo -u %s test -r %s" % (target_user, zshrc.path)).rc == 0 diff --git a/scripts/testing/tests/test_hardening.py b/scripts/testing/tests/test_hardening.py new file mode 100644 index 0000000..f12b0e6 --- /dev/null +++ b/scripts/testing/tests/test_hardening.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: security/system hardening archsetup applies. + +Expansion coverage (P4) — these were not in the original shell sweep. They +assert the system-level changes archsetup makes in place: sshd root hardening, +quiet kernel console, an emptied /etc/issue, the console font, and the EFI +mount permission tightening. +""" + +import pytest + + +@pytest.mark.smoke +@pytest.mark.attribution("archsetup") +def test_sshd_root_prohibit_password(host): + conf = host.file("/etc/ssh/sshd_config.d/10-hardening.conf") + assert conf.exists, "sshd hardening drop-in missing" + assert "PermitRootLogin prohibit-password" in conf.content_string + + +@pytest.mark.attribution("archsetup") +def test_quiet_printk_sysctl(host): + conf = host.file("/etc/sysctl.d/20-quiet-printk.conf") + assert conf.exists + assert "kernel.printk" in conf.content_string + + +@pytest.mark.attribution("archsetup") +def test_issue_emptied(host): + # archsetup truncates /etc/issue to drop the distro/date banner. + assert host.file("/etc/issue").size == 0 + + +@pytest.mark.attribution("archsetup") +def test_console_font_configured(host): + assert "ter-132n" in host.file("/etc/vconsole.conf").content_string + + +@pytest.mark.attribution("archsetup") +def test_efi_mount_permissions_tightened(host): + # archsetup adds fmask/dmask to the /efi vfat line so it isn't world-readable. + fstab = host.file("/etc/fstab").content_string + efi_lines = [ + ln for ln in fstab.splitlines() + if ln.strip() and not ln.lstrip().startswith("#") + and " /efi " in ln and " vfat " in ln + ] + if not efi_lines: + pytest.skip("no /efi vfat line in fstab") + assert all("fmask=" in ln for ln in efi_lines), "/efi mount not permission-tightened" diff --git a/scripts/testing/tests/test_keyring.py b/scripts/testing/tests/test_keyring.py new file mode 100644 index 0000000..99d322d --- /dev/null +++ b/scripts/testing/tests/test_keyring.py @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: gnome-keyring pre-configuration. + +Parity port of validate_gnome_keyring_setup: the keyrings dir must exist, be +mode 700, owned by the user, with the default keyring set to "login". +""" + +import pytest + + +@pytest.fixture(scope="session") +def keyring_dir(home): + return "%s/.local/share/keyrings" % home + + +@pytest.mark.attribution("archsetup") +def test_keyring_dir_exists(host, keyring_dir): + assert host.file(keyring_dir).is_directory + + +@pytest.mark.attribution("archsetup") +def test_keyring_dir_mode_700(host, keyring_dir): + assert host.file(keyring_dir).mode == 0o700 + + +@pytest.mark.attribution("archsetup") +def test_keyring_dir_owned_by_user(host, keyring_dir, target_user): + assert host.file(keyring_dir).user == target_user + + +@pytest.mark.attribution("archsetup") +def test_default_keyring_is_login(host, keyring_dir): + default = host.file("%s/default" % keyring_dir) + assert default.exists, "default keyring file missing" + assert default.content_string.strip() == "login" diff --git a/scripts/testing/tests/test_packages.py b/scripts/testing/tests/test_packages.py new file mode 100644 index 0000000..f237088 --- /dev/null +++ b/scripts/testing/tests/test_packages.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: package managers and key packages. + +Parity port of validate_yay_installed, validate_pacman_working, +validate_terminus_font, validate_emacs, validate_git_config, validate_dev_tools. +""" + +import pytest + + +DEV_TOOLS = ["python", "node", "npm", "go", "rustc"] + + +@pytest.mark.smoke +@pytest.mark.attribution("archsetup") +def test_yay_installed(host): + assert host.exists("yay"), "yay binary not on PATH" + + +@pytest.mark.attribution("archsetup") +def test_yay_functional(host, target_user): + # yay must actually query the package DB as the user, not just exist. + assert host.run("sudo -u %s yay -Qi yay" % target_user).rc == 0 + + +@pytest.mark.smoke +@pytest.mark.attribution("unknown") +def test_pacman_functional(host): + assert host.package("base").is_installed + + +@pytest.mark.attribution("archsetup") +def test_terminus_font_installed(host): + assert host.package("terminus-font").is_installed + + +@pytest.mark.attribution("archsetup") +def test_emacs_installed(host): + assert host.exists("emacs") + + +@pytest.mark.attribution("archsetup") +def test_emacs_config_readable_by_user(host, target_user, home): + emacsd = host.file("%s/.emacs.d" % home) + if not emacsd.exists: + pytest.skip(".emacs.d not present (config dir optional on some profiles)") + assert emacsd.is_directory + assert host.run("sudo -u %s ls %s" % (target_user, emacsd.path)).rc == 0 + + +@pytest.mark.smoke +@pytest.mark.attribution("archsetup") +def test_git_installed(host): + assert host.exists("git") + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("tool", DEV_TOOLS) +def test_dev_tool_present(host, tool): + assert host.exists(tool), "dev tool %s missing from PATH" % tool diff --git a/scripts/testing/tests/test_services.py b/scripts/testing/tests/test_services.py new file mode 100644 index 0000000..0ca3970 --- /dev/null +++ b/scripts/testing/tests/test_services.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: services, timers, and their functional health. + +Parity port of validate_firewall, validate_dns_config, validate_avahi, +validate_fail2ban, validate_networkmanager, and validate_all_services / +validate_service_functions. + +Mapping of the shell sweep's three outcomes: + - validation_fail (hard) -> assert + - validation_warn (soft) -> pytest.skip with the reason (visible, never red) + - validation_skip (precond)-> pytest.skip gated on a fixture +""" + +import pytest + + +# Required services: (name, must_be_active). ufw can't activate in the VM (no +# iptables kernel modules), so it's enabled-only; cronie/atd are enabled-only too. +REQUIRED_ENABLED_ACTIVE = ["sshd", "systemd-resolved", "fail2ban", "NetworkManager", "rngd"] +REQUIRED_ENABLED_ONLY = ["ufw", "cronie", "atd"] +REQUIRED_TIMERS = ["reflector.timer", "paccache.timer"] +OPTIONAL_SERVICES = ["avahi-daemon", "bluetooth", "cups", "docker", "tailscaled"] + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("svc", REQUIRED_ENABLED_ACTIVE) +def test_required_service_enabled_and_active(host, svc): + s = host.service(svc) + assert s.is_enabled, "%s should be enabled" % svc + assert s.is_running, "%s should be active" % svc + + +@pytest.mark.smoke +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("svc", REQUIRED_ENABLED_ONLY) +def test_required_service_enabled(host, svc): + assert host.service(svc).is_enabled, "%s should be enabled" % svc + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("timer", REQUIRED_TIMERS) +def test_required_timer_enabled(host, timer): + assert host.service(timer).is_enabled, "%s should be enabled" % timer + + +@pytest.mark.parametrize("svc", OPTIONAL_SERVICES) +def test_optional_service(host, svc): + # Optional: warn-if-missing in the shell sweep -> skip here so it never reds. + if not host.service(svc).is_enabled: + pytest.skip("%s not enabled (optional)" % svc) + + +@pytest.mark.attribution("archsetup") +def test_dns_over_tls_dropin_present(host): + # archsetup ships /etc/systemd/resolved.conf.d/dns-over-tls.conf. + assert host.file("/etc/systemd/resolved.conf.d/dns-over-tls.conf").exists + + +@pytest.mark.attribution("archsetup") +def test_fail2ban_responds(host): + assert host.run("fail2ban-client status").rc == 0 + + +@pytest.mark.attribution("archsetup") +def test_networkmanager_responds(host): + assert host.run("nmcli general status").rc == 0 + + +@pytest.mark.attribution("archsetup") +def test_log_cleanup_cron_installed(host, target_user): + out = host.run("sudo -u %s crontab -l" % target_user).stdout + assert "log-cleanup" in out, "log-cleanup entry missing from user crontab" + + +@pytest.mark.attribution("archsetup") +def test_syncthing_user_lingering_enabled(host, target_user): + # syncthing runs as a user service; lingering must be on for autostart. + assert host.file("/var/lib/systemd/linger/%s" % target_user).exists + + +def test_dns_resolution(host): + # Network-dependent; advisory in the shell sweep. Skip on failure. + if host.run("resolvectl query archlinux.org").rc != 0: + pytest.skip("DNS resolution query failed (network-dependent)") + + +def test_mdns_resolves(host, on_slirp): + # mDNS needs multicast, which QEMU slirp doesn't pass. + if on_slirp: + pytest.skip("mDNS not possible on slirp networking (no multicast)") + if not host.service("avahi-daemon").is_enabled: + pytest.skip("avahi-daemon not enabled") + hostname = host.run("hostname").stdout.strip() + assert host.run("ping -c 1 -W 2 %s.local" % hostname).rc == 0 + + +def test_docker_functional(host): + if not host.service("docker").is_enabled: + pytest.skip("docker not enabled") + if not host.service("docker").is_running: + # archsetup enables docker for next boot, not --now; pre-reboot this is correct. + pytest.skip("docker enabled but not started (starts on boot by design)") + assert host.run("docker info").rc == 0 diff --git a/scripts/testing/tests/test_users.py b/scripts/testing/tests/test_users.py new file mode 100644 index 0000000..c0097ed --- /dev/null +++ b/scripts/testing/tests/test_users.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +"""Post-install checks: the user account archsetup creates. + +Parity port of validate_user_created / validate_user_shell / validate_user_groups. +""" + +import pytest + + +# Groups archsetup adds: wheel (useradd -G), the usermod -aG set, and docker +# (added later in the developer-workstation step). +EXPECTED_GROUPS = [ + "wheel", "sys", "adm", "network", "scanner", "power", "uucp", + "audio", "lp", "rfkill", "video", "storage", "optical", "users", "docker", +] + + +@pytest.mark.smoke +@pytest.mark.attribution("archsetup") +def test_user_exists(host, target_user): + assert host.user(target_user).exists + + +@pytest.mark.attribution("archsetup") +def test_user_shell_is_zsh(host, target_user): + # archsetup may set either path depending on how zsh resolves. + assert host.user(target_user).shell in ("/bin/zsh", "/usr/bin/zsh") + + +@pytest.mark.attribution("archsetup") +@pytest.mark.parametrize("group", EXPECTED_GROUPS) +def test_user_in_group(host, target_user, group): + # Parametrized so a failure names the exact missing group. + assert group in host.user(target_user).groups diff --git a/scripts/wipedisk b/scripts/wipedisk index 0c08c72..b833407 100644 --- a/scripts/wipedisk +++ b/scripts/wipedisk @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# SPDX-License-Identifier: GPL-3.0-or-later # Craig Jennings <c@cjennings.net> # identify disk and erase diff --git a/scripts/zfs-replicate b/scripts/zfs-replicate index cf946f1..02ffcf5 100755 --- a/scripts/zfs-replicate +++ b/scripts/zfs-replicate @@ -1,4 +1,5 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-3.0-or-later # zfs-replicate - Replicate ZFS datasets to TrueNAS # # Usage: |
