aboutsummaryrefslogtreecommitdiff
path: root/scripts/testing/lib
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/testing/lib')
-rwxr-xr-xscripts/testing/lib/finalize-base-vm.sh21
-rwxr-xr-xscripts/testing/lib/logging.sh151
-rw-r--r--scripts/testing/lib/network-diagnostics.sh60
-rw-r--r--scripts/testing/lib/validation.sh1080
-rwxr-xr-xscripts/testing/lib/vm-utils.sh321
5 files changed, 1633 insertions, 0 deletions
diff --git a/scripts/testing/lib/finalize-base-vm.sh b/scripts/testing/lib/finalize-base-vm.sh
new file mode 100755
index 0000000..e3913ea
--- /dev/null
+++ b/scripts/testing/lib/finalize-base-vm.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Finalize base VM after installation
+VM_NAME="archsetup-base"
+echo "[i] Removing ISO from VM..."
+virsh change-media $VM_NAME sda --eject 2>/dev/null || true
+virsh change-media $VM_NAME hda --eject 2>/dev/null || true
+echo "[✓] ISO removed"
+echo "[i] Starting VM from installed system..."
+virsh start $VM_NAME
+echo "[i] Waiting for boot..."
+sleep 30
+IP=$(virsh domifaddr $VM_NAME 2>/dev/null | grep -oP '(\d+\.){3}\d+' | head -1)
+echo "[✓] Base VM is ready!"
+echo ""
+echo "Connect via:"
+echo " Console: virsh console $VM_NAME"
+echo " SSH: ssh root@$IP"
+echo " Password: archsetup"
+echo ""
+echo "To create a test clone:"
+echo " ./scripts/testing/run-test.sh"
diff --git a/scripts/testing/lib/logging.sh b/scripts/testing/lib/logging.sh
new file mode 100755
index 0000000..eda9eb1
--- /dev/null
+++ b/scripts/testing/lib/logging.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# Logging utilities for archsetup testing
+# Author: Craig Jennings <craigmartinjennings@gmail.com>
+# License: GNU GPLv3
+
+# Global log file path used by the logging helpers in this file.
+# A LOGFILE already set by the calling script or environment is kept;
+# otherwise it defaults to /tmp/archsetup-test.log.
+LOGFILE="${LOGFILE:-/tmp/archsetup-test.log}"
+
+# Initialize logging
+init_logging() {
+ local logfile="$1"
+ LOGFILE="$logfile"
+
+ # Create log directory if it doesn't exist
+ mkdir -p "$(dirname "$LOGFILE")"
+
+ # Initialize log file
+ echo "=== Test Log Started: $(date +'%Y-%m-%d %H:%M:%S') ===" > "$LOGFILE"
+ echo "" >> "$LOGFILE"
+}
+
+# Append a timestamped entry to $LOGFILE (nothing is printed to stdout).
+# Args:    $1 = message text
+# Globals: LOGFILE (read)
+log() {
+    local entry="$1"
+    local now
+    now=$(date +'%Y-%m-%d %H:%M:%S')
+    printf '[%s] %s\n' "$now" "$entry" >> "$LOGFILE"
+}
+
+# Print an informational message to stdout and record it in the log.
+# Args: $1 = message text
+info() {
+    local text="$1"
+    printf '[i] %s\n' "$text"
+    log "INFO: $text"
+}
+
+# Print a success message to stdout and record it in the log.
+# Args: $1 = message text
+success() {
+    local text="$1"
+    printf '[✓] %s\n' "$text"
+    log "SUCCESS: $text"
+}
+
+# Print a warning to stdout and record it in the log.
+# Args: $1 = message text
+warn() {
+    local text="$1"
+    printf '[!] %s\n' "$text"
+    log "WARNING: $text"
+}
+
+# Print an error to stderr and record it in the log. Does not exit.
+# Args: $1 = message text
+error() {
+    local text="$1"
+    printf '[✗] %s\n' "$text" >&2
+    log "ERROR: $text"
+}
+
+# Report an unrecoverable error on stderr, log it, and terminate the script.
+# Args: $1 = message text
+#       $2 = exit code (optional, default 1)
+fatal() {
+    local reason="$1"
+    local status="${2:-1}"
+    printf '[✗] FATAL: %s\n' "$reason" >&2
+    log "FATAL: $reason (exit code: $status)"
+    exit "$status"
+}
+
+# Section header
+section() {
+ local title="$1"
+ echo ""
+ echo "=== $title ==="
+ log "=== $title ==="
+}
+
+# Announce an individual step on stdout and in the log.
+# Args: $1 = step description
+step() {
+    local text="$1"
+    printf ' -> %s\n' "$text"
+    log " STEP: $text"
+}
+
+# Emit a progress note for a long-running operation (stdout and log).
+# Args: $1 = progress text
+progress() {
+    local text="$1"
+    printf ' ... %s\n' "$text"
+    log " PROGRESS: $text"
+}
+
+# Print a completion line and log it. It only prints a new line; no earlier
+# output is erased.
+# Args: $1 = completion text
+complete() {
+    local text="$1"
+    printf ' [✓] %s\n' "$text"
+    log " COMPLETE: $text"
+}
+
+# Echo a command line (prefixed with "$ ") for debugging and log it.
+# The command is only displayed, never executed.
+# Args: $1 = command string
+show_cmd() {
+    local command_line="$1"
+    printf '$ %s\n' "$command_line"
+    log "CMD: $command_line"
+}
+
+# Separator line
+separator() {
+ echo "----------------------------------------"
+}
+
+# Summary statistics
+summary() {
+ local passed="$1"
+ local failed="$2"
+ local total=$((passed + failed))
+
+ echo ""
+ separator
+ section "Test Summary"
+ echo " Total: $total"
+ echo " Passed: $passed"
+ echo " Failed: $failed"
+ separator
+ echo ""
+
+ log "=== Test Summary ==="
+ log "Total: $total, Passed: $passed, Failed: $failed"
+}
+
+# Timer utilities
+declare -A TIMERS
+
+start_timer() {
+ local name="${1:-default}"
+ TIMERS[$name]=$(date +%s)
+ log "TIMER START: $name"
+}
+
+stop_timer() {
+ local name="${1:-default}"
+ local start=${TIMERS[$name]}
+ local end=$(date +%s)
+ local duration=$((end - start))
+ local mins=$((duration / 60))
+ local secs=$((duration % 60))
+
+ if [ $mins -gt 0 ]; then
+ echo " Time: ${mins}m ${secs}s"
+ log "TIMER STOP: $name (${mins}m ${secs}s)"
+ else
+ echo " Time: ${secs}s"
+ log "TIMER STOP: $name (${secs}s)"
+ fi
+}
diff --git a/scripts/testing/lib/network-diagnostics.sh b/scripts/testing/lib/network-diagnostics.sh
new file mode 100644
index 0000000..3f9735b
--- /dev/null
+++ b/scripts/testing/lib/network-diagnostics.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Network diagnostics for VM testing
+# Author: Craig Jennings <craigmartinjennings@gmail.com>
+# License: GNU GPLv3
+
+# Note: logging.sh should already be sourced by the calling script
+
+# Run quick network diagnostics from inside the VM over SSH.
+# Checks, in order: ping to 8.8.8.8, DNS lookup of archlinux.org, HTTPS access
+# to an Arch mirror, HTTPS access to the AUR; then prints the VM's non-loopback
+# IPv4 addresses.
+# Args:    $1 = VM IP address or hostname
+# Globals: ROOT_PASSWORD (read, optional; falls back to 'archsetup')
+# Returns: 0 if every check passes, 1 on the first failing check or when no
+#          host is given
+run_network_diagnostics() {
+    local vm_host="$1"
+    local line
+
+    if [ -z "$vm_host" ]; then
+        error "run_network_diagnostics: VM host argument is required"
+        return 1
+    fi
+
+    # Shared SSH invocation, built once as an array so the host stays quoted.
+    # NOTE(review): ROOT_PASSWORD is honored to match validation.sh's ssh_cmd;
+    # confirm callers set it to the VM's root password when they set it at all.
+    local -a remote=(
+        sshpass -p "${ROOT_PASSWORD:-archsetup}" ssh
+        -o StrictHostKeyChecking=no
+        -o UserKnownHostsFile=/dev/null
+        "root@$vm_host"
+    )
+
+    section "Pre-flight Network Diagnostics"
+
+    # Test 1: Basic connectivity
+    step "Testing internet connectivity"
+    if "${remote[@]}" "ping -c 3 8.8.8.8 >/dev/null 2>&1"; then
+        success "Internet connectivity OK"
+    else
+        error "No internet connectivity"
+        return 1
+    fi
+
+    # Test 2: DNS resolution
+    step "Testing DNS resolution"
+    if "${remote[@]}" "nslookup archlinux.org >/dev/null 2>&1"; then
+        success "DNS resolution OK"
+    else
+        error "DNS resolution failed"
+        return 1
+    fi
+
+    # Test 3: Arch mirror accessibility (status line must contain 200 or 301)
+    step "Testing Arch mirror access"
+    if "${remote[@]}" "curl -s -I https://mirrors.kernel.org/archlinux/ | head -1 | grep -qE '(200|301)'"; then
+        success "Arch mirrors accessible"
+    else
+        error "Cannot reach Arch mirrors"
+        return 1
+    fi
+
+    # Test 4: AUR accessibility (status line must contain 200 or 405)
+    step "Testing AUR access"
+    if "${remote[@]}" "curl -s -I https://aur.archlinux.org/ | head -1 | grep -qE '(200|405)'"; then
+        success "AUR accessible"
+    else
+        error "Cannot reach AUR"
+        return 1
+    fi
+
+    # Show network info. `read -r` keeps backslashes literal; the default IFS
+    # still trims the leading indentation from each line.
+    info "Network configuration:"
+    "${remote[@]}" "ip addr show | grep 'inet ' | grep -v '127.0.0.1'" 2>/dev/null |
+        while read -r line; do
+            info " $line"
+        done
+
+    success "Network diagnostics complete"
+    return 0
+}
diff --git a/scripts/testing/lib/validation.sh b/scripts/testing/lib/validation.sh
new file mode 100644
index 0000000..c0a5d43
--- /dev/null
+++ b/scripts/testing/lib/validation.sh
@@ -0,0 +1,1080 @@
+#!/bin/bash
+# Validation utilities for archsetup testing
+# Author: Craig Jennings <craigmartinjennings@gmail.com>
+# License: GNU GPLv3
+#
+# This module provides comprehensive validation checks for archsetup installations.
+# It captures pre-install state, runs post-install validations, and attributes
+# issues to either archsetup or the base install (archzfs/vanilla Arch).
+
+# Validation counters
+VALIDATION_PASSED=0
+VALIDATION_FAILED=0
+VALIDATION_WARNINGS=0
+
+# Arrays to track issues
+declare -a ARCHSETUP_ISSUES
+declare -a BASE_INSTALL_ISSUES
+declare -a UNKNOWN_ISSUES
+
+# Run a command on the test VM as root over SSH.
+# Globals: VM_IP, ROOT_PASSWORD (read)
+# Args:    "$@" = command (and arguments) handed to ssh for remote execution
+# Outputs: the remote command's stdout; everything on stderr is discarded
+# Returns: exit status of the sshpass/ssh invocation
+ssh_cmd() {
+    local -a ssh_opts=(
+        -o StrictHostKeyChecking=no
+        -o UserKnownHostsFile=/dev/null
+        -o ConnectTimeout=10
+    )
+    sshpass -p "$ROOT_PASSWORD" ssh "${ssh_opts[@]}" "root@$VM_IP" "$@" 2>/dev/null
+}
+
+# Validation result helpers
+# Record a passed validation: report it via success() and bump the counter.
+# Args:    $1 = test name
+# Globals: VALIDATION_PASSED (incremented)
+validation_pass() {
+    local label="$1"
+    success "$label"
+    VALIDATION_PASSED=$((VALIDATION_PASSED + 1))
+}
+
+validation_fail() {
+ local test_name="$1"
+ local details="${2:-}"
+ error "$test_name"
+ [ -n "$details" ] && info " Details: $details"
+ ((VALIDATION_FAILED++)) || true
+}
+
+# Record a validation warning: report it via warn() and bump the counter.
+# Args:    $1 = test name
+#          $2 = optional detail text, printed via info() when non-empty
+# Globals: VALIDATION_WARNINGS (incremented)
+validation_warn() {
+    local label="$1"
+    local extra="${2:-}"
+    warn "$label"
+    if [ -n "$extra" ]; then
+        info " Details: $extra"
+    fi
+    VALIDATION_WARNINGS=$((VALIDATION_WARNINGS + 1))
+}
+
+# Attribute an issue to archsetup or base install
+attribute_issue() {
+ local issue="$1"
+ local source="$2" # "archsetup", "base", or "unknown"
+
+ case "$source" in
+ archsetup)
+ ARCHSETUP_ISSUES+=("$issue")
+ ;;
+ base)
+ BASE_INSTALL_ISSUES+=("$issue")
+ ;;
+ *)
+ UNKNOWN_ISSUES+=("$issue")
+ ;;
+ esac
+}
+
+#=============================================================================
+# PRE-INSTALL LOG CAPTURE
+#=============================================================================
+
+# Snapshot the VM's logs, packages and service state before archsetup runs.
+# Args:    $1 = output directory; files are written to $1/pre-install
+# Globals: PRE_INSTALL_ERROR_COUNT (written: line count of errors.log)
+capture_pre_install_state() {
+    local output_dir="$1"
+    local spec
+
+    # "<file>|<remote command>" pairs, captured in this order:
+    # journal, dmesg, package list, service status, failed services,
+    # existing errors in logs.
+    local -a captures=(
+        "journal.log|journalctl -b --no-pager"
+        "dmesg.log|dmesg"
+        "packages.txt|pacman -Q"
+        "services.txt|systemctl list-units --type=service --all"
+        "failed-services.txt|systemctl --failed"
+        "errors.log|journalctl -b -p err --no-pager"
+    )
+
+    section "Capturing Pre-Install State"
+    mkdir -p "$output_dir/pre-install"
+
+    step "Capturing system logs before archsetup"
+
+    # Each capture is best-effort: a failed SSH call leaves an empty file.
+    for spec in "${captures[@]}"; do
+        ssh_cmd "${spec#*|}" > "$output_dir/pre-install/${spec%%|*}" 2>&1 || true
+    done
+
+    # Count pre-existing errors
+    PRE_INSTALL_ERROR_COUNT=$(wc -l < "$output_dir/pre-install/errors.log" 2>/dev/null || echo 0)
+
+    success "Pre-install state captured ($PRE_INSTALL_ERROR_COUNT pre-existing error lines)"
+}
+
+#=============================================================================
+# POST-INSTALL LOG CAPTURE
+#=============================================================================
+
+capture_post_install_state() {
+ local output_dir="$1"
+
+ section "Capturing Post-Install State"
+ mkdir -p "$output_dir/post-install"
+
+ step "Capturing system logs after archsetup"
+
+ # Capture journal
+ ssh_cmd "journalctl -b --no-pager" > "$output_dir/post-install/journal.log" 2>&1 || true
+
+ # Capture dmesg
+ ssh_cmd "dmesg" > "$output_dir/post-install/dmesg.log" 2>&1 || true
+
+ # Capture package list
+ ssh_cmd "pacman -Q" > "$output_dir/post-install/packages.txt" 2>&1 || true
+
+ # Capture service status
+ ssh_cmd "systemctl list-units --type=service --all" > "$output_dir/post-install/services.txt" 2>&1 || true
+
+ # Capture failed services
+ ssh_cmd "systemctl --failed" > "$output_dir/post-install/failed-services.txt" 2>&1 || true
+
+ # Capture all errors
+ ssh_cmd "journalctl -b -p err --no-pager" > "$output_dir/post-install/errors.log" 2>&1 || true
+
+ # Capture archsetup log
+ ssh_cmd "cat /var/log/archsetup-*.log 2>/dev/null" > "$output_dir/post-install/archsetup.log" 2>&1 || true
+
+ success "Post-install state captured"
+}
+
+#=============================================================================
+# LOG DIFF ANALYSIS
+#=============================================================================
+
+analyze_log_diff() {
+ local output_dir="$1"
+
+ section "Analyzing Log Differences"
+ mkdir -p "$output_dir/analysis"
+
+ step "Comparing pre and post install errors"
+
+ # Find new errors (in post but not in pre)
+ if [ -f "$output_dir/pre-install/errors.log" ] && [ -f "$output_dir/post-install/errors.log" ]; then
+ comm -13 <(sort "$output_dir/pre-install/errors.log") <(sort "$output_dir/post-install/errors.log") \
+ > "$output_dir/analysis/new-errors.log" 2>/dev/null || true
+
+ NEW_ERROR_COUNT=$(wc -l < "$output_dir/analysis/new-errors.log" 2>/dev/null || echo 0)
+
+ if [ "$NEW_ERROR_COUNT" -gt 0 ]; then
+ warn "Found $NEW_ERROR_COUNT new error lines after archsetup"
+ # Categorize errors
+ categorize_errors "$output_dir/analysis/new-errors.log" "$output_dir/analysis"
+ else
+ success "No new errors introduced by archsetup"
+ fi
+ fi
+
+ step "Checking for new failed services"
+
+ # Compare failed services
+ if [ -f "$output_dir/pre-install/failed-services.txt" ] && [ -f "$output_dir/post-install/failed-services.txt" ]; then
+ local pre_failed post_failed
+ pre_failed=$(grep -c "failed" "$output_dir/pre-install/failed-services.txt" 2>/dev/null | tr -d '[:space:]')
+ post_failed=$(grep -c "failed" "$output_dir/post-install/failed-services.txt" 2>/dev/null | tr -d '[:space:]')
+ # Default to 0 if empty
+ pre_failed=${pre_failed:-0}
+ post_failed=${post_failed:-0}
+
+ if [ "$post_failed" -gt "$pre_failed" ]; then
+ warn "New failed services detected (before: $pre_failed, after: $post_failed)"
+ diff "$output_dir/pre-install/failed-services.txt" "$output_dir/post-install/failed-services.txt" \
+ > "$output_dir/analysis/failed-services-diff.txt" 2>/dev/null || true
+ else
+ success "No new service failures"
+ fi
+ fi
+
+ step "Counting new packages installed"
+
+ if [ -f "$output_dir/pre-install/packages.txt" ] && [ -f "$output_dir/post-install/packages.txt" ]; then
+ comm -13 <(sort "$output_dir/pre-install/packages.txt") <(sort "$output_dir/post-install/packages.txt") \
+ > "$output_dir/analysis/new-packages.txt" 2>/dev/null || true
+
+ local new_pkg_count=$(wc -l < "$output_dir/analysis/new-packages.txt" 2>/dev/null || echo 0)
+ info "Installed $new_pkg_count new packages"
+ fi
+}
+
+# Sort each line of an error log into benign / archsetup / base-install
+# buckets. Matching is case-insensitive extended regex; benign patterns are
+# checked first.
+# Args:    $1 = error log to read
+#          $2 = directory receiving benign-errors.log, archsetup-errors.log and
+#               base-install-errors.log (appended to)
+# Globals: issue arrays, via attribute_issue (benign lines are not attributed)
+categorize_errors() {
+    local error_log="$1"
+    local output_dir="$2"
+    local line benign_re archsetup_re
+
+    # Known benign errors/warnings to ignore
+    local -a BENIGN_PATTERNS=(
+        "SPL:.*module verification failed"
+        "ZFS:.*module verification failed"
+        "tainting kernel"
+        "RAS:.*Correctable Errors"
+        "ACPI.*AE_NOT_FOUND"
+        "firmware.*regulatory"
+        "Invalid user name.*in service file" # dbus-broker timing during package install
+    )
+
+    # Patterns that indicate archsetup issues
+    local -a ARCHSETUP_PATTERNS=(
+        "archsetup"
+        "stow"
+        "yay"
+        "makepkg"
+        "pacman.*error"
+    )
+
+    # Join each list into a single alternation so a line costs at most two
+    # grep calls instead of one per pattern. The subshell keeps the IFS change
+    # away from the read loop below.
+    benign_re=$(IFS='|'; printf '%s' "${BENIGN_PATTERNS[*]}")
+    archsetup_re=$(IFS='|'; printf '%s' "${ARCHSETUP_PATTERNS[*]}")
+
+    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline
+    while IFS= read -r line || [ -n "$line" ]; do
+        if grep -qiE -- "$benign_re" <<< "$line"; then
+            printf '%s\n' "$line" >> "$output_dir/benign-errors.log"
+        elif grep -qiE -- "$archsetup_re" <<< "$line"; then
+            printf '%s\n' "$line" >> "$output_dir/archsetup-errors.log"
+            attribute_issue "$line" "archsetup"
+        else
+            printf '%s\n' "$line" >> "$output_dir/base-install-errors.log"
+            attribute_issue "$line" "base"
+        fi
+    done < "$error_log"
+}
+
+#=============================================================================
+# VALIDATION CHECKS
+#=============================================================================
+
+run_all_validations() {
+ section "Running Validation Checks"
+
+ # User & Authentication
+ validate_user_created
+ validate_user_shell
+ validate_user_groups
+
+ # Dotfiles
+ validate_dotfiles
+
+ # Package Managers
+ validate_yay_installed
+ validate_pacman_working
+
+ # Window Manager (detects DWM or Hyprland automatically)
+ validate_window_manager
+
+ # Essential Services
+ validate_firewall
+ validate_dns_config
+ validate_avahi
+ validate_fail2ban
+ validate_networkmanager
+
+ # Developer Tools
+ validate_emacs
+ validate_git_config
+ validate_dev_tools
+
+ # System Configuration
+ validate_zfs_config
+ validate_boot_config
+ validate_autologin_config
+ validate_gnome_keyring_setup
+
+ # Boot & Initramfs (critical for ZFS systems)
+ validate_terminus_font
+ validate_mkinitcpio_hooks
+ validate_initramfs_consolefont
+ validate_nvme_module
+
+ # Archsetup Specific
+ validate_archsetup_log
+ validate_state_markers
+}
+
+#-----------------------------------------------------------------------------
+# User & Authentication Validations
+#-----------------------------------------------------------------------------
+
+validate_user_created() {
+ step "Checking if user 'cjennings' exists"
+ if ssh_cmd "id cjennings" &>> "$LOGFILE"; then
+ validation_pass "User cjennings exists"
+ else
+ validation_fail "User cjennings not found"
+ attribute_issue "User cjennings not created" "archsetup"
+ fi
+}
+
+validate_user_shell() {
+ step "Checking if ZSH is default shell"
+ local shell=$(ssh_cmd "getent passwd cjennings | cut -d: -f7")
+ if [ "$shell" = "/bin/zsh" ] || [ "$shell" = "/usr/bin/zsh" ]; then
+ validation_pass "ZSH is default shell"
+ else
+ validation_fail "ZSH not default shell (got: $shell)"
+ attribute_issue "ZSH not set as default shell" "archsetup"
+ fi
+}
+
+validate_user_groups() {
+ step "Checking user group memberships"
+ # Groups added by archsetup:
+ # - wheel (useradd -G wheel)
+ # - sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users (usermod -aG)
+ # - docker (gpasswd -a, added later in developer_workstation)
+ local expected_groups="wheel sys adm network scanner power uucp audio lp rfkill video storage optical users docker"
+ local missing_groups=""
+
+ for group in $expected_groups; do
+ if ! ssh_cmd "groups cjennings" | grep -q "\b$group\b"; then
+ missing_groups="$missing_groups $group"
+ fi
+ done
+
+ if [ -z "$missing_groups" ]; then
+ validation_pass "User in all expected groups (15 groups)"
+ else
+ validation_fail "User missing groups:$missing_groups"
+ attribute_issue "User missing groups:$missing_groups" "archsetup"
+ fi
+}
+
+#-----------------------------------------------------------------------------
+# Dotfiles Validations
+#-----------------------------------------------------------------------------
+
+# Verify that the dotfiles were stowed correctly, using ~/.zshrc as the probe:
+# it must be a symlink, point into the archsetup dotfiles tree, resolve to an
+# existing file, and be readable by cjennings.
+# Returns: 1 on the first failed check, otherwise the status of validation_pass
+validate_dotfiles() {
+    step "Checking dotfiles setup"
+    local zshrc="/home/cjennings/.zshrc"
+    local expected_pattern="code/archsetup/dotfiles/system/.zshrc"
+    local target result
+
+    # 1. Check if .zshrc is a symlink
+    if ! ssh_cmd "test -L $zshrc"; then
+        validation_fail "Dotfiles not stowed (.zshrc is not a symlink)"
+        attribute_issue "Dotfiles stow failed" "archsetup"
+        return 1
+    fi
+
+    # 2. Check symlink points to correct location.
+    # Literal substring match: the expected path contains '.', which a regex
+    # match would treat as "any character".
+    target=$(ssh_cmd "readlink $zshrc")
+    if [[ "$target" != *"$expected_pattern"* ]]; then
+        validation_fail "Dotfiles symlink points to wrong location: $target"
+        attribute_issue "Dotfiles symlink incorrect: $target" "archsetup"
+        return 1
+    fi
+
+    # 3. Check the target file actually exists (not a broken symlink)
+    if ! ssh_cmd "test -f $zshrc"; then
+        validation_fail "Dotfiles symlink is broken (target doesn't exist)"
+        ssh_cmd "ls -la $zshrc" >> "$LOGFILE" 2>&1
+        attribute_issue "Dotfiles symlink broken" "archsetup"
+        return 1
+    fi
+
+    # 4. Check user can actually read the file (not just root)
+    result=$(ssh_cmd "sudo -u cjennings cat $zshrc > /dev/null 2>&1 && echo OK || echo FAIL")
+    if [ "$result" != "OK" ]; then
+        validation_fail "Dotfiles not readable by user (permission issue)"
+        ssh_cmd "ls -la $zshrc" >> "$LOGFILE" 2>&1
+        attribute_issue "Dotfiles not readable by user" "archsetup"
+        return 1
+    fi
+
+    validation_pass "Dotfiles configured correctly (symlink to $target, readable by user)"
+}
+
+#-----------------------------------------------------------------------------
+# Package Manager Validations
+#-----------------------------------------------------------------------------
+
+# Verify that the yay AUR helper is on PATH and can answer a package query
+# when run as cjennings.
+# Returns: 1 if yay is not found
+validate_yay_installed() {
+    step "Checking if yay (AUR helper) is installed and functional"
+
+    # Check binary exists. `command -v` is a shell builtin, so the probe does
+    # not depend on the separate `which` utility being installed on the VM.
+    if ! ssh_cmd "command -v yay" &>> "$LOGFILE"; then
+        validation_fail "yay not found"
+        attribute_issue "yay not installed" "archsetup"
+        return 1
+    fi
+
+    # Check yay can query packages (functional test)
+    if ssh_cmd "sudo -u cjennings yay -Qi yay" &>> "$LOGFILE"; then
+        validation_pass "yay is installed and functional"
+    else
+        validation_fail "yay binary exists but query failed"
+        attribute_issue "yay not functional" "archsetup"
+    fi
+}
+
+# Verify that pacman can answer a query for the 'base' package.
+# A failure is filed as "unknown" rather than blamed on archsetup.
+validate_pacman_working() {
+    step "Checking if pacman is functional"
+    if ! ssh_cmd "pacman -Qi base" &>> "$LOGFILE"; then
+        validation_fail "pacman query failed"
+        attribute_issue "pacman not functional" "unknown"
+        return
+    fi
+    validation_pass "pacman is functional"
+}
+
+#-----------------------------------------------------------------------------
+# Window Manager Validations
+#-----------------------------------------------------------------------------
+
+# Verify that dwm, st, dmenu and slock exist as files in /usr/local/bin.
+# Missing tools are reported in one failure and attributed to archsetup.
+validate_suckless_tools() {
+    step "Checking suckless tools (dwm, st, dmenu, slock)"
+    local missing=""
+    local tool # declared local so the loop variable does not leak to callers
+
+    for tool in dwm st dmenu slock; do
+        if ! ssh_cmd "test -f /usr/local/bin/$tool"; then
+            missing="$missing $tool"
+        fi
+    done
+
+    if [ -z "$missing" ]; then
+        validation_pass "All suckless tools installed (dwm, st, dmenu, slock)"
+    else
+        validation_fail "Missing suckless tools:$missing"
+        attribute_issue "Missing suckless tools:$missing" "archsetup"
+    fi
+}
+
+validate_hyprland_tools() {
+ step "Checking Hyprland tools"
+ local missing=""
+
+ # Check core Hyprland packages
+ for pkg in hyprland hypridle hyprlock waybar wofi swww grim slurp gammastep; do
+ if ! ssh_cmd "pacman -Q $pkg &>/dev/null"; then
+ missing="$missing $pkg"
+ fi
+ done
+
+ # st should still be installed (via XWayland)
+ if ! ssh_cmd "test -f /usr/local/bin/st"; then
+ missing="$missing st"
+ fi
+
+ if [ -z "$missing" ]; then
+ validation_pass "All Hyprland tools installed"
+ else
+ validation_fail "Missing Hyprland tools:$missing"
+ attribute_issue "Missing Hyprland tools:$missing" "archsetup"
+ fi
+}
+
+validate_hyprland_config() {
+ step "Checking Hyprland configuration files"
+ local missing=""
+
+ for config in ".config/hypr/hyprland.conf" ".config/hypr/hypridle.conf" \
+ ".config/hypr/hyprlock.conf" ".config/waybar/config" \
+ ".config/wofi/config" ".config/gammastep/config.ini"; do
+ if ! ssh_cmd "test -f /home/cjennings/$config"; then
+ missing="$missing $config"
+ fi
+ done
+
+ if [ -z "$missing" ]; then
+ validation_pass "All Hyprland config files present"
+ else
+ validation_fail "Missing Hyprland configs:$missing"
+ attribute_issue "Missing Hyprland configs:$missing" "archsetup"
+ fi
+}
+
+# Check whether a Hyprland IPC socket exists on the VM.
+# Only a warning is raised when none is found, because the socket exists only
+# while Hyprland is running.
+validate_hyprland_socket() {
+    step "Checking Hyprland IPC socket"
+    # Note: This only works if Hyprland is running. Skip if no display.
+    # Each candidate is tested individually: `test -S <glob>` errors out as soon
+    # as the glob matches more than one instance directory.
+    # NOTE(review): /run/user/*/hypr is included because newer Hyprland releases
+    # keep the socket under $XDG_RUNTIME_DIR/hypr — confirm for the target version.
+    if ssh_cmd 'for s in /tmp/hypr/*/.socket.sock /run/user/*/hypr/*/.socket.sock; do [ -S "$s" ] && exit 0; done; exit 1'; then
+        validation_pass "Hyprland socket exists"
+    else
+        validation_warn "Hyprland socket not found (Hyprland may not be running)"
+    fi
+}
+
+# Detect which desktop environment is installed and run the matching checks:
+# the Hyprland checks when the hyprland package is installed, otherwise the
+# suckless checks when /usr/local/bin/dwm exists, otherwise a warning.
+validate_window_manager() {
+    # The remote redirect is spelled `>/dev/null 2>&1`: `&>` is a bashism that a
+    # POSIX sh parses as "run in background", which would always succeed.
+    if ssh_cmd "pacman -Q hyprland >/dev/null 2>&1"; then
+        section "Hyprland Desktop Environment"
+        validate_hyprland_tools
+        validate_hyprland_config
+        validate_hyprland_socket
+    elif ssh_cmd "test -f /usr/local/bin/dwm"; then
+        section "DWM Desktop Environment"
+        validate_suckless_tools
+    else
+        validation_warn "No window manager detected (DESKTOP_ENV=none?)"
+    fi
+}
+
+#-----------------------------------------------------------------------------
+# Essential Services Validations
+#-----------------------------------------------------------------------------
+
+# Verify that ufw.service is enabled. Only the enabled state is checked, not
+# whether the firewall is active.
+validate_firewall() {
+    step "Checking if firewall (ufw) is enabled"
+    local state
+    state=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled")
+    if [ "$state" != "enabled" ]; then
+        validation_fail "UFW firewall not enabled"
+        attribute_issue "UFW not enabled" "archsetup"
+        return
+    fi
+    validation_pass "UFW firewall is enabled"
+}
+
+# Look for a "DNS=...#" entry in /etc/systemd/resolved.conf as evidence that
+# DNS-over-TLS is configured. Absence produces a warning only.
+validate_dns_config() {
+    step "Checking DNS-over-TLS configuration"
+    if ! ssh_cmd "grep -q 'DNS=.*#' /etc/systemd/resolved.conf 2>/dev/null"; then
+        validation_warn "DNS-over-TLS may not be configured"
+        return
+    fi
+    validation_pass "DNS-over-TLS configured"
+}
+
+# Verify that avahi-daemon is enabled and, if so, that the VM can ping its own
+# <hostname>.local name. Everything except the enabled check is warning-only.
+validate_avahi() {
+    step "Checking avahi-daemon status"
+    local state
+    state=$(ssh_cmd "systemctl is-enabled avahi-daemon.service 2>/dev/null || echo disabled")
+
+    if [ "$state" != "enabled" ]; then
+        # This might be OK if avahi was pre-installed
+        validation_warn "avahi-daemon not enabled (may have been pre-configured)"
+        return
+    fi
+
+    validation_pass "avahi-daemon is enabled"
+
+    # Full-stack mDNS test: ping hostname.local
+    local vm_hostname
+    vm_hostname=$(ssh_cmd "hostname")
+    if ssh_cmd "ping -c 1 -W 2 ${vm_hostname}.local" &>> "$LOGFILE"; then
+        validation_pass "mDNS working (${vm_hostname}.local responds to ping)"
+    else
+        validation_warn "mDNS ping failed (avahi may need time to propagate)"
+    fi
+}
+
+# Verify that fail2ban.service is enabled; a failure is attributed to archsetup.
+validate_fail2ban() {
+    step "Checking fail2ban status"
+    local state
+    state=$(ssh_cmd "systemctl is-enabled fail2ban.service 2>/dev/null || echo disabled")
+    if [ "$state" != "enabled" ]; then
+        validation_fail "fail2ban not enabled"
+        attribute_issue "fail2ban not enabled" "archsetup"
+        return
+    fi
+    validation_pass "fail2ban is enabled"
+}
+
+validate_networkmanager() {
+ step "Checking NetworkManager status"
+ local status=$(ssh_cmd "systemctl is-enabled NetworkManager.service 2>/dev/null || echo disabled")
+ if [ "$status" = "enabled" ]; then
+ validation_pass "NetworkManager is enabled"
+ # Functional test
+ if ssh_cmd "nmcli general status" &>> "$LOGFILE"; then
+ validation_pass "NetworkManager is functional"
+ else
+ validation_warn "NetworkManager enabled but not responding"
+ fi
+ else
+ validation_fail "NetworkManager not enabled"
+ attribute_issue "NetworkManager not enabled" "archsetup"
+ fi
+}
+
+#-----------------------------------------------------------------------------
+# Service-Specific Validations
+#-----------------------------------------------------------------------------
+
+# Run the service validations: required services, timers, optional services,
+# user lingering, filesystem-specific services and the functional tests.
+validate_all_services() {
+    section "Service Validations"
+
+    # Core services (always expected)
+    validate_service "sshd" "enabled" "active"
+    validate_service "systemd-resolved" "enabled" "active"
+    validate_service "ufw" "enabled" "" # VM lacks iptables modules, can't be active
+    validate_service "fail2ban" "enabled" "active"
+    validate_service "NetworkManager" "enabled" "active"
+    validate_service "rngd" "enabled" "active"
+    validate_service "cronie" "enabled" ""
+    validate_service "atd" "enabled" ""
+
+    # Timer services
+    validate_service "reflector.timer" "enabled" ""
+    validate_service "paccache.timer" "enabled" ""
+
+    # Optional services (warn if missing, don't fail)
+    validate_service_optional "avahi-daemon" "enabled"
+    validate_service_optional "bluetooth" "enabled"
+    validate_service_optional "cups" "enabled"
+    validate_service_optional "docker" "enabled"
+    validate_service_optional "tailscaled" "enabled"
+
+    # Syncthing uses user service (not system), check lingering is enabled.
+    # `test -e` is used instead of `ls`: ls prints the path before "yes", so
+    # the captured value could never equal "yes" and the check always warned.
+    step "Checking user lingering for syncthing"
+    local linger_enabled
+    linger_enabled=$(ssh_cmd "test -e /var/lib/systemd/linger/cjennings && echo yes || echo no")
+    if [ "$linger_enabled" = "yes" ]; then
+        validation_pass "User lingering enabled for syncthing user service"
+    else
+        validation_warn "User lingering not enabled (syncthing may not autostart)"
+    fi
+
+    # Filesystem-specific
+    validate_zfs_services
+    validate_btrfs_services
+
+    # Functional tests
+    validate_service_functions
+}
+
+# Validate a required systemd unit; failures are attributed to archsetup.
+# Args:    $1 = unit name
+#          $2 = "enabled" to require the unit to be enabled, "" to skip
+#          $3 = "active" to require the unit to be active, "" to skip
+# Returns: 0 if every requested check passes, 1 on the first failure
+validate_service() {
+    local service="$1"
+    local expected_enabled="$2" # "enabled" or ""
+    local expected_active="$3" # "active" or ""
+    local enabled active
+
+    step "Checking $service"
+
+    if [ -n "$expected_enabled" ]; then
+        enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled")
+        # systemctl prints the state even when it exits non-zero, so the
+        # fallback echo adds a second line; keep only the first for "(got: …)".
+        enabled=${enabled%%$'\n'*}
+        enabled=${enabled:-unknown}
+        if [ "$enabled" = "enabled" ]; then
+            validation_pass "$service is enabled"
+        else
+            validation_fail "$service not enabled (got: $enabled)"
+            attribute_issue "$service not enabled" "archsetup"
+            return 1
+        fi
+    fi
+
+    if [ -n "$expected_active" ]; then
+        active=$(ssh_cmd "systemctl is-active $service 2>/dev/null || echo inactive")
+        # Same two-line issue as above (e.g. "failed" followed by "inactive")
+        active=${active%%$'\n'*}
+        active=${active:-unknown}
+        if [ "$active" = "active" ]; then
+            validation_pass "$service is active"
+        else
+            validation_fail "$service not active (got: $active)"
+            attribute_issue "$service not active" "archsetup"
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+validate_service_optional() {
+ local service="$1"
+ local expected_enabled="$2"
+
+ step "Checking optional service: $service"
+
+ local enabled=$(ssh_cmd "systemctl is-enabled $service 2>/dev/null || echo disabled")
+ if [ "$enabled" = "enabled" ]; then
+ validation_pass "$service is enabled"
+ else
+ validation_warn "$service not enabled (optional)"
+ fi
+}
+
+validate_zfs_services() {
+ # Only check if ZFS is installed
+ if ! ssh_cmd "which zfs" &>> "$LOGFILE"; then
+ return 0
+ fi
+
+ step "Checking ZFS-specific services"
+
+ validate_service_optional "sanoid.timer" "enabled"
+
+ # Check for zfs-scrub timer (pool name varies)
+ local scrub_enabled
+ scrub_enabled=$(ssh_cmd "systemctl list-unit-files 'zfs-scrub*' 2>/dev/null | grep -c enabled" | tr -d '[:space:]')
+ scrub_enabled=${scrub_enabled:-0}
+ if [ "$scrub_enabled" -gt 0 ]; then
+ validation_pass "ZFS scrub timer enabled"
+ else
+ validation_warn "ZFS scrub timer not found"
+ fi
+}
+
+# Check btrfs-related services, but only when the mount table shows btrfs
+# mounted on /; otherwise do nothing.
+validate_btrfs_services() {
+    if ssh_cmd "mount | grep 'on / ' | grep -q btrfs"; then
+        step "Checking btrfs-specific services"
+        validate_service_optional "grub-btrfsd" "enabled"
+    fi
+}
+
+validate_service_functions() {
+ section "Service Functional Tests"
+
+ # UFW functional test
+ # NOTE: VM environment lacks iptables kernel modules, so UFW cannot activate.
+ # We only verify it's enabled; active status requires real hardware.
+ step "Testing UFW functionality"
+ local ufw_enabled
+ ufw_enabled=$(ssh_cmd "systemctl is-enabled ufw.service 2>/dev/null || echo disabled")
+ if [ "$ufw_enabled" = "enabled" ]; then
+ validation_pass "UFW is enabled (activation requires iptables kernel modules)"
+ else
+ validation_fail "UFW not enabled"
+ attribute_issue "UFW not enabled" "archsetup"
+ fi
+
+ # fail2ban functional test
+ step "Testing fail2ban functionality"
+ if ssh_cmd "fail2ban-client status" &>> "$LOGFILE"; then
+ validation_pass "fail2ban is responding"
+ else
+ validation_fail "fail2ban not responding"
+ attribute_issue "fail2ban not functioning" "archsetup"
+ fi
+
+ # DNS resolution test
+ step "Testing DNS resolution"
+ if ssh_cmd "resolvectl query archlinux.org" &>> "$LOGFILE"; then
+ validation_pass "DNS resolution working"
+ else
+ validation_warn "DNS resolution test failed (may be network issue)"
+ fi
+
+ # Docker functional test (if enabled)
+ if ssh_cmd "systemctl is-enabled docker" &>> "$LOGFILE"; then
+ step "Testing Docker functionality"
+ if ssh_cmd "docker info" &>> "$LOGFILE"; then
+ validation_pass "Docker is responding"
+ else
+ validation_warn "Docker enabled but not responding"
+ fi
+ fi
+}
+
+#-----------------------------------------------------------------------------
+# Developer Tools Validations
+#-----------------------------------------------------------------------------
+
+# Verify that Emacs is installed and that /home/cjennings/.emacs.d exists and
+# is listable by cjennings. A missing config directory is a warning only.
+validate_emacs() {
+    step "Checking if Emacs is installed"
+    local result
+
+    # `command -v` is a shell builtin, so the probe does not depend on the
+    # separate `which` utility being installed on the VM.
+    if ! ssh_cmd "command -v emacs" &>> "$LOGFILE"; then
+        validation_fail "Emacs not found"
+        attribute_issue "Emacs not installed" "archsetup"
+        return
+    fi
+    validation_pass "Emacs is installed"
+
+    # Check if config exists
+    if ! ssh_cmd "test -d /home/cjennings/.emacs.d"; then
+        validation_warn "Emacs config directory not found"
+        return
+    fi
+    validation_pass "Emacs config directory exists"
+
+    # Check user can access the directory
+    result=$(ssh_cmd "sudo -u cjennings ls /home/cjennings/.emacs.d > /dev/null 2>&1 && echo OK || echo FAIL")
+    if [ "$result" = "OK" ]; then
+        validation_pass "Emacs config readable by user"
+    else
+        validation_fail "Emacs config not readable by user (permission issue)"
+        attribute_issue "Emacs .emacs.d not readable by user" "archsetup"
+    fi
+}
+
+# Verify that git is installed on the VM. Despite the name, only the presence
+# of the command is checked, not any git configuration.
+validate_git_config() {
+    step "Checking git installation"
+    # `command -v` is a shell builtin, so the probe does not depend on the
+    # separate `which` utility being installed on the VM.
+    if ssh_cmd "command -v git" &>> "$LOGFILE"; then
+        validation_pass "git is installed"
+    else
+        validation_fail "git not found"
+        attribute_issue "git not installed" "archsetup"
+    fi
+}
+
+validate_dev_tools() {
+ step "Checking developer tools"
+ local tools="python node npm go rustc"
+ local missing=""
+
+ for tool in $tools; do
+ if ! ssh_cmd "which $tool" &>> "$LOGFILE"; then
+ missing="$missing $tool"
+ fi
+ done
+
+ if [ -z "$missing" ]; then
+ validation_pass "Core dev tools installed"
+ else
+ validation_warn "Some dev tools missing:$missing"
+ fi
+}
+
+#-----------------------------------------------------------------------------
+# System Configuration Validations
+#-----------------------------------------------------------------------------
+
+# On systems where the zfs command exists, check that sanoid is
+# installed alongside it. Systems without zfs are only noted.
+validate_zfs_config() {
+    step "Checking ZFS configuration (if applicable)"
+    if ! ssh_cmd "which zfs" &>> "$LOGFILE"; then
+        info "ZFS not installed (non-ZFS system)"
+        return
+    fi
+
+    # zfs is present; sanoid is expected next to it
+    if ! ssh_cmd "which sanoid" &>> "$LOGFILE"; then
+        validation_warn "ZFS detected but sanoid not installed"
+        return
+    fi
+    validation_pass "ZFS with sanoid detected"
+}
+
+# Check for a generated GRUB configuration file. Its absence is only a
+# warning because another bootloader may be in use.
+validate_boot_config() {
+    step "Checking GRUB configuration"
+    if ! ssh_cmd "test -f /boot/grub/grub.cfg" &>> "$LOGFILE"; then
+        validation_warn "GRUB config not found (may use different bootloader)"
+        return
+    fi
+    validation_pass "GRUB config exists"
+}
+
+# Verify that pacman knows the terminus-font package as installed.
+validate_terminus_font() {
+    step "Checking terminus-font installation"
+    if ! ssh_cmd "pacman -Q terminus-font" &>> "$LOGFILE"; then
+        validation_fail "terminus-font package not installed"
+        attribute_issue "terminus-font not installed via pacman" "archsetup"
+        return
+    fi
+    validation_pass "terminus-font package installed"
+}
+
+validate_mkinitcpio_hooks() {
+ step "Checking mkinitcpio HOOKS configuration"
+ local hooks=$(ssh_cmd "grep '^HOOKS=' /etc/mkinitcpio.conf")
+ local is_zfs=$(ssh_cmd "findmnt -n -o FSTYPE / 2>/dev/null")
+
+ if [ "$is_zfs" = "zfs" ]; then
+ # ZFS system: must use udev, not systemd
+ if echo "$hooks" | grep -q '\budev\b'; then
+ validation_pass "ZFS system uses udev hook (correct)"
+ elif echo "$hooks" | grep -q '\bsystemd\b'; then
+ validation_fail "ZFS system uses systemd hook (will break boot)"
+ attribute_issue "mkinitcpio uses systemd hook on ZFS system" "archsetup"
+ else
+ validation_warn "Could not determine init hook type"
+ fi
+ else
+ # Non-ZFS: systemd hook is fine
+ if echo "$hooks" | grep -q '\bsystemd\b'; then
+ validation_pass "Non-ZFS system uses systemd hook"
+ elif echo "$hooks" | grep -q '\budev\b'; then
+ validation_pass "Non-ZFS system uses udev hook"
+ fi
+ fi
+}
+
+validate_initramfs_consolefont() {
+ step "Checking console font in initramfs"
+ local font_in_initramfs=$(ssh_cmd "lsinitcpio /boot/initramfs-linux*.img 2>/dev/null | grep -c 'consolefont.psf\\|ter-'")
+
+ if [ "${font_in_initramfs:-0}" -gt 0 ]; then
+ validation_pass "Console font included in initramfs"
+ else
+ validation_warn "Console font may not be in initramfs"
+ fi
+}
+
+# When /dev/nvme* devices are present, check that the MODULES line of
+# /etc/mkinitcpio.conf mentions nvme. Systems without NVMe are only noted.
+validate_nvme_module() {
+    step "Checking NVMe module configuration"
+    local first_nvme
+    first_nvme=$(ssh_cmd "ls /dev/nvme* 2>/dev/null | head -1") || true
+
+    if [ -z "$first_nvme" ]; then
+        info "No NVMe drives detected"
+        return
+    fi
+
+    # System has NVMe drives: look at the configured MODULES line
+    local modules_line
+    modules_line=$(ssh_cmd "grep '^MODULES=' /etc/mkinitcpio.conf") || true
+    case "$modules_line" in
+        *nvme*)
+            validation_pass "NVMe module in mkinitcpio MODULES"
+            ;;
+        *)
+            validation_warn "NVMe system but nvme not in MODULES (may cause slow boot)"
+            ;;
+    esac
+}
+
+# Report whether the getty@tty1 autologin drop-in file exists.
+# A missing file is informational only.
+validate_autologin_config() {
+    step "Checking autologin configuration"
+    if ! ssh_cmd "test -f /etc/systemd/system/getty@tty1.service.d/autologin.conf" &>> "$LOGFILE"; then
+        info "Autologin not configured (may be intentional)"
+        return
+    fi
+    validation_pass "Autologin configured"
+}
+
+# Validate the pre-created gnome-keyring directory of user cjennings:
+# it must exist, have mode 700, be owned by cjennings, and its "default"
+# file must contain "login". Returns 1 at the first check that fails.
+validate_gnome_keyring_setup() {
+    step "Checking gnome-keyring pre-configuration"
+    local kr_dir="/home/cjennings/.local/share/keyrings"
+    local mode owner_name default_name
+
+    # 1. Directory must exist
+    if ! ssh_cmd "test -d $kr_dir"; then
+        validation_fail "Keyring directory not created"
+        attribute_issue "gnome-keyring directory not pre-created" "archsetup"
+        return 1
+    fi
+
+    # 2. Mode must be 700
+    mode=$(ssh_cmd "stat -c '%a' $kr_dir") || true
+    if [[ "$mode" != "700" ]]; then
+        validation_fail "Keyring directory has wrong permissions: $mode (expected 700)"
+        attribute_issue "gnome-keyring directory wrong permissions" "archsetup"
+        return 1
+    fi
+
+    # 3. Owner must be cjennings
+    owner_name=$(ssh_cmd "stat -c '%U' $kr_dir") || true
+    if [[ "$owner_name" != "cjennings" ]]; then
+        validation_fail "Keyring directory owned by $owner_name (expected cjennings)"
+        attribute_issue "gnome-keyring directory wrong ownership" "archsetup"
+        return 1
+    fi
+
+    # 4. The "default" file must name the login keyring
+    default_name=$(ssh_cmd "cat $kr_dir/default 2>/dev/null") || true
+    if [[ "$default_name" != "login" ]]; then
+        validation_fail "Default keyring not set to 'login' (got: '$default_name')"
+        attribute_issue "gnome-keyring default not set to login" "archsetup"
+        return 1
+    fi
+
+    validation_pass "gnome-keyring pre-configured (default=login, dir=700)"
+}
+
+#-----------------------------------------------------------------------------
+# Archsetup-Specific Validations
+#-----------------------------------------------------------------------------
+
+# Count lines beginning with "Error:" across /var/log/archsetup-*.log on
+# the target and fail the validation when any are found.
+validate_archsetup_log() {
+    step "Checking archsetup log for errors"
+    local n_errors
+    # grep -h drops file names so wc -l counts matches over all logs;
+    # tr removes surrounding whitespace from the count
+    n_errors=$(ssh_cmd "grep -h '^Error:' /var/log/archsetup-*.log 2>/dev/null | wc -l" | tr -d '[:space:]')
+    : "${n_errors:=0}"
+
+    if [ "$n_errors" != "0" ]; then
+        validation_fail "Found $n_errors errors in archsetup log"
+        attribute_issue "Errors in archsetup log: $n_errors" "archsetup"
+    else
+        validation_pass "No errors in archsetup log"
+    fi
+}
+
+# Count the entries in /var/lib/archsetup/state/ and compare against the
+# expected number of installation steps. Too few markers is a warning.
+validate_state_markers() {
+    step "Checking archsetup state markers"
+    local expected_steps=12
+    local state_count
+    # Strip whitespace so the count is safe for a numeric comparison
+    state_count=$(ssh_cmd "ls /var/lib/archsetup/state/ 2>/dev/null | wc -l" | tr -d '[:space:]')
+    # A failed probe (empty or non-numeric output) is treated as zero markers
+    if ! [[ "$state_count" =~ ^[0-9]+$ ]]; then
+        state_count=0
+    fi
+
+    if [ "$state_count" -ge "$expected_steps" ]; then
+        validation_pass "All $expected_steps installation steps completed"
+    else
+        validation_warn "Only $state_count/$expected_steps steps completed"
+    fi
+}
+
+#=============================================================================
+# ISSUE REPORTING
+#=============================================================================
+
+generate_issue_report() {
+ local output_dir="$1"
+ local archzfs_inbox="$2"
+
+ section "Issue Attribution Report"
+
+ local report_file="$output_dir/issue-report.txt"
+
+ cat > "$report_file" << EOF
+========================================
+Issue Attribution Report
+Generated: $(date +'%Y-%m-%d %H:%M:%S')
+========================================
+
+VALIDATION SUMMARY
+------------------
+Passed: $VALIDATION_PASSED
+Failed: $VALIDATION_FAILED
+Warnings: $VALIDATION_WARNINGS
+
+EOF
+
+ if [ ${#ARCHSETUP_ISSUES[@]} -gt 0 ]; then
+ echo "ARCHSETUP ISSUES (${#ARCHSETUP_ISSUES[@]})" >> "$report_file"
+ echo "-------------------------------------------" >> "$report_file"
+ for issue in "${ARCHSETUP_ISSUES[@]}"; do
+ echo " - $issue" >> "$report_file"
+ done
+ echo "" >> "$report_file"
+
+ error "Found ${#ARCHSETUP_ISSUES[@]} archsetup issues"
+ fi
+
+ if [ ${#BASE_INSTALL_ISSUES[@]} -gt 0 ]; then
+ echo "BASE INSTALL ISSUES (${#BASE_INSTALL_ISSUES[@]})" >> "$report_file"
+ echo "-------------------------------------------" >> "$report_file"
+ for issue in "${BASE_INSTALL_ISSUES[@]}"; do
+ echo " - $issue" >> "$report_file"
+ done
+ echo "" >> "$report_file"
+
+ warn "Found ${#BASE_INSTALL_ISSUES[@]} base install issues"
+
+ # If archzfs inbox provided, create issue files
+ if [ -n "$archzfs_inbox" ] && [ -d "$archzfs_inbox" ]; then
+ local issue_file="$archzfs_inbox/$(date +'%Y-%m-%d')-test-issues.txt"
+ echo "Base install issues from archsetup test run:" > "$issue_file"
+ echo "Date: $(date +'%Y-%m-%d %H:%M:%S')" >> "$issue_file"
+ echo "" >> "$issue_file"
+ for issue in "${BASE_INSTALL_ISSUES[@]}"; do
+ echo "- $issue" >> "$issue_file"
+ done
+ info "Created archzfs issue file: $issue_file"
+ fi
+ fi
+
+ if [ ${#UNKNOWN_ISSUES[@]} -gt 0 ]; then
+ echo "UNKNOWN/UNATTRIBUTED ISSUES (${#UNKNOWN_ISSUES[@]})" >> "$report_file"
+ echo "-------------------------------------------" >> "$report_file"
+ for issue in "${UNKNOWN_ISSUES[@]}"; do
+ echo " - $issue" >> "$report_file"
+ done
+ echo "" >> "$report_file"
+
+ warn "Found ${#UNKNOWN_ISSUES[@]} unattributed issues"
+ fi
+
+ if [ ${#ARCHSETUP_ISSUES[@]} -eq 0 ] && [ ${#BASE_INSTALL_ISSUES[@]} -eq 0 ] && [ ${#UNKNOWN_ISSUES[@]} -eq 0 ]; then
+ echo "No issues found!" >> "$report_file"
+ success "No issues found!"
+ fi
+
+ info "Issue report saved: $report_file"
+}
+
+#=============================================================================
+# MAIN VALIDATION ENTRY POINT
+#=============================================================================
+
+run_full_validation() {
+ local output_dir="$1"
+ local archzfs_inbox="${2:-}"
+
+ run_all_validations
+ analyze_log_diff "$output_dir"
+ generate_issue_report "$output_dir" "$archzfs_inbox"
+
+ # Return success if no failures
+ [ $VALIDATION_FAILED -eq 0 ]
+}
diff --git a/scripts/testing/lib/vm-utils.sh b/scripts/testing/lib/vm-utils.sh
new file mode 100755
index 0000000..81aec33
--- /dev/null
+++ b/scripts/testing/lib/vm-utils.sh
@@ -0,0 +1,321 @@
+#!/bin/bash
+# VM management utilities for archsetup testing
+# Author: Craig Jennings <craigmartinjennings@gmail.com>
+# License: GNU GPLv3
+
+# Note: logging.sh should already be sourced by the calling script
+
+# VM configuration defaults
+VM_CPUS="${VM_CPUS:-4}"
+VM_RAM="${VM_RAM:-8192}" # MB
+VM_DISK="${VM_DISK:-50}" # GB
+VM_NETWORK="${VM_NETWORK:-default}"
+LIBVIRT_URI="qemu:///system" # Use system session, not user session
+
+# Check that the libvirtd service is active.
+# Returns 0 when active; otherwise logs a hint and returns 1.
+check_libvirt() {
+    if systemctl is-active --quiet libvirtd; then
+        return 0
+    fi
+
+    error "libvirtd service is not running"
+    info "Start it with: sudo systemctl start libvirtd"
+    return 1
+}
+
+# Check if user is in libvirt group
+check_libvirt_group() {
+ if ! groups | grep -q libvirt; then
+ warn "Current user is not in libvirt group"
+ info "Add yourself with: sudo usermod -a -G libvirt $USER"
+ info "Then log out and back in for changes to take effect"
+ return 1
+ fi
+ return 0
+}
+
+# Check that the /dev/kvm device node exists.
+# Returns 0 when present; otherwise logs hints and returns 1.
+check_kvm() {
+    if [ -e /dev/kvm ]; then
+        return 0
+    fi
+
+    error "KVM is not available"
+    info "Check if virtualization is enabled in BIOS"
+    info "Load kvm module: sudo modprobe kvm-intel (or kvm-amd)"
+    return 1
+}
+
+# Wait until libvirt reports the VM as running.
+# Note: only the domain state is polled; SSH or console reachability is
+# not checked here.
+#   $1 - VM name
+#   $2 - timeout in seconds (default 300)
+# Returns 0 once running (after a 5 second settle delay), 1 on timeout.
+wait_for_vm() {
+    local vm_name="$1"
+    local timeout="${2:-300}" # 5 minutes default
+    local elapsed=0
+
+    progress "Waiting for VM $vm_name to boot..."
+
+    while [ $elapsed -lt $timeout ]; do
+        if ! virsh --connect "$LIBVIRT_URI" domstate "$vm_name" 2>/dev/null | grep -q "running"; then
+            # Not up yet: poll again in two seconds
+            sleep 2
+            elapsed=$((elapsed + 2))
+            continue
+        fi
+
+        sleep 5
+        complete "VM $vm_name is running"
+        return 0
+    done
+
+    error "Timeout waiting for VM $vm_name to boot"
+    return 1
+}
+
+# Check if a VM is known to libvirt.
+# The exit status of "virsh dominfo" is the function's return value.
+vm_exists() {
+    local domain="$1"
+    virsh --connect "$LIBVIRT_URI" dominfo "$domain" &>/dev/null
+}
+
+# Check if a VM is running.
+# Returns 0 only when "virsh domstate" prints exactly "running".
+vm_is_running() {
+    local domain="$1"
+    local state
+    # A virsh error leaves the state empty, which compares as not running
+    state=$(virsh --connect "$LIBVIRT_URI" domstate "$domain" 2>/dev/null) || true
+    [ "$state" = "running" ]
+}
+
+# Start a VM unless it is already running.
+#   $1 - VM name
+# Returns 0 if the VM is running or was started, 1 if virsh start failed.
+# virsh output is appended to $LOGFILE.
+start_vm() {
+    local vm_name="$1"
+
+    if vm_is_running "$vm_name"; then
+        warn "VM $vm_name is already running"
+        return 0
+    fi
+
+    step "Starting VM: $vm_name"
+    if ! virsh --connect "$LIBVIRT_URI" start "$vm_name" >> "$LOGFILE" 2>&1; then
+        error "Failed to start VM $vm_name"
+        return 1
+    fi
+
+    success "VM $vm_name started"
+    return 0
+}
+
+# Stop VM gracefully
+stop_vm() {
+ local vm_name="$1"
+ local timeout="${2:-60}"
+
+ if ! vm_is_running "$vm_name"; then
+ info "VM $vm_name is not running"
+ return 0
+ fi
+
+ step "Shutting down VM: $vm_name"
+ if virsh --connect "$LIBVIRT_URI" shutdown "$vm_name" >> "$LOGFILE" 2>&1; then
+ # Wait for graceful shutdown
+ local elapsed=0
+ while [ $elapsed -lt $timeout ]; do
+ if ! vm_is_running "$vm_name"; then
+ success "VM $vm_name stopped gracefully"
+ return 0
+ fi
+ sleep 2
+ elapsed=$((elapsed + 2))
+ done
+
+ warn "VM $vm_name did not stop gracefully, forcing..."
+ virsh --connect "$LIBVIRT_URI" destroy "$vm_name" >> "$LOGFILE" 2>&1
+ fi
+
+ success "VM $vm_name stopped"
+ return 0
+}
+
+# Destroy VM (force stop)
+destroy_vm() {
+ local vm_name="$1"
+
+ if ! vm_exists "$vm_name"; then
+ info "VM $vm_name does not exist"
+ return 0
+ fi
+
+ step "Destroying VM: $vm_name"
+ if vm_is_running "$vm_name"; then
+ virsh --connect "$LIBVIRT_URI" destroy "$vm_name" >> "$LOGFILE" 2>&1
+ fi
+
+ virsh --connect "$LIBVIRT_URI" undefine "$vm_name" --nvram >> "$LOGFILE" 2>&1
+ success "VM $vm_name destroyed"
+ return 0
+}
+
+# Create a named snapshot of a VM.
+#   $1 - VM name
+#   $2 - snapshot name
+# Returns 0 on success, 1 on failure; virsh output goes to $LOGFILE.
+create_snapshot() {
+    local vm_name="$1"
+    local snap="$2"
+
+    step "Creating snapshot: $snap"
+    if ! virsh --connect "$LIBVIRT_URI" snapshot-create-as "$vm_name" "$snap" >> "$LOGFILE" 2>&1; then
+        error "Failed to create snapshot $snap"
+        return 1
+    fi
+
+    success "Snapshot $snap created"
+    return 0
+}
+
+# Revert a VM to a named snapshot.
+#   $1 - VM name
+#   $2 - snapshot name
+# Returns 0 on success, 1 on failure; virsh output goes to $LOGFILE.
+restore_snapshot() {
+    local vm_name="$1"
+    local snap="$2"
+
+    step "Restoring snapshot: $snap"
+    if ! virsh --connect "$LIBVIRT_URI" snapshot-revert "$vm_name" "$snap" >> "$LOGFILE" 2>&1; then
+        error "Failed to restore snapshot $snap"
+        return 1
+    fi
+
+    success "Snapshot $snap restored"
+    return 0
+}
+
+# Delete a named snapshot of a VM.
+#   $1 - VM name
+#   $2 - snapshot name
+# Returns 0 on success, 1 on failure; virsh output goes to $LOGFILE.
+delete_snapshot() {
+    local vm_name="$1"
+    local snap="$2"
+
+    step "Deleting snapshot: $snap"
+    if ! virsh --connect "$LIBVIRT_URI" snapshot-delete "$vm_name" "$snap" >> "$LOGFILE" 2>&1; then
+        error "Failed to delete snapshot $snap"
+        return 1
+    fi
+
+    success "Snapshot $snap deleted"
+    return 0
+}
+
+# Clone a disk image as a full copy (qemu-img convert, qcow2 to qcow2;
+# this is not a copy-on-write overlay), then blank /etc/machine-id in
+# the copy.
+#   $1 - base image path
+#   $2 - new image path
+# Returns 1 if the base image is missing or the copy fails. A failure to
+# clear the machine-id only warns and still returns 0.
+clone_disk() {
+    local src_image="$1"
+    local dst_image="$2"
+
+    if [ ! -f "$src_image" ]; then
+        error "Base image not found: $src_image"
+        return 1
+    fi
+
+    step "Cloning disk image (full copy)"
+    if ! qemu-img convert -f qcow2 -O qcow2 "$src_image" "$dst_image" >> "$LOGFILE" 2>&1; then
+        error "Failed to clone disk"
+        return 1
+    fi
+    success "Disk cloned: $dst_image"
+
+    # Truncate machine-id so systemd generates a new one on boot (avoids DHCP conflicts)
+    step "Clearing machine-id for unique network identity"
+    if ! guestfish -a "$dst_image" -i truncate /etc/machine-id >> "$LOGFILE" 2>&1; then
+        warn "Failed to clear machine-ID (guestfish failed)"
+        info "Network may conflict with base VM if both run simultaneously"
+        return 0 # Don't fail the whole operation
+    fi
+
+    success "Machine-ID cleared (will regenerate on boot)"
+    return 0
+}
+
+# Get VM IP address (requires guest agent or DHCP lease)
+get_vm_ip() {
+ local vm_name="$1"
+
+ # Try guest agent first
+ local ip
+ ip=$(virsh --connect "$LIBVIRT_URI" domifaddr "$vm_name" 2>/dev/null | grep -oP '(\d+\.){3}\d+' | head -1)
+
+ if [ -n "$ip" ]; then
+ echo "$ip"
+ return 0
+ fi
+
+ # Fall back to DHCP leases
+ local mac
+ mac=$(virsh --connect "$LIBVIRT_URI" domiflist "$vm_name" | grep -oP '([0-9a-f]{2}:){5}[0-9a-f]{2}' | head -1)
+
+ if [ -n "$mac" ]; then
+ ip=$(grep "$mac" /var/lib/libvirt/dnsmasq/default.leases 2>/dev/null | awk '{print $3}')
+ if [ -n "$ip" ]; then
+ echo "$ip"
+ return 0
+ fi
+ fi
+
+ return 1
+}
+
+# Execute command in VM via SSH
+vm_exec() {
+ local vm_name="$1"
+ shift
+ local cmd="$*"
+
+ local ip
+ ip=$(get_vm_ip "$vm_name")
+
+ if [ -z "$ip" ]; then
+ error "Could not get IP address for VM $vm_name"
+ return 1
+ fi
+
+ ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+ "root@$ip" "$cmd" 2>> "$LOGFILE"
+}
+
+# Copy file to VM
+copy_to_vm() {
+ local vm_name="$1"
+ local local_file="$2"
+ local remote_path="$3"
+
+ local ip
+ ip=$(get_vm_ip "$vm_name")
+
+ if [ -z "$ip" ]; then
+ error "Could not get IP address for VM $vm_name"
+ return 1
+ fi
+
+ step "Copying $local_file to VM"
+ if scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+ "$local_file" "root@$ip:$remote_path" >> "$LOGFILE" 2>&1; then
+ success "File copied to VM"
+ return 0
+ else
+ error "Failed to copy file to VM"
+ return 1
+ fi
+}
+
+# Copy file from VM
+copy_from_vm() {
+ local vm_name="$1"
+ local remote_file="$2"
+ local local_path="$3"
+
+ local ip
+ ip=$(get_vm_ip "$vm_name")
+
+ if [ -z "$ip" ]; then
+ error "Could not get IP address for VM $vm_name"
+ return 1
+ fi
+
+ step "Copying $remote_file from VM"
+ if scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+ "root@$ip:$remote_file" "$local_path" >> "$LOGFILE" 2>&1; then
+ success "File copied from VM"
+ return 0
+ else
+ error "Failed to copy file from VM"
+ return 1
+ fi
+}