aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-22 18:03:40 -0500
committerCraig Jennings <c@cjennings.net>2026-05-22 18:03:40 -0500
commitb6525a50fabf3aedf41eee70c164519b00d27704 (patch)
tree0b9900eb584509051c83ebed0ed1427ee9bee9e7
parent4ef30e5c84ab22ba1724608009093d6725a1ceda (diff)
downloadarchangel-b6525a50fabf3aedf41eee70c164519b00d27704.tar.gz
archangel-b6525a50fabf3aedf41eee70c164519b00d27704.zip
feat(install): add pre-flight environment and disk-target validation
archangel went straight from filesystem selection into a destructive install behind only a root check and a ZFS module load. A missing tool, a BIOS boot, a too-small or in-use disk, or a dead network surfaced as a confusing abort partway through, sometimes after partitioning had already run. Two gates now fail fast. validate_environment runs after filesystem selection, before any disk is touched: it confirms UEFI boot mode and that every required command is present, with the list coming from a new required_commands helper built like pacstrap_packages. validate_install_targets runs after disk selection, before the first wipe: it refuses a target that's mounted, holds active swap, or belongs to an imported pool or md array, rejects disks under 20 GB, and confirms a mirror is reachable via DNS plus a TCP probe (no ICMP, since some networks drop it). I folded the install_failure_cleanup hardening into the same change. It now falls back to lazy unmounts, so a pacstrap-interrupted target with busy bind mounts still releases the pool and unmounts the EFI partition. Without that, the disk-in-use guard would block the very retry the cleanup exists to enable. "Re-run to retry" only holds if the disk is genuinely freed first. The 20 GB floor is decimal on purpose. It reads as the natural minimum and clears a 20 GiB disk image with headroom instead of sitting on the boundary.
-rwxr-xr-xinstaller/archangel68
-rw-r--r--installer/lib/common.sh21
-rw-r--r--installer/lib/disk.sh59
-rw-r--r--tests/unit/test_archangel.bats133
-rw-r--r--tests/unit/test_common.bats48
-rw-r--r--tests/unit/test_disk.bats43
6 files changed, 369 insertions, 3 deletions
diff --git a/installer/archangel b/installer/archangel
index 537c1da..ea18202 100755
--- a/installer/archangel
+++ b/installer/archangel
@@ -93,6 +93,61 @@ filesystem_preflight() {
fi
}
# Succeed (return 0) when the EFI sysfs directory exists, i.e. the system was
# booted in UEFI mode. archangel is UEFI-only (EFI partitions, efibootmgr,
# ZFSBootMenu / GRUB-EFI); a legacy BIOS boot would die late.
#
# Arguments: $1 - (optional) directory to probe. Defaults to
#                 /sys/firmware/efi; the override exists so the predicate can
#                 be exercised against a fixture on a non-UEFI host.
# Returns:   0 if the directory exists, 1 otherwise.
is_uefi_boot() {
    local efi_dir="${1:-/sys/firmware/efi}"
    [[ -d "$efi_dir" ]]
}
+
# Succeed when an Arch mirror looks reachable: the host resolves and a TCP
# connection to it opens. Uses bash's /dev/tcp behind a timeout (no curl/nc
# dependency) and no ICMP, since some networks drop it. The default target,
# archlinux.org:443, is a stable stand-in for "internet + DNS work" before
# pacstrap runs.
#
# Arguments: $1 - (optional) host to probe; default archlinux.org
#            $2 - (optional) TCP port; default 443
# Returns:   0 when both steps succeed; non-zero when resolution or the
#            connect fails, or when either step exceeds its 10 s budget.
network_available() {
    local host="${1:-archlinux.org}"
    local port="${2:-443}"

    # The lookup gets its own timeout: without one, an unresponsive resolver
    # stalls the pre-flight for however long the resolver keeps retrying.
    timeout 10 getent hosts "$host" >/dev/null 2>&1 || return 1

    # Host and port are passed as positional parameters rather than spliced
    # into the script text, so they can never be parsed as shell syntax.
    # fd 3 belongs to the child bash and is closed when it exits.
    timeout 10 bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null
}
+
# Environment pre-flight: checks that don't depend on disk selection. Runs
# early in main() once FILESYSTEM is known, so a doomed install aborts before
# any disk is touched. Hard-fails via error().
#
# Globals: FILESYSTEM (read)
# Calls:   step, is_uefi_boot, required_commands, require_command, error, info
validate_environment() {
    step "Validating install environment"

    is_uefi_boot || error "Not booted in UEFI mode (/sys/firmware/efi missing). archangel requires UEFI."

    # Capture the list before looping. Feeding the loop straight from a
    # process substitution discards required_commands' exit status, so an
    # unknown filesystem would check nothing and still report success.
    local cmd_list
    cmd_list=$(required_commands "${FILESYSTEM:-}") \
        || error "No required-command list for filesystem '${FILESYSTEM:-}'; cannot validate the environment."

    # Here-string keeps the loop in the current shell, so a hard failure
    # raised inside require_command is not confined to a subshell.
    local cmd
    while IFS= read -r cmd; do
        [[ -n "$cmd" ]] || continue
        require_command "$cmd"
    done <<<"$cmd_list"

    info "Environment OK: UEFI mode and required commands present."
}
+
# Target pre-flight: checks that need SELECTED_DISKS finalized and the live
# network up. Runs after gather_input, before the first destructive op.
# Refuses an empty selection and any disk that's in active use, too small, or
# whose size can't be read, and aborts if no mirror is reachable. Hard-fails
# via error().
#
# Globals: SELECTED_DISKS (read), MIN_DISK_BYTES (read)
# Calls:   step, disk_in_use, disk_size_bytes, disk_meets_min_size,
#          network_available, error, info
validate_install_targets() {
    step "Validating target disks and connectivity"

    # An empty selection would skip the loop and report "Targets OK" without
    # having checked anything. The ${arr[*]+set} form is also safe when the
    # array is unset under nounset.
    [[ -n "${SELECTED_DISKS[*]+set}" ]] \
        || error "No target disks selected; nothing to validate."

    local disk size
    for disk in "${SELECTED_DISKS[@]}"; do
        if disk_in_use "$disk"; then
            error "Disk in use (mounted, swap, or active pool/array): $disk. Refusing to wipe it."
        fi
        # Absorb a failing probe here: under errexit a bare assignment from a
        # failed command substitution would abort silently, before the
        # diagnostic below could be printed.
        size=$(disk_size_bytes "$disk") || size=""
        disk_meets_min_size "$size" \
            || error "Disk too small or unreadable: $disk (${size:-unknown} bytes; need >= ${MIN_DISK_BYTES} = 20 GB)."
    done

    network_available \
        || error "No network connectivity (DNS + TCP 443 to archlinux.org failed). pacstrap needs a reachable mirror."

    info "Targets OK: disks idle and large enough; mirror reachable."
}
+
#############################
# Phase 1: Gather All Input
#############################
@@ -1250,8 +1305,13 @@ install_failure_cleanup() {
case "$FILESYSTEM" in
zfs)
- umount $EFI_DIR 2>/dev/null || true
- umount -R $MNTPOINT 2>/dev/null || true
+ # Lazy fallback (-l): a pacstrap-interrupted target can leave
+ # busy bind mounts (/proc, /sys, /dev) that a plain umount
+ # can't release. Detaching them lazily lets the pool export
+ # below succeed, so a retry / re-run sees a genuinely clean
+ # disk — the state validate_install_targets expects.
+ umount "$EFI_DIR" 2>/dev/null || umount -l "$EFI_DIR" 2>/dev/null || true
+ umount -R "$MNTPOINT" 2>/dev/null || umount -R -l "$MNTPOINT" 2>/dev/null || true
if zpool list "$POOL_NAME" >/dev/null 2>&1; then
zpool export "$POOL_NAME" 2>/dev/null \
|| zpool export -f "$POOL_NAME" 2>/dev/null \
@@ -1259,7 +1319,7 @@ install_failure_cleanup() {
fi
;;
btrfs)
- umount $EFI_DIR 2>/dev/null || true
+ umount "$EFI_DIR" 2>/dev/null || umount -l "$EFI_DIR" 2>/dev/null || true
btrfs_cleanup 2>/dev/null || true
btrfs_close_encryption 2>/dev/null || true
;;
@@ -1352,8 +1412,10 @@ main() {
preflight_checks
check_config
validate_filesystem
+ validate_environment
[[ "$UNATTENDED" == true ]] && validate_config
gather_input
+ validate_install_targets
filesystem_preflight
# Unattended installation begins
diff --git a/installer/lib/common.sh b/installer/lib/common.sh
index 2cd4798..7998eeb 100644
--- a/installer/lib/common.sh
+++ b/installer/lib/common.sh
@@ -102,6 +102,27 @@ pacstrap_packages() {
printf '%s\n' "${common[@]}" "${fs_specific[@]}"
}
# Print the external commands the installer needs for the given filesystem,
# one per line: common partitioning/bootstrap tools first, then the tools the
# pre-flight probes shell out to, then filesystem-specific ones.
# validate_environment loops over these and require_command's each, so a
# missing tool fails fast on the live ISO instead of mid-install.
#
# The probe tools are listed because the probes silence stderr: a missing
# lsblk or swapon would otherwise make the disk-in-use check report "idle",
# and a missing blockdev/getent/timeout would surface as a misleading
# "disk too small" or "no network" error.
#
# Arguments: $1 - filesystem: zfs | btrfs
# Outputs:   command names on stdout, one per line
# Returns:   0 on success; 1 (printing nothing) for an unknown or missing
#            filesystem.
#
# Usage: mapfile -t cmds < <(required_commands zfs)
required_commands() {
    local fs="${1:-}"
    local -a common=(
        sgdisk wipefs partprobe mkfs.fat pacstrap
        lsblk blockdev swapon getent timeout
    )
    local -a fs_specific
    case "$fs" in
        zfs)   fs_specific=(zpool zfs) ;;
        btrfs) fs_specific=(mkfs.btrfs grub-install) ;;
        *)     return 1 ;;
    esac
    printf '%s\n' "${common[@]}" "${fs_specific[@]}"
}
+
#############################
# Password / Passphrase Input
#############################
diff --git a/installer/lib/disk.sh b/installer/lib/disk.sh
index b548b4f..ae7801b 100644
--- a/installer/lib/disk.sh
+++ b/installer/lib/disk.sh
@@ -131,3 +131,62 @@ select_disks() {
info "Selected disks: ${SELECTED_DISKS[*]}"
}
+#############################
+# Pre-flight: Disk Safety
+#############################
+
# Minimum usable install disk, in bytes. Root plus the 50G reservation,
# packages, and snapshots needs real headroom; below this the install fails
# partway through. 20 GB is a hard floor (validate_install_targets errors out).
# Decimal GB (disk-vendor sizing) on purpose: it reads as the natural "20GB"
# minimum and clears a 20 GiB disk image with headroom rather than sitting
# exactly on the boundary.
# NOTE(review): a 50G reservation cannot fit on a disk at the 20 GB floor —
# confirm which figure is intended.
MIN_DISK_BYTES=20000000000 # 20 * 10^9 (20 GB)

# Pure size predicate: succeed only when <bytes> is a non-negative decimal
# integer meeting MIN_DISK_BYTES. Non-numeric, empty, or missing input fails
# (treated as an unknown size, which is itself a reason not to proceed).
#
# Arguments: $1 - size in bytes, decimal digits only
# Globals:   MIN_DISK_BYTES (read)
# Returns:   0 if $1 >= MIN_DISK_BYTES, 1 otherwise
disk_meets_min_size() {
    local bytes="${1:-}"
    [[ "$bytes" =~ ^[0-9]+$ ]] || return 1
    # 10# pins the base: bash arithmetic reads a leading 0 as octal, which
    # would shrink "020000000000" to 2 GiB and reject "08"/"09" with an error.
    (( 10#$bytes >= MIN_DISK_BYTES ))
}
+
# Report a block device's size in bytes by querying blockdev at call time.
# Deliberately a thin wrapper so callers can stub it; it is exercised by the
# VM integration harness rather than unit tests.
#
# Arguments: $1 - block device path
# Outputs:   the size on stdout; blockdev's own diagnostics are discarded
# Returns:   blockdev's exit status
disk_size_bytes() {
    local device="$1"
    blockdev --getsize64 "$device" 2>/dev/null
}
+
# Succeed (return 0) when <disk> is in active use and must NOT be wiped:
# any partition mounted, active swap on it, or membership in an imported
# zpool or assembled md array. Over-detection errs on the safe side
# (refuse). Live-state predicate — validated in the VM harness, where the
# install disks are deliberately idle so the happy path returns 1.
#
# Arguments: $1 - disk path; may be a symlink such as /dev/disk/by-id/...
# Calls:     command_exists (helper defined elsewhere in the installer)
# Returns:   0 if any check finds the disk in use, 1 otherwise
disk_in_use() {
    local disk="$1"
    local dev base swap_dev

    # Resolve symlinks to the kernel device path first. swapon, zpool status
    # -L and /proc/mdstat all report kernel names, so an unresolved by-id
    # path would match none of them and a busy disk would look idle.
    dev=$(readlink -f -- "$disk" 2>/dev/null) || dev=""
    [[ -n "$dev" ]] || dev="$disk"
    base="${dev##*/}"

    # Any mountpoint on the disk or its children.
    if lsblk -nro MOUNTPOINT "$dev" 2>/dev/null | grep -q .; then
        return 0
    fi

    # Active swap on the disk or a partition of it. Literal prefix match:
    # the path is data, not a regex, so '.' in a device name stays literal.
    while IFS= read -r swap_dev; do
        if [[ "$swap_dev" == "$dev"* ]]; then
            return 0
        fi
    done < <(swapon --show=NAME --noheadings 2>/dev/null)

    # Member of an imported zpool. -P prints full device paths (/dev/vda2),
    # so a fixed-string match on the disk path catches partition members too
    # — a plain word match on the bare name would miss "vda2".
    if command_exists zpool && zpool status -LP 2>/dev/null | grep -qF -- "$dev"; then
        return 0
    fi

    # Member of an assembled md array. /proc/mdstat lists bare partition names
    # (vda1[0]); substring-match the disk name (over-match errs toward refuse).
    if grep -qsF -- "$base" /proc/mdstat; then
        return 0
    fi

    return 1
}
+
diff --git a/tests/unit/test_archangel.bats b/tests/unit/test_archangel.bats
index c7bbc56..c38dcb8 100644
--- a/tests/unit/test_archangel.bats
+++ b/tests/unit/test_archangel.bats
@@ -239,3 +239,136 @@ setup() {
[[ " ${CALLS[*]} " == *" zpool list zroot "* ]]
[[ " ${CALLS[*]} " != *" zpool export"* ]]
}
+
@test "install_failure_cleanup ZFS path falls back to lazy unmount when a mount is busy" {
    FILESYSTEM=zfs
    POOL_NAME=zroot
    CALLS=()

    # A pacstrap-interrupted target can leave busy mounts that a plain
    # umount can't release; cleanup must retry lazily so the retry sees a
    # clean disk. The stub fails every call except one carrying -l as its
    # own word (a substring test would also match a path containing "-l").
    umount() {
        CALLS+=("umount $*")
        [[ " $* " == *" -l "* ]]
    }
    zpool() { CALLS+=("zpool $*"); return 0; }
    warn() { :; }
    error() { return 1; }

    install_failure_cleanup || true

    # The plain attempt must come first with the lazy retry right behind it;
    # asserting only the -l call would also pass for code that always
    # detaches lazily.
    [[ " ${CALLS[*]} " == *" umount /mnt/efi umount -l /mnt/efi "* ]]
    [[ " ${CALLS[*]} " == *" umount -R /mnt umount -R -l /mnt "* ]]
    # The pool still gets exported after the lazy unmount.
    [[ " ${CALLS[*]} " == *" zpool export zroot "* ]]
}

@test "install_failure_cleanup Btrfs path falls back to lazy unmount when EFI is busy" {
    FILESYSTEM=btrfs
    CALLS=()

    # Same busy-mount stub as the ZFS case: only a call with -l succeeds.
    umount() {
        CALLS+=("umount $*")
        [[ " $* " == *" -l "* ]]
    }
    btrfs_cleanup() { CALLS+=("btrfs_cleanup"); }
    btrfs_close_encryption() { CALLS+=("btrfs_close_encryption"); }
    warn() { :; }
    error() { return 1; }

    install_failure_cleanup || true

    # Plain attempt first, lazy retry immediately after.
    [[ " ${CALLS[*]} " == *" umount /mnt/efi umount -l /mnt/efi "* ]]
}
+
#############################
# validate_environment
#############################
# is_uefi_boot and required_commands are replaced with stubs in every case,
# so only the fail-fast wiring of the composition is under test and the
# result does not depend on the host's firmware mode or installed tools.
# The real command list is covered in test_common.bats; the real UEFI and
# network probes run in the VM harness.

@test "validate_environment errors when not booted in UEFI mode" {
    FILESYSTEM=zfs
    is_uefi_boot() { false; }
    required_commands() { :; }

    run validate_environment

    [[ "$status" -eq 1 ]]
    [[ "$output" == *"UEFI"* ]]
}

@test "validate_environment errors when a required command is missing" {
    FILESYSTEM=zfs
    is_uefi_boot() { true; }
    required_commands() { echo "definitely-not-a-real-cmd-xyz"; }

    run validate_environment

    [[ "$status" -eq 1 ]]
    [[ "$output" == *"definitely-not-a-real-cmd-xyz"* ]]
}

@test "validate_environment passes when UEFI present and commands resolve" {
    FILESYSTEM=zfs
    is_uefi_boot() { true; }
    required_commands() { echo "bash"; }

    run validate_environment

    [[ "$status" -eq 0 ]]
}
+
#############################
# validate_install_targets
#############################
# disk_in_use / disk_size_bytes / network_available are the system-boundary
# wrappers; stubbing them drives the real composition + real
# disk_meets_min_size. Live probes run in the VM harness on the happy path.

@test "validate_install_targets errors when a disk is in use" {
    SELECTED_DISKS=(/dev/sda)
    disk_in_use() { return 0; }
    disk_size_bytes() { echo 500107862016; }
    network_available() { return 0; }
    run validate_install_targets
    [ "$status" -eq 1 ]
    [[ "$output" == *"in use"* ]]
}

@test "validate_install_targets checks every selected disk, not just the first" {
    SELECTED_DISKS=(/dev/sda /dev/sdb)
    # Only the second disk is busy; the failure must name it.
    disk_in_use() { [[ "$1" == /dev/sdb ]]; }
    disk_size_bytes() { echo 500107862016; }
    network_available() { return 0; }
    run validate_install_targets
    [ "$status" -eq 1 ]
    [[ "$output" == *"/dev/sdb"* ]]
}

@test "validate_install_targets errors when a disk is too small" {
    SELECTED_DISKS=(/dev/sda)
    disk_in_use() { return 1; }
    disk_size_bytes() { echo 1000000; }
    network_available() { return 0; }
    run validate_install_targets
    [ "$status" -eq 1 ]
    [[ "$output" == *"too small"* ]]
}

@test "validate_install_targets errors when disk size is unreadable" {
    SELECTED_DISKS=(/dev/sda)
    disk_in_use() { return 1; }
    disk_size_bytes() { echo ""; }
    network_available() { return 0; }
    run validate_install_targets
    [ "$status" -eq 1 ]
    # Pin the diagnostic too: a bare status check would also pass if the
    # function died for an unrelated reason.
    [[ "$output" == *"unreadable"* ]]
    [[ "$output" == *"unknown bytes"* ]]
}

@test "validate_install_targets errors when the network is unreachable" {
    SELECTED_DISKS=(/dev/sda)
    disk_in_use() { return 1; }
    disk_size_bytes() { echo 500107862016; }
    network_available() { return 1; }
    run validate_install_targets
    [ "$status" -eq 1 ]
    [[ "$output" == *"network"* || "$output" == *"connectivity"* ]]
}

@test "validate_install_targets passes when disks idle, large enough, network up" {
    SELECTED_DISKS=(/dev/sda /dev/sdb)
    disk_in_use() { return 1; }
    disk_size_bytes() { echo 500107862016; }
    network_available() { return 0; }
    run validate_install_targets
    [ "$status" -eq 0 ]
}
diff --git a/tests/unit/test_common.bats b/tests/unit/test_common.bats
index 8ce7280..a639a4e 100644
--- a/tests/unit/test_common.bats
+++ b/tests/unit/test_common.bats
@@ -520,3 +520,51 @@ Boot0001* ZFSBootMenu"
grep -qF 'HOOKS=(base udev)' "$f"
rm -f "$f"
}
+
#############################
# required_commands
#############################
# required_commands prints one command per line, so membership is checked
# against whole lines of the captured output. A substring test is too loose:
# *"zfs"* would be satisfied by any entry that merely contains "zfs".

# Succeed when the output captured by the last `run` has <name> as a line.
output_has_line() {
    local want="$1" line
    for line in "${lines[@]}"; do
        if [[ "$line" == "$want" ]]; then
            return 0
        fi
    done
    return 1
}

# Succeed when <name> is absent. Negation lives in a function because a bare
# `! cmd` in a test body does not fail the test under errexit.
output_lacks_line() {
    ! output_has_line "$1"
}

@test "required_commands zfs includes zpool and zfs" {
    run required_commands zfs
    [ "$status" -eq 0 ]
    output_has_line zpool
    output_has_line zfs
}

@test "required_commands btrfs includes mkfs.btrfs and grub-install" {
    run required_commands btrfs
    [ "$status" -eq 0 ]
    output_has_line mkfs.btrfs
    output_has_line grub-install
}

@test "required_commands zfs excludes Btrfs-specific commands" {
    run required_commands zfs
    [ "$status" -eq 0 ]
    output_lacks_line mkfs.btrfs
    output_lacks_line grub-install
}

@test "required_commands btrfs excludes the zpool command" {
    run required_commands btrfs
    [ "$status" -eq 0 ]
    output_lacks_line zpool
    # The zfs CLI is just as ZFS-specific as zpool.
    output_lacks_line zfs
}

@test "required_commands includes partitioning + pacstrap commands for both filesystems" {
    local fs cmd
    for fs in zfs btrfs; do
        run required_commands "$fs"
        [ "$status" -eq 0 ]
        for cmd in sgdisk wipefs partprobe mkfs.fat pacstrap; do
            output_has_line "$cmd"
        done
    done
}

@test "required_commands unknown filesystem returns 1" {
    run required_commands ext4
    [ "$status" -eq 1 ]
}
diff --git a/tests/unit/test_disk.bats b/tests/unit/test_disk.bats
index 1b3cfba..6bc6d8d 100644
--- a/tests/unit/test_disk.bats
+++ b/tests/unit/test_disk.bats
@@ -232,3 +232,46 @@ partition_disks_setup() {
[[ " ${CALLS[*]} " != *"sgdisk"* ]]
[[ " ${CALLS[*]} " != *"wipefs"* ]]
}
+
#############################
# disk_meets_min_size / MIN_DISK_BYTES
#############################
# Boundary and rejection cases for the size floor. Each case feeds one
# literal byte count to the predicate and checks only its exit status.

@test "MIN_DISK_BYTES is 20 GB (decimal)" {
    [[ "$MIN_DISK_BYTES" -eq 20000000000 ]]
}

@test "disk_meets_min_size: exactly the minimum passes" {
    run disk_meets_min_size 20000000000
    [[ "$status" -eq 0 ]]
}

@test "disk_meets_min_size: one byte under the minimum fails" {
    run disk_meets_min_size 19999999999
    [[ "$status" -eq 1 ]]
}

@test "disk_meets_min_size: a 20 GiB disk image clears the 20 GB floor" {
    # 20 * 2^30 bytes
    run disk_meets_min_size 21474836480
    [[ "$status" -eq 0 ]]
}

@test "disk_meets_min_size: a large disk passes" {
    run disk_meets_min_size 500107862016
    [[ "$status" -eq 0 ]]
}

@test "disk_meets_min_size: zero fails" {
    run disk_meets_min_size 0
    [[ "$status" -eq 1 ]]
}

@test "disk_meets_min_size: non-numeric input fails" {
    run disk_meets_min_size notanumber
    [[ "$status" -eq 1 ]]
}

@test "disk_meets_min_size: empty input fails" {
    run disk_meets_min_size ""
    [[ "$status" -eq 1 ]]
}