diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-22 18:03:40 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-22 18:03:40 -0500 |
| commit | b6525a50fabf3aedf41eee70c164519b00d27704 (patch) | |
| tree | 0b9900eb584509051c83ebed0ed1427ee9bee9e7 /installer | |
| parent | 4ef30e5c84ab22ba1724608009093d6725a1ceda (diff) | |
| download | archangel-b6525a50fabf3aedf41eee70c164519b00d27704.tar.gz archangel-b6525a50fabf3aedf41eee70c164519b00d27704.zip | |
feat(install): add pre-flight environment and disk-target validation
archangel went straight from filesystem selection into a destructive install behind only a root check and a ZFS module load. A missing tool, a BIOS boot, a too-small or in-use disk, or a dead network surfaced as a confusing abort partway through, sometimes after partitioning had already run.
Two gates now fail fast. validate_environment runs after filesystem selection, before any disk is touched: it confirms UEFI boot mode and that every required command is present, with the list coming from a new required_commands helper built like pacstrap_packages. validate_install_targets runs after disk selection, before the first wipe: it refuses a target that's mounted, holds active swap, or belongs to an imported pool or md array; rejects disks under 20 GB; and confirms the network is usable by resolving archlinux.org and opening a TCP connection to port 443, as a stand-in for mirror reachability (no ICMP, since some networks drop it).
I folded the install_failure_cleanup hardening into the same change. It now falls back to lazy unmounts, so a pacstrap-interrupted target with busy bind mounts still releases the pool and unmounts the EFI partition. Without that, the disk-in-use guard would block the very retry the cleanup exists to enable. "Re-run to retry" only holds if the disk is genuinely freed first.
The 20 GB floor is decimal on purpose. It reads as the natural minimum and clears a 20 GiB disk image with headroom instead of sitting on the boundary.
Diffstat (limited to 'installer')
| -rwxr-xr-x | installer/archangel | 68 | ||||
| -rw-r--r-- | installer/lib/common.sh | 21 | ||||
| -rw-r--r-- | installer/lib/disk.sh | 59 |
3 files changed, 145 insertions, 3 deletions
#############################
# installer/archangel
# New helpers, placed after filesystem_preflight() and before the
# "Phase 1: Gather All Input" section.
#############################

# Succeed only when the live system was booted in UEFI mode, detected by the
# presence of /sys/firmware/efi. archangel is UEFI-only (EFI partitions,
# efibootmgr, ZFSBootMenu / GRUB-EFI); a legacy BIOS boot would die late.
# Returns: 0 in UEFI mode, 1 otherwise.
is_uefi_boot() {
    [[ -d /sys/firmware/efi ]]
}

# Succeed when the network is usable: the host resolves and a TCP connection
# to the port opens within 10 seconds. Uses bash's /dev/tcp behind a timeout
# (no curl/nc dependency) and no ICMP, since some networks drop it. The
# default target, archlinux.org:443, is a stable stand-in for
# "internet + DNS work" before pacstrap runs.
# Arguments: $1 - host to probe (optional, default archlinux.org)
#            $2 - TCP port      (optional, default 443)
# Returns:   0 when both DNS and the TCP connect succeed, non-zero otherwise.
network_available() {
    local host="${1:-archlinux.org}"
    local port="${2:-443}"

    getent hosts "$host" >/dev/null 2>&1 || return 1
    # Host and port are handed to the child shell as positional parameters
    # rather than spliced into the -c string, so they are never re-parsed
    # as shell code.
    timeout 10 bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null
}

# Environment pre-flight: checks that don't depend on disk selection. Runs
# early in main() once FILESYSTEM is known, so a doomed install aborts before
# any disk is touched. Hard-fails via error().
# Globals: FILESYSTEM (read)
validate_environment() {
    step "Validating install environment"

    is_uefi_boot || error "Not booted in UEFI mode (/sys/firmware/efi missing). archangel requires UEFI."

    # Capture the list with a plain command substitution so a non-zero
    # return from required_commands (unknown filesystem) is seen here.
    # Reading it through a process substitution would discard that status
    # and report "Environment OK" after checking nothing.
    local cmd_list cmd
    cmd_list=$(required_commands "${FILESYSTEM:-}") \
        || error "Cannot determine required commands for filesystem '${FILESYSTEM:-}'."

    while IFS= read -r cmd; do
        [[ -n "$cmd" ]] || continue
        require_command "$cmd"
    done <<< "$cmd_list"

    info "Environment OK: UEFI mode and required commands present."
}

# Target pre-flight: checks that need SELECTED_DISKS finalized and the live
# network up. Runs after gather_input, before the first destructive op.
# Refuses any disk that's in active use, too small, or whose size can't be
# read, and aborts if the network probe fails. Hard-fails via error().
# Globals: SELECTED_DISKS (read), MIN_DISK_BYTES (read)
validate_install_targets() {
    step "Validating target disks and connectivity"

    # With no disks the loop below would pass vacuously and report success.
    (( ${#SELECTED_DISKS[@]} > 0 )) \
        || error "No target disks selected; nothing to validate."

    # Derive the human-readable figure from the constant so the message
    # cannot drift if the floor is ever changed.
    local min_gb=$(( MIN_DISK_BYTES / 1000000000 ))

    local disk size
    for disk in "${SELECTED_DISKS[@]}"; do
        if disk_in_use "$disk"; then
            error "Disk in use (mounted, swap, or active pool/array): $disk. Refusing to wipe it."
        fi
        # A failed size query must reach the descriptive error below rather
        # than abort silently if the caller runs under errexit.
        size=$(disk_size_bytes "$disk") || size=""
        disk_meets_min_size "$size" \
            || error "Disk too small or unreadable: $disk (${size:-unknown} bytes; need >= ${MIN_DISK_BYTES} = ${min_gb} GB)."
    done

    network_available \
        || error "No network connectivity (DNS + TCP 443 to archlinux.org failed). pacstrap needs a reachable mirror."

    info "Targets OK: disks idle and large enough; mirror reachable."
}

# ---------------------------------------------------------------------------
# The same commit also touches install_failure_cleanup() and main(). Only
# fragments of those functions are visible, so they are kept verbatim as
# reference hunks instead of being rewritten.
#
# @@ -1250,8 +1305,13 @@ install_failure_cleanup() {
#      case "$FILESYSTEM" in
#          zfs)
# -            umount $EFI_DIR 2>/dev/null || true
# -            umount -R $MNTPOINT 2>/dev/null || true
# +            # Lazy fallback (-l): a pacstrap-interrupted target can leave
# +            # busy bind mounts (/proc, /sys, /dev) that a plain umount
# +            # can't release. Detaching them lazily lets the pool export
# +            # below succeed, so a retry / re-run sees a genuinely clean
# +            # disk — the state validate_install_targets expects.
# +            umount "$EFI_DIR" 2>/dev/null || umount -l "$EFI_DIR" 2>/dev/null || true
# +            umount -R "$MNTPOINT" 2>/dev/null || umount -R -l "$MNTPOINT" 2>/dev/null || true
#              if zpool list "$POOL_NAME" >/dev/null 2>&1; then
#                  zpool export "$POOL_NAME" 2>/dev/null \
#                      || zpool export -f "$POOL_NAME" 2>/dev/null \
# @@ -1259,7 +1319,7 @@ install_failure_cleanup() {
#              fi
#              ;;
#          btrfs)
# -            umount $EFI_DIR 2>/dev/null || true
# +            umount "$EFI_DIR" 2>/dev/null || umount -l "$EFI_DIR" 2>/dev/null || true
#              btrfs_cleanup 2>/dev/null || true
#              btrfs_close_encryption 2>/dev/null || true
#              ;;
# @@ -1352,8 +1412,10 @@ main() {
#      preflight_checks
#      check_config
#      validate_filesystem
# +    validate_environment
#      [[ "$UNATTENDED" == true ]] && validate_config
#      gather_input
# +    validate_install_targets
#      filesystem_preflight
#
#      # Unattended installation begins
# ---------------------------------------------------------------------------

#############################
# installer/lib/common.sh
# New helper, placed after pacstrap_packages() and before the
# "Password / Passphrase Input" section.
#############################

# Print the external commands the installer needs for the given filesystem,
# one per line: common partitioning/bootstrap tools first, then the tools the
# pre-flight gates themselves call, then filesystem-specific ones.
# validate_environment loops over these and require_command's each, so a
# missing tool fails fast on the live ISO instead of mid-install.
# Arguments: $1 - filesystem name (zfs or btrfs)
# Outputs:   command names on stdout, one per line
# Returns:   0 on success, 1 for an unknown filesystem (nothing printed)
#
# Usage: mapfile -t cmds < <(required_commands zfs)
required_commands() {
    local fs="${1:-}"
    local common=(
        sgdisk wipefs partprobe mkfs.fat pacstrap
    )
    # The disk-safety and network probes silence stderr on these tools. If
    # lsblk or swapon were absent, disk_in_use would see no output and report
    # a busy disk as idle, so their presence is verified up front.
    local preflight=(
        blockdev lsblk swapon getent timeout
    )
    local fs_specific
    case "$fs" in
        zfs)   fs_specific=(zpool zfs) ;;
        btrfs) fs_specific=(mkfs.btrfs grub-install) ;;
        *)     return 1 ;;
    esac
    printf '%s\n' "${common[@]}" "${preflight[@]}" "${fs_specific[@]}"
}

#############################
# installer/lib/disk.sh
# Pre-flight: Disk Safety (appended after select_disks()).
#############################

# Minimum usable install disk, in bytes. 20 GB is a hard floor
# (validate_install_targets errors out below it). Decimal GB (disk-vendor
# sizing) on purpose: it reads as the natural "20GB" minimum and clears a
# 20 GiB disk image with headroom rather than sitting exactly on the
# boundary.
# NOTE(review): the original comment justified the floor with "root plus the
# 50G reservation, packages, and snapshots"; a 50G reservation does not fit
# under a 20 GB floor — confirm which figure is intended.
MIN_DISK_BYTES=20000000000  # 20 * 10^9 (20 GB)

# Pure size predicate: succeed only when <bytes> is a non-negative decimal
# integer meeting MIN_DISK_BYTES. Non-numeric or empty input fails (treated
# as an unknown size, which is itself a reason not to proceed).
# Arguments: $1 - size in bytes
# Returns:   0 when large enough, 1 otherwise
disk_meets_min_size() {
    local bytes="${1:-}"
    [[ "$bytes" =~ ^[0-9]+$ ]] || return 1
    # 10# forces decimal: without it a leading zero makes bash read the
    # value as octal, giving a wrong magnitude or an arithmetic error on
    # the digits 8 and 9.
    (( 10#$bytes >= MIN_DISK_BYTES ))
}

# Size of a block device in bytes (live query). Thin wrapper over blockdev;
# exercised by the VM integration harness rather than unit tests.
# Arguments: $1 - block device path
# Outputs:   byte count on stdout; nothing when the query fails
disk_size_bytes() {
    blockdev --getsize64 "$1" 2>/dev/null
}

# Succeed (return 0) when <disk> is in active use and must NOT be wiped:
# any partition mounted, active swap on it, or membership in an imported
# zpool or assembled md array. Over-detection errs on the safe side
# (refuse). Live-state predicate — validated in the VM harness, where the
# install disks are deliberately idle so the happy path returns 1.
# Arguments: $1 - block device path (may be a symlink such as
#                 /dev/disk/by-id/...)
# Returns:   0 when in use (or when no path was given), 1 when idle
disk_in_use() {
    local disk="${1:-}"
    # Without a path nothing can be proven idle, so refuse.
    [[ -n "$disk" ]] || return 0

    # swapon, "zpool status -L" and /proc/mdstat all report kernel device
    # names, so a symlinked target has to be canonicalised before comparing
    # or it would never match. Fall back to the given path if that fails.
    local real base
    real=$(readlink -f -- "$disk" 2>/dev/null) || real=""
    [[ -n "$real" ]] || real="$disk"
    base=$(basename -- "$real")

    # Any mountpoint on the disk or its children.
    if lsblk -nro MOUNTPOINT "$disk" 2>/dev/null | grep -q .; then
        return 0
    fi

    # Active swap on the disk or a partition of it. Prefix comparison in
    # bash keeps the match anchored without treating the path as a regex.
    local swap_dev
    while IFS= read -r swap_dev; do
        if [[ "$swap_dev" == "$real"* || "$swap_dev" == "$disk"* ]]; then
            return 0
        fi
    done < <(swapon --show=NAME --noheadings 2>/dev/null)

    # Member of an imported zpool. -P prints full device paths (/dev/vda2),
    # so a fixed-string match on the disk path catches partition members too
    # — a plain word match on the bare name would miss "vda2".
    if command -v zpool >/dev/null 2>&1 \
        && zpool status -LP 2>/dev/null | grep -qF -e "$real" -e "$disk"; then
        return 0
    fi

    # Member of an assembled md array. /proc/mdstat lists bare partition
    # names (vda1[0]); substring-match the disk name (over-match errs toward
    # refuse). -s keeps a missing /proc/mdstat quiet.
    if grep -qsF -- "$base" /proc/mdstat; then
        return 0
    fi

    return 1
}
