From 6de9f378cf52b9e9b0e89b396a12b978700241ff Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Sun, 26 Apr 2026 01:31:38 -0500 Subject: fix: clean up partial installs via ERR/INT/TERM trap Failed installs left the system in an inconsistent state: /mnt mounted, the zpool imported, possibly LUKS containers open. The user had to manually unmount, export the pool, and clean up partitions before re-running the installer. The existing trap at the top of archangel was `trap 'error "Installation interrupted!"' INT TERM`. It just printed a message and exited. There was no ERR trap, so `set -e`-aborted commands ran no cleanup either. I added `install_failure_cleanup` next to the existing `cleanup`. It captures `$?` first, disarms the trap to prevent recursion, clears sensitive variables (ROOT_PASSWORD, ZFS_PASSPHRASE, LUKS_PASSPHRASE), and dispatches on FILESYSTEM: - ZFS path: unmount /mnt/efi, recursive umount /mnt, export the pool (with `-f` fallback) if it's still imported. - Btrfs path: unmount /mnt/efi, call the existing btrfs_cleanup and btrfs_close_encryption helpers. All cleanup steps swallow their own errors with `|| true` since partial state is expected when this fires mid-install. `install_zfs` and `install_btrfs` now both arm the trap as their first action and disarm it just before the success-path cleanup. The disarm matters because the success cleanup calls `zpool export` (or btrfs_close_encryption) directly, and those can produce non-zero exit codes that we don't want to interpret as "installation failed". The note in notes.org described this as "install_zfs has no mid-step recovery." The framing was off. Both paths were exposed: install_btrfs's `btrfs_cleanup` only runs on the success path, same as `cleanup` for ZFS. Both paths now have the same recovery shape. Added 4 bats tests for `install_failure_cleanup` that mock the system tools (umount, zpool, btrfs_cleanup, btrfs_close_encryption) via function override and track invocations through a CALLS array. Array assignment isn't affected by the production code's `>/dev/null 2>&1` redirects on `zpool list`, so we capture the call regardless of where the mock's stdout would have gone. Verified end-to-end on the dev box: sourced archangel, set FILESYSTEM=zfs, armed the trap, ran `false` to trigger `set -e`. The trap fired with exit code 1, dispatched to the ZFS cleanup path, called `umount /mnt/efi` and `umount -R /mnt`, checked `zpool list` (returned non-zero since no pool exists on the dev box), skipped the export, and exited via `error`. No behavior change on the success path. The existing `cleanup` and `btrfs_cleanup` stay unchanged. --- tests/unit/test_archangel.bats | 105 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tests/unit') diff --git a/tests/unit/test_archangel.bats b/tests/unit/test_archangel.bats index c31cebb..ccabec7 100644 --- a/tests/unit/test_archangel.bats +++ b/tests/unit/test_archangel.bats @@ -212,3 +212,108 @@ setup() { gather_input >/dev/null [ -z "$RAID_LEVEL" ] } + +############################# +# install_failure_cleanup +############################# +# install_failure_cleanup is the trap target for ERR / INT / TERM +# during install_zfs and install_btrfs. It clears sensitive vars, +# dispatches on FILESYSTEM, and exits non-zero. Tests use function +# overrides to capture which system tools the cleanup invokes; the +# tools themselves (umount, zpool, btrfs_cleanup, +# btrfs_close_encryption) are deliberately VM-tested per +# testing-strategy.org. + +@test "install_failure_cleanup clears sensitive variables before exiting" { + FILESYSTEM=zfs + POOL_NAME=zroot + ROOT_PASSWORD="topsecret" + ZFS_PASSPHRASE="anothersecret" + LUKS_PASSPHRASE="thirdsecret" + + # Mocks: silent no-ops for system tools; error returns non-zero + # so the function returns instead of exiting the test process. + umount() { :; } + zpool() { return 1; } + btrfs_cleanup() { :; } + btrfs_close_encryption() { :; } + warn() { :; } + error() { return 1; } + + install_failure_cleanup || true + + [ -z "$ROOT_PASSWORD" ] + [ -z "$ZFS_PASSPHRASE" ] + [ -z "$LUKS_PASSPHRASE" ] +} + +@test "install_failure_cleanup dispatches to ZFS path when FILESYSTEM=zfs" { + FILESYSTEM=zfs + POOL_NAME=zroot + CALLS=() + + # Mocks track invocations via CALLS array. Array assignment is not + # affected by the production code's >/dev/null 2>&1 redirects on + # the zpool list check, so we capture the call regardless of where + # the mock's stdout would have gone. + umount() { CALLS+=("umount $*"); return 0; } + zpool() { + CALLS+=("zpool $*") + [[ "$1" == "list" ]] && return 0 + return 0 + } + btrfs_cleanup() { CALLS+=("btrfs_cleanup"); } + btrfs_close_encryption() { CALLS+=("btrfs_close_encryption"); } + warn() { :; } + error() { CALLS+=("error"); return 1; } + + install_failure_cleanup || true + + [[ " ${CALLS[*]} " == *" umount /mnt/efi "* ]] + [[ " ${CALLS[*]} " == *" umount -R /mnt "* ]] + [[ " ${CALLS[*]} " == *" zpool list zroot "* ]] + [[ " ${CALLS[*]} " == *" zpool export zroot "* ]] + [[ " ${CALLS[*]} " != *" btrfs_cleanup "* ]] + [[ " ${CALLS[*]} " != *" btrfs_close_encryption "* ]] +} + +@test "install_failure_cleanup dispatches to Btrfs path when FILESYSTEM=btrfs" { + FILESYSTEM=btrfs + CALLS=() + + umount() { CALLS+=("umount $*"); return 0; } + zpool() { CALLS+=("zpool $*"); return 0; } + btrfs_cleanup() { CALLS+=("btrfs_cleanup"); } + btrfs_close_encryption() { CALLS+=("btrfs_close_encryption"); } + warn() { :; } + error() { CALLS+=("error"); return 1; } + + install_failure_cleanup || true + + [[ " ${CALLS[*]} " == *" umount /mnt/efi "* ]] + [[ " ${CALLS[*]} " == *" btrfs_cleanup "* ]] + [[ " ${CALLS[*]} " == *" btrfs_close_encryption "* ]] + [[ " ${CALLS[*]} " != *" zpool"* ]] +} + +@test "install_failure_cleanup ZFS path skips zpool export when pool not imported" { + FILESYSTEM=zfs + POOL_NAME=zroot + CALLS=() + + umount() { CALLS+=("umount $*"); return 0; } + zpool() { + CALLS+=("zpool $*") + [[ "$1" == "list" ]] && return 1 # pool NOT imported + return 0 + } + btrfs_cleanup() { :; } + btrfs_close_encryption() { :; } + warn() { :; } + error() { return 1; } + + install_failure_cleanup || true + + [[ " ${CALLS[*]} " == *" zpool list zroot "* ]] + [[ " ${CALLS[*]} " != *" zpool export"* ]] +} -- cgit v1.2.3