aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-04-26 01:31:38 -0500
committerCraig Jennings <c@cjennings.net>2026-04-26 01:31:38 -0500
commit6de9f378cf52b9e9b0e89b396a12b978700241ff (patch)
tree9de36ebe2de4686970c8fd2bb4c124b403410bd6
parent1a261b0c220903c8bb628e7f2b94cf75a843f688 (diff)
downloadarchangel-6de9f378cf52b9e9b0e89b396a12b978700241ff.tar.gz
archangel-6de9f378cf52b9e9b0e89b396a12b978700241ff.zip
fix: clean up partial installs via ERR/INT/TERM trap
Failed installs left the system in an inconsistent state: /mnt mounted, the zpool imported, possibly LUKS containers open. The user had to manually unmount, export the pool, and clean up partitions before re-running the installer. The existing trap at the top of archangel was `trap 'error "Installation interrupted!"' INT TERM`. It just printed a message and exited. There was no ERR trap, so `set -e`-aborted commands ran no cleanup either. I added `install_failure_cleanup` next to the existing `cleanup`. It captures `$?` first, disarms the trap to prevent recursion, clears sensitive variables (ROOT_PASSWORD, ZFS_PASSPHRASE, LUKS_PASSPHRASE), and dispatches on FILESYSTEM: - ZFS path: unmount /mnt/efi, recursive umount /mnt, export the pool (with `-f` fallback) if it's still imported. - Btrfs path: unmount /mnt/efi, call the existing btrfs_cleanup and btrfs_close_encryption helpers. All cleanup steps swallow their own errors with `|| true` since partial state is expected when this fires mid-install. `install_zfs` and `install_btrfs` now both arm the trap as their first action and disarm it just before the success-path cleanup. The disarm matters because the success cleanup calls `zpool export` (or btrfs_close_encryption) directly, and those can produce non-zero exit codes that we don't want to interpret as "installation failed". The note in notes.org described this as "install_zfs has no mid-step recovery." The framing was off. Both paths were exposed: install_btrfs's `btrfs_cleanup` only runs on the success path, same as `cleanup` for ZFS. Both paths now have the same recovery shape. Added 4 bats tests for `install_failure_cleanup` that mock the system tools (umount, zpool, btrfs_cleanup, btrfs_close_encryption) via function override and track invocations through a CALLS array. Array assignment isn't affected by the production code's `>/dev/null 2>&1` redirects on `zpool list`, so we capture the call regardless of where the mock's stdout would have gone. Verified end-to-end on the dev box: sourced archangel, set FILESYSTEM=zfs, armed the trap, ran `false` to trigger `set -e`. The trap fired with exit code 1, dispatched to the ZFS cleanup path, called `umount /mnt/efi` and `umount -R /mnt`, checked `zpool list` (returned non-zero since no pool exists on the dev box), skipped the export, and exited via `error`. No behavior change on the success path. The existing `cleanup` and `btrfs_cleanup` stay unchanged.
-rwxr-xr-xinstaller/archangel58
-rw-r--r--tests/unit/test_archangel.bats105
2 files changed, 163 insertions, 0 deletions
diff --git a/installer/archangel b/installer/archangel
index b2fa2c0..55eac9c 100755
--- a/installer/archangel
+++ b/installer/archangel
@@ -1311,6 +1311,44 @@ cleanup() {
info "Cleanup complete."
}
+# Trap target for ERR / INT / TERM during install_zfs and
+# install_btrfs. Captures the failing exit code first, disarms the
+# trap to prevent recursion, clears sensitive variables, and
+# dispatches to the right per-filesystem cleanup before exiting via
+# error(). All cleanup steps swallow their own errors — partial
+# state is expected when this fires mid-install, so individual tool
+# failures are not fatal.
+install_failure_cleanup() {
+ local exit_code=$?
+ trap - ERR INT TERM
+
+ ROOT_PASSWORD=""
+ ZFS_PASSPHRASE=""
+ LUKS_PASSPHRASE=""
+
+ warn ""
+ warn "Installation failed (exit code $exit_code) — cleaning up..."
+
+ case "$FILESYSTEM" in
+ zfs)
+ umount /mnt/efi 2>/dev/null || true
+ umount -R /mnt 2>/dev/null || true
+ if zpool list "$POOL_NAME" >/dev/null 2>&1; then
+ zpool export "$POOL_NAME" 2>/dev/null \
+ || zpool export -f "$POOL_NAME" 2>/dev/null \
+ || true
+ fi
+ ;;
+ btrfs)
+ umount /mnt/efi 2>/dev/null || true
+ btrfs_cleanup 2>/dev/null || true
+ btrfs_close_encryption 2>/dev/null || true
+ ;;
+ esac
+
+ error "Installation failed; system cleaned up. Re-run the installer to retry."
+}
+
print_summary() {
echo ""
echo "╔═══════════════════════════════════════════════════════════════╗"
@@ -1416,6 +1454,13 @@ main() {
#############################
install_zfs() {
+ # Arm the failure trap before the first destructive operation. If
+ # any step below errors out or the user hits Ctrl-C, the trap runs
+ # cleanup (unmount /mnt + export pool) so the live ISO is left in a
+ # state where the user can re-run the installer without manual
+ # intervention.
+ trap 'install_failure_cleanup' ERR INT TERM
+
partition_disks
create_zfs_pool
create_datasets
@@ -1432,6 +1477,11 @@ install_zfs() {
configure_tmpfiles_private_tmp
sync_efi_partitions
create_genesis_snapshot
+
+ # Disarm the failure trap before the success-path cleanup. The
+ # success cleanup may emit non-zero exit codes that we don't want
+ # to interpret as "installation failed".
+ trap - ERR INT TERM
cleanup
print_summary
}
@@ -1441,6 +1491,10 @@ install_zfs() {
#############################
install_btrfs() {
+ # Arm the failure trap before any destructive operation. See the
+ # matching block in install_zfs() for the rationale.
+ trap 'install_failure_cleanup' ERR INT TERM
+
local btrfs_devices=()
local efi_parts=()
local root_parts=()
@@ -1488,6 +1542,10 @@ install_btrfs() {
configure_btrfs_pacman_hook
create_btrfs_genesis_snapshot
+ # Disarm the failure trap before the success-path cleanup. See
+ # the matching block in install_zfs() for the rationale.
+ trap - ERR INT TERM
+
# Cleanup
btrfs_cleanup
btrfs_close_encryption
diff --git a/tests/unit/test_archangel.bats b/tests/unit/test_archangel.bats
index c31cebb..ccabec7 100644
--- a/tests/unit/test_archangel.bats
+++ b/tests/unit/test_archangel.bats
@@ -212,3 +212,108 @@ setup() {
gather_input >/dev/null
[ -z "$RAID_LEVEL" ]
}
+
+#############################
+# install_failure_cleanup
+#############################
+# install_failure_cleanup is the trap target for ERR / INT / TERM
+# during install_zfs and install_btrfs. It clears sensitive vars,
+# dispatches on FILESYSTEM, and exits non-zero. Tests use function
+# overrides to capture which system tools the cleanup invokes; the
+# tools themselves (umount, zpool, btrfs_cleanup,
+# btrfs_close_encryption) are deliberately VM-tested per
+# testing-strategy.org.
+
+@test "install_failure_cleanup clears sensitive variables before exiting" {
+ FILESYSTEM=zfs
+ POOL_NAME=zroot
+ ROOT_PASSWORD="topsecret"
+ ZFS_PASSPHRASE="anothersecret"
+ LUKS_PASSPHRASE="thirdsecret"
+
+ # Mocks: silent no-ops for system tools; error returns non-zero
+ # so the function returns instead of exiting the test process.
+ umount() { :; }
+ zpool() { return 1; }
+ btrfs_cleanup() { :; }
+ btrfs_close_encryption() { :; }
+ warn() { :; }
+ error() { return 1; }
+
+ install_failure_cleanup || true
+
+ [ -z "$ROOT_PASSWORD" ]
+ [ -z "$ZFS_PASSPHRASE" ]
+ [ -z "$LUKS_PASSPHRASE" ]
+}
+
+@test "install_failure_cleanup dispatches to ZFS path when FILESYSTEM=zfs" {
+ FILESYSTEM=zfs
+ POOL_NAME=zroot
+ CALLS=()
+
+ # Mocks track invocations via CALLS array. Array assignment is not
+ # affected by the production code's >/dev/null 2>&1 redirects on
+ # the zpool list check, so we capture the call regardless of where
+ # the mock's stdout would have gone.
+ umount() { CALLS+=("umount $*"); return 0; }
+ zpool() {
+ CALLS+=("zpool $*")
+ [[ "$1" == "list" ]] && return 0
+ return 0
+ }
+ btrfs_cleanup() { CALLS+=("btrfs_cleanup"); }
+ btrfs_close_encryption() { CALLS+=("btrfs_close_encryption"); }
+ warn() { :; }
+ error() { CALLS+=("error"); return 1; }
+
+ install_failure_cleanup || true
+
+ [[ " ${CALLS[*]} " == *" umount /mnt/efi "* ]]
+ [[ " ${CALLS[*]} " == *" umount -R /mnt "* ]]
+ [[ " ${CALLS[*]} " == *" zpool list zroot "* ]]
+ [[ " ${CALLS[*]} " == *" zpool export zroot "* ]]
+ [[ " ${CALLS[*]} " != *" btrfs_cleanup "* ]]
+ [[ " ${CALLS[*]} " != *" btrfs_close_encryption "* ]]
+}
+
+@test "install_failure_cleanup dispatches to Btrfs path when FILESYSTEM=btrfs" {
+ FILESYSTEM=btrfs
+ CALLS=()
+
+ umount() { CALLS+=("umount $*"); return 0; }
+ zpool() { CALLS+=("zpool $*"); return 0; }
+ btrfs_cleanup() { CALLS+=("btrfs_cleanup"); }
+ btrfs_close_encryption() { CALLS+=("btrfs_close_encryption"); }
+ warn() { :; }
+ error() { CALLS+=("error"); return 1; }
+
+ install_failure_cleanup || true
+
+ [[ " ${CALLS[*]} " == *" umount /mnt/efi "* ]]
+ [[ " ${CALLS[*]} " == *" btrfs_cleanup "* ]]
+ [[ " ${CALLS[*]} " == *" btrfs_close_encryption "* ]]
+ [[ " ${CALLS[*]} " != *" zpool"* ]]
+}
+
+@test "install_failure_cleanup ZFS path skips zpool export when pool not imported" {
+ FILESYSTEM=zfs
+ POOL_NAME=zroot
+ CALLS=()
+
+ umount() { CALLS+=("umount $*"); return 0; }
+ zpool() {
+ CALLS+=("zpool $*")
+ [[ "$1" == "list" ]] && return 1 # pool NOT imported
+ return 0
+ }
+ btrfs_cleanup() { :; }
+ btrfs_close_encryption() { :; }
+ warn() { :; }
+ error() { return 1; }
+
+ install_failure_cleanup || true
+
+ [[ " ${CALLS[*]} " == *" zpool list zroot "* ]]
+ [[ " ${CALLS[*]} " != *" zpool export"* ]]
+}