aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-06-30 07:56:41 -0400
committerCraig Jennings <c@cjennings.net>2026-06-30 07:56:41 -0400
commit6bd832897813c730deb12768d1eb5b02af66ad20 (patch)
treefdb3b76316deb14c6a8dfd39e3b7d03e06283c32
parent394f3dbdadb29f7477d452634605f5c269aaed6f (diff)
downloadarchsetup-6bd832897813c730deb12768d1eb5b02af66ad20.tar.gz
archsetup-6bd832897813c730deb12768d1eb5b02af66ad20.zip
feat: install pre-pacman ZFS snapshot hook on ZFS-root systems
archsetup took sanoid from install-archzfs but never ported the pre-pacman snapshot hook, so a ZFS-root install had no transaction-triggered rollback point — the working setup only existed as a hand-placed script on velox, lost on reinstall. Add configure_pre_pacman_snapshots(): a PreTransaction pacman hook plus a self-pruning script that keeps the 10 most recent pre-pacman snapshots (sanoid ignores them — they aren't autosnap_ names). It's gated to ZFS-root and runs late in boot_ux, so the hook doesn't fire during the install's own package operations and the first snapshot is the fresh system. The script ships as scripts/zfs-pre-snapshot, made ZFS_PRE_* env-overridable so the pruning logic is unit-testable. Unit tests drive it against a fake zfs (creates a snapshot, prunes the oldest past KEEP, ignores non-pre-pacman snapshots, honors the lockfile interval, warns on failure); a Testinfra test asserts the hook and script land on a ZFS install; the orchestrator test pins the new boot_ux substep.
-rwxr-xr-xarchsetup34
-rw-r--r--docs/design/2026-06-29-zfs-pre-snapshot-installer.org33
-rw-r--r--scripts/testing/tests/test_boot.py16
-rwxr-xr-xscripts/zfs-pre-snapshot43
-rw-r--r--tests/installer-steps/test_orchestrators.py1
-rwxr-xr-xtests/zfs-pre-snapshot/fake-zfs14
-rw-r--r--tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py116
7 files changed, 250 insertions, 7 deletions
diff --git a/archsetup b/archsetup
index 7c98147..acb14bd 100755
--- a/archsetup
+++ b/archsetup
@@ -1778,6 +1778,39 @@ configure_btrfs_snapshots() {
}
+configure_pre_pacman_snapshots() {
+ # ZFS only: a pacman PreTransaction hook that snapshots the root dataset
+ # before every transaction, with a self-pruning script that keeps the most
+ # recent 10 (sanoid ignores these — they aren't autosnap_ names). This is
+ # the transaction-triggered complement to the scheduled sanoid snapshots in
+ # configure_zfs_snapshots.
+ #
+ # Called from boot_ux (the last step), NOT from configure_zfs_snapshots
+ # (which runs early), so the hook doesn't fire during the install's own
+ # package operations — the first pre-pacman snapshot is the fresh system.
+ is_zfs_root || return 0
+
+ action="installing pre-pacman snapshot script" && display "task" "$action"
+ cp "$user_archsetup_dir/scripts/zfs-pre-snapshot" /usr/local/bin/zfs-pre-snapshot
+ chmod +x /usr/local/bin/zfs-pre-snapshot
+
+ action="installing pre-pacman snapshot hook" && display "task" "$action"
+ mkdir -p /etc/pacman.d/hooks
+ cat << 'EOF' > /etc/pacman.d/hooks/zfs-snapshot.hook
+[Trigger]
+Operation = Upgrade
+Operation = Install
+Operation = Remove
+Type = Package
+Target = *
+
+[Action]
+Description = Creating ZFS snapshot before pacman transaction...
+When = PreTransaction
+Exec = /usr/local/bin/zfs-pre-snapshot
+EOF
+}
+
configure_user_lingering() {
# User Services Lingering
@@ -2666,6 +2699,7 @@ boot_ux() {
configure_tlp_power
trim_firmware
configure_grub
+ configure_pre_pacman_snapshots # ZFS only; late so it skips the install itself
}
tighten_efi_permissions() {
diff --git a/docs/design/2026-06-29-zfs-pre-snapshot-installer.org b/docs/design/2026-06-29-zfs-pre-snapshot-installer.org
index 413bfa5..e5a339e 100644
--- a/docs/design/2026-06-29-zfs-pre-snapshot-installer.org
+++ b/docs/design/2026-06-29-zfs-pre-snapshot-installer.org
@@ -78,10 +78,29 @@ else
fi
#+end_src
-* Open items before implementation
-
-- Source or write =/etc/pacman.d/hooks/zfs-snapshot.hook= (the trigger).
-- Decide the exact insertion point in the ZFS-root install path.
-- Add a ZFS-root VM test asserting the hook + script land and the script
- self-prunes past =KEEP=.
-- Correct the stale 2026-01-17 security-doc line.
+* Implementation (2026-06-30)
+
+- Hook sourced from velox (=/etc/pacman.d/hooks/zfs-snapshot.hook=) and embedded
+ as a heredoc in =configure_pre_pacman_snapshots()=.
+- Insertion point: a new =configure_pre_pacman_snapshots()= gated on
+ =is_zfs_root=, called from =boot_ux= (the last step) so the hook doesn't fire
+ during the install's own package operations — the first pre-pacman snapshot is
+ the fresh system. The script ships as =scripts/zfs-pre-snapshot= (the
+ =zfs-replicate= pattern), made =ZFS_PRE_*=-env-overridable for testability.
+- Tests: =tests/zfs-pre-snapshot/= unit-tests the pruning logic against a fake
+ =zfs= (creates, prunes oldest-past-KEEP, ignores non-=pre-pacman_= snapshots,
+ honors the lockfile, warns on snapshot failure); =test_boot.py= asserts the
+ hook + script land on a ZFS install; the orchestrator test pins the new
+ =boot_ux= substep.
+
+* Note on the "stale security doc"
+
+The 2026-01-17 line "ZFS pre-pacman snapshots (already in install-archzfs)" is
+*not* stale: that file is an archive generated by install-archzfs (see its
+header and footer), and the claim is accurate for install-archzfs. The real gap
+was that archsetup took sanoid from install-archzfs but never ported the
+pre-pacman hook. This change ports it. The archive is left untouched.
+
+* Remaining
+
+- ZFS-root VM verification (=make test FS_PROFILE=zfs=) before the task closes.
diff --git a/scripts/testing/tests/test_boot.py b/scripts/testing/tests/test_boot.py
index 78b4404..e442682 100644
--- a/scripts/testing/tests/test_boot.py
+++ b/scripts/testing/tests/test_boot.py
@@ -65,3 +65,19 @@ def test_zfs_has_sanoid(host):
if not host.exists("zfs"):
pytest.skip("ZFS not installed (non-ZFS system)")
assert host.exists("sanoid"), "ZFS system should have sanoid installed"
+
+
+def test_zfs_pre_pacman_snapshot_hook(host):
+ # archsetup installs a PreTransaction pacman hook + a self-pruning script so
+ # every pacman transaction is preceded by a rollback snapshot (configure_
+ # pre_pacman_snapshots, run late in boot_ux). ZFS-root only.
+ if not host.exists("zfs"):
+ pytest.skip("ZFS not installed (non-ZFS system)")
+ script = host.file("/usr/local/bin/zfs-pre-snapshot")
+ assert script.exists and script.is_file, "pre-pacman snapshot script missing"
+ assert script.mode & 0o111, "pre-pacman snapshot script is not executable"
+ hook = host.file("/etc/pacman.d/hooks/zfs-snapshot.hook")
+ assert hook.exists and hook.is_file, "zfs-snapshot.hook missing"
+ assert "PreTransaction" in hook.content_string, "hook not PreTransaction"
+ assert "/usr/local/bin/zfs-pre-snapshot" in hook.content_string, \
+ "hook does not exec the snapshot script"
diff --git a/scripts/zfs-pre-snapshot b/scripts/zfs-pre-snapshot
new file mode 100755
index 0000000..ed914d0
--- /dev/null
+++ b/scripts/zfs-pre-snapshot
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Snapshot the root dataset before a pacman transaction, then prune to the most
+# recent $KEEP pre-pacman snapshots. Run from the zfs-snapshot.hook pacman hook
+# (PreTransaction). Sanoid doesn't manage these (they aren't autosnap_ names),
+# so retention is enforced here at creation time.
+#
+# Defaults match the live zroot layout; the ZFS_PRE_* env vars override them so
+# the pruning logic is unit-testable against a fake zfs on PATH:
+#   ZFS_PRE_POOL          pool name (default: zroot)
+#   ZFS_PRE_DATASET       dataset to snapshot (default: $POOL/ROOT/default)
+#   ZFS_PRE_LOCKFILE      rate-limit marker (default: /run/zfs-pre-snapshot.lock)
+#   ZFS_PRE_MIN_INTERVAL  minimum seconds between snapshots (default: 60)
+#   ZFS_PRE_KEEP          pre-pacman snapshots to retain, >= 1 (default: 10)
+
+POOL="${ZFS_PRE_POOL:-zroot}"
+DATASET="${ZFS_PRE_DATASET:-$POOL/ROOT/default}"
+# The marker defaults to root-writable /run rather than world-writable /tmp:
+# the interval check below trusts this file's mtime, so in /tmp any local user
+# could keep touching it and suppress every snapshot.
+LOCKFILE="${ZFS_PRE_LOCKFILE:-/run/zfs-pre-snapshot.lock}"
+MIN_INTERVAL="${ZFS_PRE_MIN_INTERVAL:-60}"
+KEEP="${ZFS_PRE_KEEP:-10}"
+
+# Reject unusable overrides instead of misbehaving inside a pacman hook: a
+# non-numeric KEEP makes `head` fail (pruning silently stops), and KEEP=0 would
+# prune the snapshot that was just created.
+if ! [[ "$MIN_INTERVAL" =~ ^(0|[1-9][0-9]*)$ ]]; then
+    echo "Warning: invalid ZFS_PRE_MIN_INTERVAL '$MIN_INTERVAL', using 60" >&2
+    MIN_INTERVAL=60
+fi
+if ! [[ "$KEEP" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Warning: invalid ZFS_PRE_KEEP '$KEEP', using 10" >&2
+    KEEP=10
+fi
+
+# Skip if a snapshot was created within the last $MIN_INTERVAL seconds. A single
+# pacman invocation can fire several transactions; this stops a burst of them
+# from each cutting a near-identical snapshot.
+if [ -f "$LOCKFILE" ]; then
+    last=$(stat -c %Y "$LOCKFILE" 2>/dev/null || echo 0)
+    now=$(date +%s)
+    if (( now - last < MIN_INTERVAL )); then
+        exit 0
+    fi
+fi
+
+TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
+SNAPSHOT_NAME="pre-pacman_$TIMESTAMP"
+
+# A failed snapshot is only warned about, and nothing is pruned: existing
+# rollback points are never removed unless a new one was just taken.
+if ! zfs snapshot "$DATASET@$SNAPSHOT_NAME"; then
+    echo "Warning: Failed to create snapshot" >&2
+    exit 0
+fi
+
+echo "Created snapshot: $DATASET@$SNAPSHOT_NAME"
+touch "$LOCKFILE"
+
+# Keep only the most recent $KEEP pre-pacman snapshots; destroy older ones.
+# The list is sorted oldest-first, so `head -n -KEEP` (GNU head) yields every
+# line except the newest $KEEP.
+zfs list -H -o name -t snapshot -s creation "$DATASET" 2>/dev/null \
+    | grep '@pre-pacman_' \
+    | head -n -"$KEEP" \
+    | while IFS= read -r old; do
+        zfs destroy "$old" && echo "Pruned old snapshot: $old"
+    done
diff --git a/tests/installer-steps/test_orchestrators.py b/tests/installer-steps/test_orchestrators.py
index e62c198..48b7508 100644
--- a/tests/installer-steps/test_orchestrators.py
+++ b/tests/installer-steps/test_orchestrators.py
@@ -46,6 +46,7 @@ ORCHESTRATORS = {
"tighten_efi_permissions", "add_nvme_early_module",
"configure_initramfs_hook", "configure_encrypted_autologin",
"configure_tlp_power", "trim_firmware", "configure_grub",
+ "configure_pre_pacman_snapshots",
],
"user_customizations": [
"clone_user_repos", "stow_dotfiles", "prune_waybar_battery",
diff --git a/tests/zfs-pre-snapshot/fake-zfs b/tests/zfs-pre-snapshot/fake-zfs
new file mode 100755
index 0000000..508c0f3
--- /dev/null
+++ b/tests/zfs-pre-snapshot/fake-zfs
@@ -0,0 +1,14 @@
+#!/bin/sh
+# Fake zfs for the zfs-pre-snapshot unit test. `snapshot` and `destroy` are
+# logged (FAKE_ZFS_LOG); `list` prints a fixture snapshot set (FAKE_ZFS_SNAPSHOTS).
+# Set FAKE_ZFS_SNAPSHOT_FAIL to make snapshot creation fail.
+case "$1" in
+ snapshot)
+ [ -n "$FAKE_ZFS_SNAPSHOT_FAIL" ] && exit 1
+ echo "snapshot $2" >> "$FAKE_ZFS_LOG"; exit 0 ;;
+ destroy)
+ echo "destroy $2" >> "$FAKE_ZFS_LOG"; exit 0 ;;
+ list)
+ cat "$FAKE_ZFS_SNAPSHOTS" 2>/dev/null; exit 0 ;;
+esac
+exit 0
diff --git a/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py b/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py
new file mode 100644
index 0000000..ed7731b
--- /dev/null
+++ b/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py
@@ -0,0 +1,116 @@
+"""Unit tests for scripts/zfs-pre-snapshot.
+
+The script snapshots the root dataset before a pacman transaction and prunes to
+the most recent KEEP pre-pacman snapshots. These tests drive the real script
+with a fake zfs on PATH (snapshot/destroy logged, list returns a fixture set)
+and env-rooted state, so nothing touches a real pool.
+"""
+
+import os
+import shutil
+import subprocess
+import tempfile
+import time
+import unittest
+
+REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+SCRIPT = os.path.join(REPO_ROOT, "scripts/zfs-pre-snapshot")
+FAKE_ZFS = os.path.join(os.path.dirname(__file__), "fake-zfs")
+
+DATASET = "tank/test"
+# Five pre-pacman snapshots oldest->newest (zfs list -s creation is ascending),
+# plus one autosnap that the grep filter must ignore.
+SNAPSHOTS = "\n".join([
+ f"{DATASET}@autosnap_2026-01-01",
+ f"{DATASET}@pre-pacman_2026-06-01",
+ f"{DATASET}@pre-pacman_2026-06-02",
+ f"{DATASET}@pre-pacman_2026-06-03",
+ f"{DATASET}@pre-pacman_2026-06-04",
+ f"{DATASET}@pre-pacman_2026-06-05",
+]) + "\n"
+
+
+class Harness(unittest.TestCase):
+ def setUp(self):
+ self.tmp = tempfile.mkdtemp(prefix="zfs-pre-snap-")
+ self.bin = os.path.join(self.tmp, "bin")
+ os.makedirs(self.bin)
+ shutil.copy(FAKE_ZFS, os.path.join(self.bin, "zfs"))
+ self.log = os.path.join(self.tmp, "zfs.log")
+ self.snaps = os.path.join(self.tmp, "snaps")
+ with open(self.snaps, "w") as f:
+ f.write(SNAPSHOTS)
+ self.lock = os.path.join(self.tmp, "lock")
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp, ignore_errors=True)
+
+ def run_script(self, keep="3", fail=False, snaps=None):
+ env = os.environ.copy()
+ env["PATH"] = self.bin + os.pathsep + env["PATH"]
+ env["ZFS_PRE_DATASET"] = DATASET
+ env["ZFS_PRE_LOCKFILE"] = self.lock
+ env["ZFS_PRE_KEEP"] = keep
+ env["FAKE_ZFS_LOG"] = self.log
+ env["FAKE_ZFS_SNAPSHOTS"] = snaps if snaps is not None else self.snaps
+ if fail:
+ env["FAKE_ZFS_SNAPSHOT_FAIL"] = "1"
+ return subprocess.run([SCRIPT], env=env, capture_output=True, text=True,
+ timeout=15)
+
+ def log_lines(self):
+ try:
+ with open(self.log) as f:
+ return [ln for ln in f.read().splitlines() if ln.strip()]
+ except FileNotFoundError:
+ return []
+
+
+class TestSnapshot(Harness):
+ def test_creates_a_pre_pacman_snapshot(self):
+ self.run_script()
+ snaps = [ln for ln in self.log_lines() if ln.startswith("snapshot ")]
+ self.assertEqual(len(snaps), 1)
+ self.assertIn(f"snapshot {DATASET}@pre-pacman_", snaps[0])
+
+ def test_skips_when_lockfile_is_fresh(self):
+ # A lockfile newer than MIN_INTERVAL → no snapshot this run.
+ open(self.lock, "w").close()
+ os.utime(self.lock, (time.time(), time.time()))
+ self.run_script()
+ self.assertEqual([ln for ln in self.log_lines()
+ if ln.startswith("snapshot ")], [])
+
+
+class TestPrune(Harness):
+ def test_prunes_oldest_beyond_keep(self):
+ # 5 pre-pacman snapshots, KEEP=3 → the two oldest are destroyed.
+ self.run_script(keep="3")
+ destroyed = [ln.split(" ", 1)[1] for ln in self.log_lines()
+ if ln.startswith("destroy ")]
+ self.assertEqual(destroyed,
+ [f"{DATASET}@pre-pacman_2026-06-01",
+ f"{DATASET}@pre-pacman_2026-06-02"])
+
+ def test_never_destroys_non_pre_pacman_snapshots(self):
+ self.run_script(keep="1")
+ destroyed = [ln for ln in self.log_lines() if ln.startswith("destroy ")]
+ self.assertFalse(any("autosnap" in ln for ln in destroyed))
+
+ def test_no_prune_when_at_or_under_keep(self):
+ # KEEP=5 with exactly 5 pre-pacman snapshots → nothing destroyed.
+ self.run_script(keep="5")
+ self.assertEqual([ln for ln in self.log_lines()
+ if ln.startswith("destroy ")], [])
+
+
+class TestError(Harness):
+ def test_snapshot_failure_skips_prune_and_warns(self):
+ r = self.run_script(fail=True)
+ self.assertIn("Failed to create snapshot", r.stderr)
+ self.assertEqual([ln for ln in self.log_lines()
+ if ln.startswith("destroy ")], [])
+
+
+# Run the unittest runner when this file is executed directly as a script.
+if __name__ == "__main__":
+ unittest.main()