diff options
| author | Craig Jennings <c@cjennings.net> | 2026-06-30 07:56:41 -0400 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-06-30 07:56:41 -0400 |
| commit | 6bd832897813c730deb12768d1eb5b02af66ad20 (patch) | |
| tree | fdb3b76316deb14c6a8dfd39e3b7d03e06283c32 | |
| parent | 394f3dbdadb29f7477d452634605f5c269aaed6f (diff) | |
| download | archsetup-6bd832897813c730deb12768d1eb5b02af66ad20.tar.gz archsetup-6bd832897813c730deb12768d1eb5b02af66ad20.zip | |
feat: install pre-pacman ZFS snapshot hook on ZFS-root systems
archsetup took sanoid from install-archzfs but never ported the pre-pacman snapshot hook, so a ZFS-root install had no transaction-triggered rollback point — the working setup only existed as a hand-placed script on velox, lost on reinstall. Add configure_pre_pacman_snapshots(): a PreTransaction pacman hook plus a self-pruning script that keeps the 10 most recent pre-pacman snapshots (sanoid ignores them — they aren't autosnap_ names). It's gated to ZFS-root and runs late in boot_ux, so the hook doesn't fire during the install's own package operations and the first snapshot is the fresh system.
The script ships as scripts/zfs-pre-snapshot, made ZFS_PRE_* env-overridable so the pruning logic is unit-testable. Unit tests drive it against a fake zfs (creates a snapshot, prunes the oldest past KEEP, ignores non-pre-pacman snapshots, honors the lockfile interval, warns on failure); a Testinfra test asserts the hook and script land on a ZFS install; the orchestrator test pins the new boot_ux substep.
| -rwxr-xr-x | archsetup | 34 | ||||
| -rw-r--r-- | docs/design/2026-06-29-zfs-pre-snapshot-installer.org | 33 | ||||
| -rw-r--r-- | scripts/testing/tests/test_boot.py | 16 | ||||
| -rwxr-xr-x | scripts/zfs-pre-snapshot | 43 | ||||
| -rw-r--r-- | tests/installer-steps/test_orchestrators.py | 1 | ||||
| -rwxr-xr-x | tests/zfs-pre-snapshot/fake-zfs | 14 | ||||
| -rw-r--r-- | tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py | 116 |
7 files changed, 250 insertions, 7 deletions
@@ -1778,6 +1778,39 @@ configure_btrfs_snapshots() { } +configure_pre_pacman_snapshots() { + # ZFS only: a pacman PreTransaction hook that snapshots the root dataset + # before every transaction, with a self-pruning script that keeps the most + # recent 10 (sanoid ignores these — they aren't autosnap_ names). This is + # the transaction-triggered complement to the scheduled sanoid snapshots in + # configure_zfs_snapshots. + # + # Called from boot_ux (the last step), NOT from configure_zfs_snapshots + # (which runs early), so the hook doesn't fire during the install's own + # package operations — the first pre-pacman snapshot is the fresh system. + is_zfs_root || return 0 + + action="installing pre-pacman snapshot script" && display "task" "$action" + cp "$user_archsetup_dir/scripts/zfs-pre-snapshot" /usr/local/bin/zfs-pre-snapshot + chmod +x /usr/local/bin/zfs-pre-snapshot + + action="installing pre-pacman snapshot hook" && display "task" "$action" + mkdir -p /etc/pacman.d/hooks + cat << 'EOF' > /etc/pacman.d/hooks/zfs-snapshot.hook +[Trigger] +Operation = Upgrade +Operation = Install +Operation = Remove +Type = Package +Target = * + +[Action] +Description = Creating ZFS snapshot before pacman transaction... +When = PreTransaction +Exec = /usr/local/bin/zfs-pre-snapshot +EOF +} + configure_user_lingering() { # User Services Lingering @@ -2666,6 +2699,7 @@ boot_ux() { configure_tlp_power trim_firmware configure_grub + configure_pre_pacman_snapshots # ZFS only; late so it skips the install itself } tighten_efi_permissions() { diff --git a/docs/design/2026-06-29-zfs-pre-snapshot-installer.org b/docs/design/2026-06-29-zfs-pre-snapshot-installer.org index 413bfa5..e5a339e 100644 --- a/docs/design/2026-06-29-zfs-pre-snapshot-installer.org +++ b/docs/design/2026-06-29-zfs-pre-snapshot-installer.org @@ -78,10 +78,29 @@ else fi #+end_src -* Open items before implementation - -- Source or write =/etc/pacman.d/hooks/zfs-snapshot.hook= (the trigger). -- Decide the exact insertion point in the ZFS-root install path. -- Add a ZFS-root VM test asserting the hook + script land and the script - self-prunes past =KEEP=. -- Correct the stale 2026-01-17 security-doc line. +* Implementation (2026-06-30) + +- Hook sourced from velox (=/etc/pacman.d/hooks/zfs-snapshot.hook=) and embedded + as a heredoc in =configure_pre_pacman_snapshots()=. +- Insertion point: a new =configure_pre_pacman_snapshots()= gated on + =is_zfs_root=, called from =boot_ux= (the last step) so the hook doesn't fire + during the install's own package operations — the first pre-pacman snapshot is + the fresh system. The script ships as =scripts/zfs-pre-snapshot= (the + =zfs-replicate= pattern), made =ZFS_PRE_*=-env-overridable for testability. +- Tests: =tests/zfs-pre-snapshot/= unit-tests the pruning logic against a fake + =zfs= (creates, prunes oldest-past-KEEP, ignores non-=pre-pacman_= snapshots, + honors the lockfile, warns on snapshot failure); =test_boot.py= asserts the + hook + script land on a ZFS install; the orchestrator test pins the new + =boot_ux= substep. + +* Note on the "stale security doc" + +The 2026-01-17 line "ZFS pre-pacman snapshots (already in install-archzfs)" is +*not* stale: that file is an archive generated by install-archzfs (see its +header and footer), and the claim is accurate for install-archzfs. The real gap +was that archsetup took sanoid from install-archzfs but never ported the +pre-pacman hook. This change ports it. The archive is left untouched. + +* Remaining + +- ZFS-root VM verification (=make test FS_PROFILE=zfs=) before the task closes. diff --git a/scripts/testing/tests/test_boot.py b/scripts/testing/tests/test_boot.py index 78b4404..e442682 100644 --- a/scripts/testing/tests/test_boot.py +++ b/scripts/testing/tests/test_boot.py @@ -65,3 +65,19 @@ def test_zfs_has_sanoid(host): if not host.exists("zfs"): pytest.skip("ZFS not installed (non-ZFS system)") assert host.exists("sanoid"), "ZFS system should have sanoid installed" + + +def test_zfs_pre_pacman_snapshot_hook(host): + # archsetup installs a PreTransaction pacman hook + a self-pruning script so + # every pacman transaction is preceded by a rollback snapshot (configure_ + # pre_pacman_snapshots, run late in boot_ux). ZFS-root only. + if not host.exists("zfs"): + pytest.skip("ZFS not installed (non-ZFS system)") + script = host.file("/usr/local/bin/zfs-pre-snapshot") + assert script.exists and script.is_file, "pre-pacman snapshot script missing" + assert script.mode & 0o111, "pre-pacman snapshot script is not executable" + hook = host.file("/etc/pacman.d/hooks/zfs-snapshot.hook") + assert hook.exists and hook.is_file, "zfs-snapshot.hook missing" + assert "PreTransaction" in hook.content_string, "hook not PreTransaction" + assert "/usr/local/bin/zfs-pre-snapshot" in hook.content_string, \ + "hook does not exec the snapshot script" diff --git a/scripts/zfs-pre-snapshot b/scripts/zfs-pre-snapshot new file mode 100755 index 0000000..ed914d0 --- /dev/null +++ b/scripts/zfs-pre-snapshot @@ -0,0 +1,43 @@ +#!/bin/bash +# Snapshot the root dataset before a pacman transaction, then prune to the most +# recent $KEEP pre-pacman snapshots. Run from the zfs-snapshot.hook pacman hook +# (PreTransaction). Sanoid doesn't manage these (they aren't autosnap_ names), +# so retention is enforced here at creation time. +# +# Defaults match the live zroot layout; the ZFS_PRE_* env vars override them so +# the pruning logic is unit-testable against a fake zfs on PATH. + +POOL="${ZFS_PRE_POOL:-zroot}" +DATASET="${ZFS_PRE_DATASET:-$POOL/ROOT/default}" +LOCKFILE="${ZFS_PRE_LOCKFILE:-/tmp/.zfs-pre-snapshot.lock}" +MIN_INTERVAL="${ZFS_PRE_MIN_INTERVAL:-60}" +KEEP="${ZFS_PRE_KEEP:-10}" # pre-pacman snapshots to retain (recent-transaction rollback) + +# Skip if a snapshot was created within the last $MIN_INTERVAL seconds. A single +# pacman invocation can fire several transactions; this stops a burst of them +# from each cutting a near-identical snapshot. +if [ -f "$LOCKFILE" ]; then + last=$(stat -c %Y "$LOCKFILE" 2>/dev/null || echo 0) + now=$(date +%s) + if (( now - last < MIN_INTERVAL )); then + exit 0 + fi +fi + +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +SNAPSHOT_NAME="pre-pacman_$TIMESTAMP" + +if zfs snapshot "$DATASET@$SNAPSHOT_NAME"; then + echo "Created snapshot: $DATASET@$SNAPSHOT_NAME" + touch "$LOCKFILE" + + # Keep only the most recent $KEEP pre-pacman snapshots; destroy older ones. + zfs list -H -o name -t snapshot -s creation "$DATASET" 2>/dev/null \ + | grep '@pre-pacman_' \ + | head -n -"$KEEP" \ + | while read -r old; do + zfs destroy "$old" && echo "Pruned old snapshot: $old" + done +else + echo "Warning: Failed to create snapshot" >&2 +fi diff --git a/tests/installer-steps/test_orchestrators.py b/tests/installer-steps/test_orchestrators.py index e62c198..48b7508 100644 --- a/tests/installer-steps/test_orchestrators.py +++ b/tests/installer-steps/test_orchestrators.py @@ -46,6 +46,7 @@ ORCHESTRATORS = { "tighten_efi_permissions", "add_nvme_early_module", "configure_initramfs_hook", "configure_encrypted_autologin", "configure_tlp_power", "trim_firmware", "configure_grub", + "configure_pre_pacman_snapshots", ], "user_customizations": [ "clone_user_repos", "stow_dotfiles", "prune_waybar_battery", diff --git a/tests/zfs-pre-snapshot/fake-zfs b/tests/zfs-pre-snapshot/fake-zfs new file mode 100755 index 0000000..508c0f3 --- /dev/null +++ b/tests/zfs-pre-snapshot/fake-zfs @@ -0,0 +1,14 @@ +#!/bin/sh +# Fake zfs for the zfs-pre-snapshot unit test. `snapshot` and `destroy` are +# logged (FAKE_ZFS_LOG); `list` prints a fixture snapshot set (FAKE_ZFS_SNAPSHOTS). +# Set FAKE_ZFS_SNAPSHOT_FAIL to make snapshot creation fail. +case "$1" in + snapshot) + [ -n "$FAKE_ZFS_SNAPSHOT_FAIL" ] && exit 1 + echo "snapshot $2" >> "$FAKE_ZFS_LOG"; exit 0 ;; + destroy) + echo "destroy $2" >> "$FAKE_ZFS_LOG"; exit 0 ;; + list) + cat "$FAKE_ZFS_SNAPSHOTS" 2>/dev/null; exit 0 ;; +esac +exit 0 diff --git a/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py b/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py new file mode 100644 index 0000000..ed7731b --- /dev/null +++ b/tests/zfs-pre-snapshot/test_zfs_pre_snapshot.py @@ -0,0 +1,116 @@ +"""Unit tests for scripts/zfs-pre-snapshot. + +The script snapshots the root dataset before a pacman transaction and prunes to +the most recent KEEP pre-pacman snapshots. These tests drive the real script +with a fake zfs on PATH (snapshot/destroy logged, list returns a fixture set) +and env-rooted state, so nothing touches a real pool. +""" + +import os +import shutil +import subprocess +import tempfile +import time +import unittest + +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +SCRIPT = os.path.join(REPO_ROOT, "scripts/zfs-pre-snapshot") +FAKE_ZFS = os.path.join(os.path.dirname(__file__), "fake-zfs") + +DATASET = "tank/test" +# Five pre-pacman snapshots oldest->newest (zfs list -s creation is ascending), +# plus one autosnap that the grep filter must ignore. +SNAPSHOTS = "\n".join([ + f"{DATASET}@autosnap_2026-01-01", + f"{DATASET}@pre-pacman_2026-06-01", + f"{DATASET}@pre-pacman_2026-06-02", + f"{DATASET}@pre-pacman_2026-06-03", + f"{DATASET}@pre-pacman_2026-06-04", + f"{DATASET}@pre-pacman_2026-06-05", +]) + "\n" + + +class Harness(unittest.TestCase): + def setUp(self): + self.tmp = tempfile.mkdtemp(prefix="zfs-pre-snap-") + self.bin = os.path.join(self.tmp, "bin") + os.makedirs(self.bin) + shutil.copy(FAKE_ZFS, os.path.join(self.bin, "zfs")) + self.log = os.path.join(self.tmp, "zfs.log") + self.snaps = os.path.join(self.tmp, "snaps") + with open(self.snaps, "w") as f: + f.write(SNAPSHOTS) + self.lock = os.path.join(self.tmp, "lock") + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + def run_script(self, keep="3", fail=False, snaps=None): + env = os.environ.copy() + env["PATH"] = self.bin + os.pathsep + env["PATH"] + env["ZFS_PRE_DATASET"] = DATASET + env["ZFS_PRE_LOCKFILE"] = self.lock + env["ZFS_PRE_KEEP"] = keep + env["FAKE_ZFS_LOG"] = self.log + env["FAKE_ZFS_SNAPSHOTS"] = snaps if snaps is not None else self.snaps + if fail: + env["FAKE_ZFS_SNAPSHOT_FAIL"] = "1" + return subprocess.run([SCRIPT], env=env, capture_output=True, text=True, + timeout=15) + + def log_lines(self): + try: + with open(self.log) as f: + return [ln for ln in f.read().splitlines() if ln.strip()] + except FileNotFoundError: + return [] + + +class TestSnapshot(Harness): + def test_creates_a_pre_pacman_snapshot(self): + self.run_script() + snaps = [ln for ln in self.log_lines() if ln.startswith("snapshot ")] + self.assertEqual(len(snaps), 1) + self.assertIn(f"snapshot {DATASET}@pre-pacman_", snaps[0]) + + def test_skips_when_lockfile_is_fresh(self): + # A lockfile newer than MIN_INTERVAL → no snapshot this run. + open(self.lock, "w").close() + os.utime(self.lock, (time.time(), time.time())) + self.run_script() + self.assertEqual([ln for ln in self.log_lines() + if ln.startswith("snapshot ")], []) + + +class TestPrune(Harness): + def test_prunes_oldest_beyond_keep(self): + # 5 pre-pacman snapshots, KEEP=3 → the two oldest are destroyed. + self.run_script(keep="3") + destroyed = [ln.split(" ", 1)[1] for ln in self.log_lines() + if ln.startswith("destroy ")] + self.assertEqual(destroyed, + [f"{DATASET}@pre-pacman_2026-06-01", + f"{DATASET}@pre-pacman_2026-06-02"]) + + def test_never_destroys_non_pre_pacman_snapshots(self): + self.run_script(keep="1") + destroyed = [ln for ln in self.log_lines() if ln.startswith("destroy ")] + self.assertFalse(any("autosnap" in ln for ln in destroyed)) + + def test_no_prune_when_at_or_under_keep(self): + # KEEP=5 with exactly 5 pre-pacman snapshots → nothing destroyed. + self.run_script(keep="5") + self.assertEqual([ln for ln in self.log_lines() + if ln.startswith("destroy ")], []) + + +class TestError(Harness): + def test_snapshot_failure_skips_prune_and_warns(self): + r = self.run_script(fail=True) + self.assertIn("Failed to create snapshot", r.stderr) + self.assertEqual([ln for ln in self.log_lines() + if ln.startswith("destroy ")], []) + + +if __name__ == "__main__": + unittest.main() |
