diff options
| author | Craig Jennings <c@cjennings.net> | 2026-04-21 20:10:01 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-04-21 20:10:01 -0500 |
| commit | 11af802af31b69e8e478baae3ea6e5b5090bafaf (patch) | |
| tree | bb300af54a0f062d70b6b6bf821ecd69169b9c3e | |
| parent | 88b677cbcbbe126d50d5b334206a55559e5a4d29 (diff) | |
| download | archangel-11af802af31b69e8e478baae3ea6e5b5090bafaf.tar.gz archangel-11af802af31b69e8e478baae3ea6e5b5090bafaf.zip | |
feat: PrivateTmp=yes drop-in for systemd-tmpfiles on ZFS-root
On ZFS-on-root, statx() across sibling services'
/var/tmp/systemd-private-*/tmp mounts returns errno 132 (ENOTNAM).
This produces 10-30 journal errors per boot and causes
systemd-tmpfiles-clean.service to fail every periodic run
(exit 73 / CANTCREAT). Running tmpfiles inside its own mount
namespace avoids traversing sibling private-tmp paths.
install_zfs() now calls configure_tmpfiles_private_tmp() between
configure_zfs_tools() and sync_efi_partitions(), so the genesis snapshot
captures the drop-ins. The Btrfs path is untouched — errno 132 is
ZFS-specific.
The drop-in file-writing is factored into install_dropin() in
installer/lib/common.sh (service, name, root; body from stdin). Six bats tests
exercise path, content, directory permissions, idempotent overwrite,
empty content, and special-character preservation.
The full root-cause write-up and verification steps are in
docs/zfs-tmpfiles-private-tmp-fix.md.
| -rw-r--r-- | docs/zfs-tmpfiles-private-tmp-fix.md | 72 | ||||
| -rwxr-xr-x | installer/archangel | 20 | ||||
| -rw-r--r-- | installer/lib/common.sh | 14 | ||||
| -rw-r--r-- | tests/unit/test_common.bats | 68 |
4 files changed, 174 insertions, 0 deletions
diff --git a/docs/zfs-tmpfiles-private-tmp-fix.md b/docs/zfs-tmpfiles-private-tmp-fix.md new file mode 100644 index 0000000..6d4c74a --- /dev/null +++ b/docs/zfs-tmpfiles-private-tmp-fix.md @@ -0,0 +1,72 @@ +# ZFS-on-root: PrivateTmp=yes drop-ins for systemd-tmpfiles services + +**Discovered:** 2026-04-21, on velox after Arch-on-ZFS reinstall via archangel. + +## The symptom + +Every boot of a fresh ZFS-on-root install produces 10-30 journal errors like: + +``` +systemd-tmpfiles[993]: statx(/var/tmp/systemd-private-<id>-<svc>.service-<rand>/tmp) failed: Protocol driver not attached +systemd-tmpfiles[993]: statx(/var/lib/containers/storage/tmp) failed: Protocol driver not attached +``` + +And `systemd-tmpfiles-clean.service` fails every periodic run with: + +``` +Main process exited, code=exited, status=73/CANTCREAT +Failed with result 'exit-code'. +``` + +## Root cause + +On ZFS, `statx()` against another service's `/var/tmp/systemd-private-*/tmp` +mount returns errno 132 (ENOTNAM, "Protocol driver not attached"). Other +filesystems (ext4, btrfs) don't surface this as an error. + +The stock `systemd-tmpfiles-setup.service` and `systemd-tmpfiles-clean.service` +units ship with no `PrivateTmp=` directive — they run in the root mount +namespace and try to traverse every service's private-tmp. + +## The fix (install-time) + +Drop identical `PrivateTmp=yes` conf into both service units. This puts +tmpfiles inside its own mount namespace, so it never sees (or tries to +statx) other services' private-tmp paths. + +```bash +# In archangel's post-install step (adjust path prefix as needed) +for svc in systemd-tmpfiles-setup systemd-tmpfiles-clean; do + install -d -m 755 /mnt/etc/systemd/system/${svc}.service.d + cat > /mnt/etc/systemd/system/${svc}.service.d/zfs-private-tmp.conf <<'EOF' +# ZFS: statx of sibling services' /var/tmp/systemd-private-*/tmp mounts +# returns errno 132. Running in own namespace avoids traversing them. 
+[Service] +PrivateTmp=yes +EOF +done +``` + +Scope: ZFS-on-root only. Not needed on Btrfs or ext4 installs. + +## Verification after install + +```bash +systemctl cat systemd-tmpfiles-setup.service | grep -A1 PrivateTmp +systemctl cat systemd-tmpfiles-clean.service | grep -A1 PrivateTmp +``` + +Both should show `PrivateTmp=yes` in the Drop-In section. After next boot, +`journalctl -u systemd-tmpfiles-setup.service -b -p err` should be empty. + +## Upstream + +Possibly reportable to openzfs (statx ENOTNAM on private-tmp boundary) or +systemd (tmpfiles traversal of sibling namespaces). The drop-in is the +practical fix regardless — upstream bugs move slowly. + +## Related session record + +`~/projects/homelab/.ai/sessions/2026-04-21-00-40-dual-host-health-check-workflow-refactor.org` +— see the "Fixes applied on velox" section and the `system-health-check.org` +Known Issues Log entry dated 2026-04-21. diff --git a/installer/archangel b/installer/archangel index f103fe9..aa8eeaa 100755 --- a/installer/archangel +++ b/installer/archangel @@ -1269,6 +1269,25 @@ configure_zfs_tools() { info "Tip: Install sanoid for automated snapshot retention." } +configure_tmpfiles_private_tmp() { + # On ZFS-on-root, statx() across sibling services' /var/tmp/systemd-private-*/tmp + # mounts returns errno 132 (ENOTNAM). Running tmpfiles in its own mount + # namespace avoids traversing them. See docs/zfs-tmpfiles-private-tmp-fix.md. + step "Isolating systemd-tmpfiles from sibling private-tmp (ZFS)" + + local svc + for svc in systemd-tmpfiles-setup systemd-tmpfiles-clean; do + install_dropin "$svc" zfs-private-tmp /mnt << 'EOF' +# ZFS: statx of sibling services' /var/tmp/systemd-private-*/tmp mounts +# returns errno 132. Running in own namespace avoids traversing them. +[Service] +PrivateTmp=yes +EOF + done + + info "systemd-tmpfiles drop-ins installed (PrivateTmp=yes)." 
+} + sync_efi_partitions() { # Skip if only one disk if [[ ${#EFI_PARTS[@]} -le 1 ]]; then @@ -1496,6 +1515,7 @@ install_zfs() { configure_zfs_services configure_pacman_hook configure_zfs_tools + configure_tmpfiles_private_tmp sync_efi_partitions create_genesis_snapshot cleanup diff --git a/installer/lib/common.sh b/installer/lib/common.sh index d181e0b..8193b19 100644 --- a/installer/lib/common.sh +++ b/installer/lib/common.sh @@ -222,6 +222,20 @@ disk_in_use() { return 1 } +# Install a systemd drop-in for $service under $root, reading its body +# from stdin. Creates $root/etc/systemd/system/$service.service.d/ at +# mode 755 (idempotent) and writes $dropin_name.conf there. Intended +# for post-pacstrap customization — pass "/mnt" as root at install +# time; tests pass a tempdir. +install_dropin() { + local service="$1" + local dropin_name="$2" + local root="$3" + local dir="${root}/etc/systemd/system/${service}.service.d" + install -d -m 755 "$dir" + cat > "${dir}/${dropin_name}.conf" +} + # List available disks (not in use) list_available_disks() { local disks=() diff --git a/tests/unit/test_common.bats b/tests/unit/test_common.bats index 04f4e09..c81d2e3 100644 --- a/tests/unit/test_common.bats +++ b/tests/unit/test_common.bats @@ -178,3 +178,71 @@ setup() { prompt_password PASS "label" 0 < <(printf '\n\n') >/dev/null [ -z "$PASS" ] } + +############################# +# install_dropin +############################# + +setup_dropin_tmp() { + DROPIN_ROOT=$(mktemp -d) +} + +teardown_dropin_tmp() { + [ -n "${DROPIN_ROOT:-}" ] && rm -rf "$DROPIN_ROOT" +} + +@test "install_dropin writes conf file at expected path" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" <<< "[Service]" + [ -f "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" ] + teardown_dropin_tmp +} + +@test "install_dropin writes stdin content verbatim" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" <<< "[Service] +PrivateTmp=yes" + run cat 
"$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" + [ "$status" -eq 0 ] + [[ "$output" == *"[Service]"* ]] + [[ "$output" == *"PrivateTmp=yes"* ]] + teardown_dropin_tmp +} + +@test "install_dropin creates dropin dir with 755 perms" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" <<< "x" + local perms + perms=$(stat -c '%a' "$DROPIN_ROOT/etc/systemd/system/foo.service.d") + [ "$perms" = "755" ] + teardown_dropin_tmp +} + +@test "install_dropin is idempotent — second call overwrites content" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" <<< "first" + install_dropin foo bar "$DROPIN_ROOT" <<< "second" + run cat "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" + [ "$output" = "second" ] + teardown_dropin_tmp +} + +@test "install_dropin accepts empty content" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" < /dev/null + [ -f "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" ] + [ ! -s "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" ] + teardown_dropin_tmp +} + +@test "install_dropin preserves special characters in content" { + setup_dropin_tmp + install_dropin foo bar "$DROPIN_ROOT" <<< '# comment with $var and `backtick` +[Service] +Environment="FOO=bar baz"' + run cat "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf" + [[ "$output" == *'$var'* ]] + [[ "$output" == *'`backtick`'* ]] + [[ "$output" == *'"FOO=bar baz"'* ]] + teardown_dropin_tmp +} |
