aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-04-21 20:10:01 -0500
committerCraig Jennings <c@cjennings.net>2026-04-21 20:10:01 -0500
commit11af802af31b69e8e478baae3ea6e5b5090bafaf (patch)
treebb300af54a0f062d70b6b6bf821ecd69169b9c3e
parent88b677cbcbbe126d50d5b334206a55559e5a4d29 (diff)
downloadarchangel-11af802af31b69e8e478baae3ea6e5b5090bafaf.tar.gz
archangel-11af802af31b69e8e478baae3ea6e5b5090bafaf.zip
feat: PrivateTmp=yes drop-in for systemd-tmpfiles on ZFS-root
On ZFS-on-root, statx() across sibling services' /var/tmp/systemd-private-*/tmp mounts returns errno 132 (ENOTNAM). This produces 10-30 journal errors per boot and causes systemd-tmpfiles-clean.service to fail every periodic run (exit 73 / CANTCREAT). Running tmpfiles inside its own mount namespace avoids traversing sibling private-tmp paths. install_zfs() now calls configure_tmpfiles_private_tmp() between configure_zfs_tools and sync_efi_partitions, so the genesis snapshot captures the drop-ins. Btrfs path is untouched — errno 132 is ZFS-specific. The drop-in file-writing is factored into install_dropin() in lib/common.sh (service, name, root; body from stdin). Six bats tests exercise path, content, directory permissions, idempotent overwrite, empty content, and special-character preservation. Full root-cause write-up and verification steps in docs/zfs-tmpfiles-private-tmp-fix.md.
-rw-r--r--docs/zfs-tmpfiles-private-tmp-fix.md72
-rwxr-xr-xinstaller/archangel20
-rw-r--r--installer/lib/common.sh14
-rw-r--r--tests/unit/test_common.bats68
4 files changed, 174 insertions, 0 deletions
diff --git a/docs/zfs-tmpfiles-private-tmp-fix.md b/docs/zfs-tmpfiles-private-tmp-fix.md
new file mode 100644
index 0000000..6d4c74a
--- /dev/null
+++ b/docs/zfs-tmpfiles-private-tmp-fix.md
@@ -0,0 +1,72 @@
+# ZFS-on-root: PrivateTmp=yes drop-ins for systemd-tmpfiles services
+
+**Discovered:** 2026-04-21, on velox after Arch-on-ZFS reinstall via archangel.
+
+## The symptom
+
+Every boot of a fresh ZFS-on-root install produces 10-30 journal errors like:
+
+```
+systemd-tmpfiles[993]: statx(/var/tmp/systemd-private-<id>-<svc>.service-<rand>/tmp) failed: Protocol driver not attached
+systemd-tmpfiles[993]: statx(/var/lib/containers/storage/tmp) failed: Protocol driver not attached
+```
+
+And `systemd-tmpfiles-clean.service` fails every periodic run with:
+
+```
+Main process exited, code=exited, status=73/CANTCREAT
+Failed with result 'exit-code'.
+```
+
+## Root cause
+
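+On ZFS, `statx()` against another service's `/var/tmp/systemd-private-*/tmp`
+mount fails with the error that systemd-tmpfiles logs as "Protocol driver not
+attached". That text is the `strerror()` string for `EUNATCH`; this write-up
+originally recorded the failure as errno 132 (ENOTNAM), which does not match
+the logged message, so confirm the numeric value (e.g. with `strace`) before
+quoting it elsewhere. Other filesystems (ext4, btrfs) don't surface this as
+an error.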
+
+The stock `systemd-tmpfiles-setup.service` and `systemd-tmpfiles-clean.service`
+units ship with no `PrivateTmp=` directive — they run in the root mount
+namespace and try to traverse every service's private-tmp.
+
+## The fix (install-time)
+
+Install an identical `PrivateTmp=yes` drop-in for both service units. This
+runs tmpfiles inside its own mount namespace, so it never sees (or tries to
+`statx()`) other services' private-tmp paths.
+
+```bash
+# In archangel's post-install step (adjust path prefix as needed).
+# The drop-in directory is built once per service and quoted everywhere it
+# is used, so the snippet stays safe to copy even if the prefix is changed
+# to a path containing spaces.
+for svc in systemd-tmpfiles-setup systemd-tmpfiles-clean; do
+    dropin_dir="/mnt/etc/systemd/system/${svc}.service.d"
+    install -d -m 755 "$dropin_dir"
+    cat > "${dropin_dir}/zfs-private-tmp.conf" <<'EOF'
+# ZFS: statx of sibling services' /var/tmp/systemd-private-*/tmp mounts
+# returns errno 132. Running in own namespace avoids traversing them.
+[Service]
+PrivateTmp=yes
+EOF
+done
+```
+
+Scope: ZFS-on-root only. Not needed on Btrfs or ext4 installs.
+
+## Verification after install
+
+```bash
+systemctl cat systemd-tmpfiles-setup.service | grep -A1 PrivateTmp
+systemctl cat systemd-tmpfiles-clean.service | grep -A1 PrivateTmp
+```
+
+Both commands should print `PrivateTmp=yes`, which comes from the
+`zfs-private-tmp.conf` drop-in. After next boot,
+`journalctl -u systemd-tmpfiles-setup.service -b -p err` should be empty.
+
+## Upstream
+
+Possibly reportable to openzfs (statx ENOTNAM on private-tmp boundary) or
+systemd (tmpfiles traversal of sibling namespaces). The drop-in is the
+practical fix regardless — upstream bugs move slowly.
+
+## Related session record
+
+`~/projects/homelab/.ai/sessions/2026-04-21-00-40-dual-host-health-check-workflow-refactor.org`
+— see the "Fixes applied on velox" section and the `system-health-check.org`
+Known Issues Log entry dated 2026-04-21.
diff --git a/installer/archangel b/installer/archangel
index f103fe9..aa8eeaa 100755
--- a/installer/archangel
+++ b/installer/archangel
@@ -1269,6 +1269,25 @@ configure_zfs_tools() {
info "Tip: Install sanoid for automated snapshot retention."
}
+configure_tmpfiles_private_tmp() {
+ # On ZFS-on-root, statx() across sibling services' /var/tmp/systemd-private-*/tmp
+ # mounts returns errno 132 (ENOTNAM). Running tmpfiles in its own mount
+ # namespace avoids traversing them. See docs/zfs-tmpfiles-private-tmp-fix.md.
+ step "Isolating systemd-tmpfiles from sibling private-tmp (ZFS)"
+
+ local svc
+ for svc in systemd-tmpfiles-setup systemd-tmpfiles-clean; do
+ install_dropin "$svc" zfs-private-tmp /mnt << 'EOF'
+# ZFS: statx of sibling services' /var/tmp/systemd-private-*/tmp mounts
+# returns errno 132. Running in own namespace avoids traversing them.
+[Service]
+PrivateTmp=yes
+EOF
+ done
+
+ info "systemd-tmpfiles drop-ins installed (PrivateTmp=yes)."
+}
+
sync_efi_partitions() {
# Skip if only one disk
if [[ ${#EFI_PARTS[@]} -le 1 ]]; then
@@ -1496,6 +1515,7 @@ install_zfs() {
configure_zfs_services
configure_pacman_hook
configure_zfs_tools
+ configure_tmpfiles_private_tmp
sync_efi_partitions
create_genesis_snapshot
cleanup
diff --git a/installer/lib/common.sh b/installer/lib/common.sh
index d181e0b..8193b19 100644
--- a/installer/lib/common.sh
+++ b/installer/lib/common.sh
@@ -222,6 +222,20 @@ disk_in_use() {
return 1
}
+# Install a systemd drop-in for $service under $root, reading its body
+# from stdin. Intended for post-pacstrap customization.
+#
+# Arguments:
+#   $1 service      unit name without the ".service" suffix
+#   $2 dropin_name  drop-in file name without the ".conf" suffix
+#   $3 root         filesystem root to install under — pass "/mnt" at
+#                   install time; tests pass a tempdir
+# Input:   stdin is written as the body of the drop-in.
+# Effects: creates $root/etc/systemd/system/$service.service.d/ at mode
+#          755 (idempotent) and writes (overwriting) $dropin_name.conf there.
+# Returns: 0 on success; 2 when service or dropin_name is empty; otherwise
+#          the failing status of install(1) or of the file write.
+install_dropin() {
+    # ${N:-} keeps a missing argument from aborting under `set -u`; the
+    # explicit check below reports it instead.
+    local service="${1:-}"
+    local dropin_name="${2:-}"
+    local root="${3:-}"
+
+    # Without both names the path would collapse to ".service.d/.conf".
+    if [[ -z "$service" || -z "$dropin_name" ]]; then
+        printf 'install_dropin: service and drop-in name are required\n' >&2
+        return 2
+    fi
+
+    local dir="${root}/etc/systemd/system/${service}.service.d"
+    # Stop here if the directory cannot be created rather than falling
+    # through to a redirect that is bound to fail.
+    install -d -m 755 -- "$dir" || return
+    cat > "${dir}/${dropin_name}.conf"
+}
+
+
# List available disks (not in use)
list_available_disks() {
local disks=()
diff --git a/tests/unit/test_common.bats b/tests/unit/test_common.bats
index 04f4e09..c81d2e3 100644
--- a/tests/unit/test_common.bats
+++ b/tests/unit/test_common.bats
@@ -178,3 +178,71 @@ setup() {
prompt_password PASS "label" 0 < <(printf '\n\n') >/dev/null
[ -z "$PASS" ]
}
+
+#############################
+# install_dropin
+#############################
+
+# Create a fresh scratch directory and publish its path as DROPIN_ROOT,
+# which the install_dropin tests use as the install root.
+setup_dropin_tmp() {
+    DROPIN_ROOT="$(mktemp -d)"
+}
+
+teardown_dropin_tmp() {
+ [ -n "${DROPIN_ROOT:-}" ] && rm -rf "$DROPIN_ROOT"
+}
+
+# The drop-in must land at <root>/etc/systemd/system/<service>.service.d/<name>.conf.
+@test "install_dropin writes conf file at expected path" {
+    setup_dropin_tmp
+    local conf="$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf"
+    install_dropin foo bar "$DROPIN_ROOT" <<< "[Service]"
+    [ -f "$conf" ]
+    teardown_dropin_tmp
+}
+
+# The file content must equal what was fed on stdin. The whole output is
+# compared, not substrings, so extra or reordered lines are caught too.
+# `run` strips the trailing newline that the here-string appends.
+@test "install_dropin writes stdin content verbatim" {
+    setup_dropin_tmp
+    install_dropin foo bar "$DROPIN_ROOT" <<< "[Service]
+PrivateTmp=yes"
+    run cat "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf"
+    [ "$status" -eq 0 ]
+    [ "$output" = $'[Service]\nPrivateTmp=yes' ]
+    teardown_dropin_tmp
+}
+
+# The .service.d directory is expected to be created with mode 755.
+@test "install_dropin creates dropin dir with 755 perms" {
+    setup_dropin_tmp
+    local dropin_dir="$DROPIN_ROOT/etc/systemd/system/foo.service.d"
+    install_dropin foo bar "$DROPIN_ROOT" <<< "x"
+    local mode
+    mode="$(stat -c '%a' "$dropin_dir")"
+    [ "$mode" = "755" ]
+    teardown_dropin_tmp
+}
+
+@test "install_dropin is idempotent — second call overwrites content" {
+ setup_dropin_tmp
+ install_dropin foo bar "$DROPIN_ROOT" <<< "first"
+ install_dropin foo bar "$DROPIN_ROOT" <<< "second"
+ run cat "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf"
+ [ "$output" = "second" ]
+ teardown_dropin_tmp
+}
+
+# Empty stdin still produces the file; it just has zero length.
+@test "install_dropin accepts empty content" {
+    setup_dropin_tmp
+    local conf="$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf"
+    install_dropin foo bar "$DROPIN_ROOT" < /dev/null
+    # Kept as two separate commands so either failure fails the test.
+    [ -f "$conf" ]
+    [ ! -s "$conf" ]
+    teardown_dropin_tmp
+}
+
+@test "install_dropin preserves special characters in content" {
+ setup_dropin_tmp
+ install_dropin foo bar "$DROPIN_ROOT" <<< '# comment with $var and `backtick`
+[Service]
+Environment="FOO=bar baz"'
+ run cat "$DROPIN_ROOT/etc/systemd/system/foo.service.d/bar.conf"
+ [[ "$output" == *'$var'* ]]
+ [[ "$output" == *'`backtick`'* ]]
+ [[ "$output" == *'"FOO=bar baz"'* ]]
+ teardown_dropin_tmp
+}