aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-05-22 20:28:15 -0500
committer Craig Jennings <c@cjennings.net> 2026-05-22 20:28:15 -0500
commit bed054f46e3b41aae0d599ed7fbc3e1e42d6ddd7 (patch)
tree 81a3a1fa97e44bb445bdf43877feeaffac1e4bff
parent 0f8bbc7c1e2c2f6fec0b17753ac0d9c4a3ad4317 (diff)
download archangel-bed054f46e3b41aae0d599ed7fbc3e1e42d6ddd7.tar.gz
archangel-bed054f46e3b41aae0d599ed7fbc3e1e42d6ddd7.zip
fix(build): clear stale archzfs from the pacoloco cache too
archzfs re-uploads its GitHub release assets under the same filename, so pacoloco keeps serving a zfs-dkms/zfs-utils it cached earlier while pacman fetches a fresh archzfs.db with a new checksum. The two mismatch and pacstrap aborts with "invalid or corrupted package." build.sh already drops the stale packages from the host pacman cache, but it never cleared the pacoloco layer, which the VM test installs route through too, so test-install.sh kept hitting the corruption (four times in one session). build.sh runs as root, so it now clears /var/cache/pacoloco/pkgs/archzfs/zfs-* alongside the host cache, which makes the build-then-test flow self-healing. The pacoloco cache is root-owned and test-install.sh runs as the user, so it can't clear it unattended. Instead, test-install.sh now recognizes the corruption (is_archzfs_cache_corruption) and prints how to clear it, the way it already names the SSH_PORT override on a port collision. A retry alone won't help since it hits the same cached file, so this fails fast with the hint rather than retrying.
-rwxr-xr-x build.sh 9
-rwxr-xr-x scripts/test-install.sh 17
-rw-r--r-- tests/unit/test_test_install.bats 34
3 files changed, 60 insertions, 0 deletions
diff --git a/build.sh b/build.sh
index 40a9be2..ace052f 100755
--- a/build.sh
+++ b/build.sh
@@ -493,6 +493,15 @@ info "Clearing archzfs packages from host pacman cache..."
rm -f /var/cache/pacman/pkg/zfs-dkms-*.pkg.tar.zst*
rm -f /var/cache/pacman/pkg/zfs-utils-*.pkg.tar.zst*
+# Same hazard one layer up: pacoloco caches the archzfs GitHub-releases
+# download by filename, so a re-uploaded asset keeps serving a stale
+# package that mismatches the fresh archzfs.db checksum — which also bites
+# the VM test installs that route through this same pacoloco. build.sh
+# runs as root, so clear it here too; rm -f no-ops when pacoloco isn't
+# installed.
+rm -f /var/cache/pacoloco/pkgs/archzfs/zfs-dkms-*.pkg.tar.zst*
+rm -f /var/cache/pacoloco/pkgs/archzfs/zfs-utils-*.pkg.tar.zst*
+
# Pre-create the build log in out/ so it survives work/ cleanup. Owned
# by SUDO_USER from the start so a failed build leaves a user-readable
# log; tee writes to it as root, but the file mode stays as set.
diff --git a/scripts/test-install.sh b/scripts/test-install.sh
index 4220d20..8d338b6 100755
--- a/scripts/test-install.sh
+++ b/scripts/test-install.sh
@@ -458,6 +458,19 @@ is_transient_install_failure() {
<<<"$log"
}
+# Recognize the archzfs stale-cache corruption: archzfs re-uploads its
+# GitHub release assets under the same filename, so a zfs-dkms/zfs-utils
+# cached in the host pacoloco can mismatch the fresh archzfs.db checksum
+# and pacstrap aborts with "invalid or corrupted package". Not transient
+# (a retry hits the same cached file), so the caller prints a cache-clear
+# hint rather than retrying.
+#
+# The archzfs name must sit on the same line as the corruption message.
+# Matching the two anywhere in the log would also flag a corrupted
+# non-archzfs package whenever zfs-dkms, zfs-utils or archzfs is
+# mentioned on some unrelated line.
+#
+# Arguments: $1 - captured install log text
+# Returns: 0 when the log shows a failed install whose corruption line
+# names an archzfs package, non-zero otherwise
+is_archzfs_cache_corruption() {
+ local log="$1"
+ local corrupt_re='invalid or corrupted package|corrupted \(checksum\)'
+ local archzfs_re='zfs-dkms|zfs-utils|archzfs'
+
+ grep -q "Failed to install packages to new root" <<<"$log" || return 1
+
+ # Keep only the corruption lines, then look for an archzfs name among
+ # them. The second grep discards its output rather than using -q so it
+ # reads the whole pipe: an early exit could SIGPIPE the first grep and,
+ # if the caller runs with pipefail, report a real match as a failure.
+ grep -Ei "$corrupt_re" <<<"$log" | grep -Ei "$archzfs_re" >/dev/null
+}
+
# Copy config to VM and run install
run_install() {
local config="$1"
@@ -952,6 +965,10 @@ run_test() {
fi
else
error "Installation failed or timed out"
+ if is_archzfs_cache_corruption "$install_log"; then
+ warn "Stale archzfs package in the host pacoloco cache (archzfs re-uploads same-filename assets)."
+ warn "Rebuild the ISO (build.sh clears it) or run: sudo rm -f /var/cache/pacoloco/pkgs/archzfs/zfs-* — then retry."
+ fi
stop_vm "$config_name"
# Save logs
diff --git a/tests/unit/test_test_install.bats b/tests/unit/test_test_install.bats
index bc4a63e..f339baf 100644
--- a/tests/unit/test_test_install.bats
+++ b/tests/unit/test_test_install.bats
@@ -266,3 +266,37 @@ EOF
run port_listening_in 2222 ""
[ "$status" -eq 1 ]
}
+
+#############################
+# is_archzfs_cache_corruption
+#############################
+# Recognizes the stale-archzfs-in-pacoloco failure (not transient — a retry
+# hits the same cached file), so the caller prints a cache-clear hint.
+
+# Positive case: the log carries the pacstrap failure marker and a
+# corruption line whose path names zfs-utils; exit status 0 is expected.
+@test "is_archzfs_cache_corruption matches an archzfs checksum corruption" {
+ local log
+ log="$(cat <<'EOF'
+==> Installing base system
+:: File /mnt/var/cache/pacman/pkg/zfs-utils-2.4.2-2-x86_64.pkg.tar.zst is corrupted (invalid or corrupted package (checksum)).
+error: failed to commit transaction (invalid or corrupted package (checksum))
+==> ERROR: Failed to install packages to new root
+EOF
+)"
+ run is_archzfs_cache_corruption "$log"
+ test "$status" -eq 0
+}
+
+# Negative case: the install failed, but the log holds a download
+# timeout and no corruption message; exit status 1 is expected.
+@test "is_archzfs_cache_corruption ignores a transient mirror flake" {
+ local log
+ log="$(cat <<'EOF'
+error: failed retrieving file 'core.db' : Operation too slow
+==> ERROR: Failed to install packages to new root
+EOF
+)"
+ run is_archzfs_cache_corruption "$log"
+ test "$status" -eq 1
+}
+
+# Negative case: the corrupted file is glibc, so exit status 1 is expected.
+# NOTE(review): this fixture never mentions zfs-dkms, zfs-utils or archzfs
+# on any line; a log that names one of them on an unrelated line is not
+# covered here.
+@test "is_archzfs_cache_corruption ignores corruption of a non-archzfs package" {
+ local log
+ log="$(cat <<'EOF'
+==> ERROR: Failed to install packages to new root
+:: File /mnt/var/cache/pacman/pkg/glibc-2.43-1-x86_64.pkg.tar.zst is corrupted (invalid or corrupted package (checksum)).
+EOF
+)"
+ run is_archzfs_cache_corruption "$log"
+ test "$status" -eq 1
+}
+
+# Boundary case: an empty log has no failure marker, so exit status 1
+# is expected.
+@test "is_archzfs_cache_corruption returns 1 on a clean log" {
+ local empty_log=""
+ run is_archzfs_cache_corruption "$empty_log"
+ test "$status" -eq 1
+}