diff options
| | | |
|---|---|---|
| author | Craig Jennings <c@cjennings.net> | 2026-07-02 17:19:15 -0400 |
| committer | Craig Jennings <c@cjennings.net> | 2026-07-02 17:19:15 -0400 |
| commit | a0e1fa6b62257e90e415c6db4c4206afcffdffb6 (patch) | |
| tree | b76877b82763a37cc321f4efe3c6df482a67148d | |
| parent | 72c5fdf1f05a4232c6afda1371d00d2b89b65cd7 (diff) | |
| download | archsetup-a0e1fa6b62257e90e415c6db4c4206afcffdffb6.tar.gz archsetup-a0e1fa6b62257e90e415c6db4c4206afcffdffb6.zip | |
feat(errors): actionable fatals, log-capture for direct mutations
error_fatal gained an optional recovery-hint arg and now prints the last log lines, the full log path, and the resume pointer on every fatal; all 17 call sites carry specific hints. The silent sed/file-write mutations (locale.gen, pacman.conf, waybar battery, regdom, geoclue, paccache, udev, fstab, mkinitcpio, sudoers) now capture stderr to the log and warn on failure, and the run summary tells the reader how to find each error's output. No new shellcheck findings; 10 unit suites green.
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | archsetup | 84 |
| -rw-r--r-- | todo.org | 12 |
2 files changed, 58 insertions, 38 deletions
@@ -623,7 +623,7 @@ intro() { # count the arch packages before install pacman -Q > "$packages_before" || \ - error_fatal "generating pre-install package list" "$?" + error_fatal "generating pre-install package list" "$?" "confirm pacman works on this system: pacman -Q | head" } ### Error Handling @@ -639,10 +639,20 @@ error_warn() { } # Fatal error - log and exit -# Usage: error_fatal "what failed" "$?" +# Usage: error_fatal "what failed" "$?" ["recovery hint"] +# Every fatal points at the log (with its last lines inline), prints the +# per-site recovery hint when one is given, and names the resume path — +# archsetup's step markers mean a re-run continues where it stopped. error_fatal() { printf "CRASH: %s (error: %s) @ %s. Halting.\n" \ "$1" "$2" "$(date +'%T')" | tee -a "$logfile" + if [ -f "$logfile" ]; then + printf " Last log lines:\n" + tail -n 5 "$logfile" | sed 's/^/ | /' + printf " Full log: %s\n" "$logfile" + fi + [ -n "${3:-}" ] && printf " Fix: %s\n" "$3" | tee -a "$logfile" + printf " Then re-run archsetup - completed steps are tracked, so it resumes here.\n" | tee -a "$logfile" exit 1 } @@ -993,13 +1003,14 @@ bootstrap_pacman_keyring() { fi action="ensuring current Arch Linux keyring" && display "task" "$action" - (pacman -Syy) >> "$logfile" 2>&1 || error_fatal "$action" "$?" + (pacman -Syy) >> "$logfile" 2>&1 || error_fatal "$action" "$?" \ + "check network and mirrors: ping archlinux.org; cat /etc/pacman.d/mirrorlist" (pacman -S --noconfirm archlinux-keyring) >> "$logfile" 2>&1 || \ - error_fatal "$action" "$?" + error_fatal "$action" "$?" "reinitialize the keyring: pacman-key --init && pacman-key --populate archlinux" display "task" "verifying Arch Linux keys" (pacman-key --populate archlinux >> "$logfile" 2>&1) || \ - error_fatal "verifying Arch Linux keys" "$?" + error_fatal "verifying Arch Linux keys" "$?" 
"reinitialize the keyring: pacman-key --init && pacman-key --populate archlinux" # The bulk refresh gets the same retry budget as per-package installs — # a single slow mirror ("Operation too slow") halted a full install at @@ -1015,7 +1026,7 @@ bootstrap_pacman_keyring() { [ "$attempt" -lt "$MAX_INSTALL_RETRIES" ] && \ display "task" "retrying package cache refresh (attempt $((attempt + 1))/$MAX_INSTALL_RETRIES)" done - $refresh_ok || error_fatal "$action" "$?" + $refresh_ok || error_fatal "$action" "$?" "run pacman -Syu manually to see the failure, or switch mirrors in /etc/pacman.d/mirrorlist" } @@ -1040,7 +1051,8 @@ configure_build_environment() { # Uncomment the selected locale in locale.gen (format: "en_US.UTF-8 UTF-8") locale_entry="${locale} ${locale##*.}" # e.g., "en_US.UTF-8 UTF-8" backup_system_file /etc/locale.gen - sed -i "s|^#${locale_entry}|${locale_entry}|" /etc/locale.gen + sed -i "s|^#${locale_entry}|${locale_entry}|" /etc/locale.gen 2>> "$logfile" || \ + error_warn "uncommenting $locale_entry in locale.gen" "$?" (locale-gen >> "$logfile" 2>&1) || error_warn "$action" "$?" echo "LANG=$locale" > /etc/locale.conf export LANG="$locale" @@ -1071,11 +1083,13 @@ configure_build_environment() { backup_system_file /etc/pacman.conf # Match a commented OR already-uncommented ParallelDownloads: current Arch # ships it uncommented at 5, so a "^#"-only match silently leaves it at 5. - sed -i "s/^#\?ParallelDownloads.*$/ParallelDownloads = 10/;s/^#Color$/Color/" /etc/pacman.conf + sed -i "s/^#\?ParallelDownloads.*$/ParallelDownloads = 10/;s/^#Color$/Color/" /etc/pacman.conf 2>> "$logfile" || \ + error_warn "configuring pacman.conf (ParallelDownloads/Color)" "$?" # enable multilib repository (required for 32-bit libraries, Steam, etc.) 
action="enabling multilib repository" && display "task" "$action" - sed -i '/^#\[multilib\]/{s/^#//;n;s/^#//}' /etc/pacman.conf + sed -i '/^#\[multilib\]/{s/^#//;n;s/^#//}' /etc/pacman.conf 2>> "$logfile" || \ + error_warn "$action" "$?" # Keep pacman.conf world-readable. User-level makepkg/yay reads it to # resolve dependencies, so a root-only file makes every AUR build fail with @@ -1127,7 +1141,7 @@ EOF [ -f /etc/sudoers.pacnew ] && cp /etc/sudoers.pacnew /etc/sudoers >> "$logfile" 2>&1 action="creating a directory to build/install software from git/AUR." - (mkdir -p "$source_dir") || error_fatal "creating the directory $source_dir" "$?" + (mkdir -p "$source_dir") || error_fatal "creating the directory $source_dir" "$?" "check permissions and free space: df -h" } @@ -1138,12 +1152,12 @@ create_user() { display "task" "checking if user exists" # halt if $username exists ( id -u "$username" >/dev/null 2>&1; ) && \ - error_fatal "user '$username' already exists" "user exists" + error_fatal "user '$username' already exists" "user exists" "pick a different USERNAME in archsetup.conf, or remove the user first: userdel -r $username" # create $username with home, group, shell, password action="creating user and home directory" && display "task" "$action" (useradd -m -G wheel -s /bin/zsh "$username" >> "$logfile" 2>&1) || \ - error_fatal "adding user '$username'" "$?" + error_fatal "adding user '$username'" "$?" "run the useradd manually to see why it failed" display "task" "assigning the password" echo "$username:$password" | chpasswd # any text is allowable! be careful! @@ -1152,7 +1166,7 @@ create_user() { display "task" "adding to appropriate groups" (usermod -aG \ sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users \ - "$username" >> "$logfile" 2>&1) || error_fatal "adding $username to groups" "$?" + "$username" >> "$logfile" 2>&1) || error_fatal "adding $username to groups" "$?" 
"confirm the groups exist: getent group network rfkill video" display "task" "configuring shell" # zsh cache required: $username will install via yay; zsh will run those commands @@ -1161,19 +1175,19 @@ create_user() { # give $username sudo nopasswd rights (required for aur installs) action="granting permissions" && display "task" "$action" backup_system_file /etc/sudoers - (echo "%$username ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers) \ + (echo "%$username ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers) 2>> "$logfile" \ || error_warn "$action" "$?" # mount as ramdisk to speed aur/git build/installs if ! mountpoint -q "$source_dir" 2>/dev/null; then (mount -t tmpfs -o size=4G archsetup "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "mounting the RAM disk for archsetup" "$?" + error_fatal "mounting the RAM disk for archsetup" "$?" "check available memory (free -h) or mount a smaller tmpfs at $source_dir by hand" else display "task" "ramdisk already mounted at $source_dir" fi (chown -R "$username":wheel "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "changing ownership of $source_dir" "$?" + error_fatal "changing ownership of $source_dir" "$?" "run the chown manually to see the failure" # Bootstrap DNS for git clones and AUR installs (full config in essential_services) if [ ! -L /etc/resolv.conf ] || [ "$(readlink /etc/resolv.conf)" != "/run/systemd/resolve/stub-resolv.conf" ]; then @@ -1227,7 +1241,7 @@ clone_user_repos() { # Q5: the --adopt/restore conflict handling below needs a real git checkout. # Refuse to continue if the clone didn't produce one (bad URL, network, a # tarball drop) rather than silently skipping the restore step. 
- [[ -d "$dotfiles_dir/.git" ]] || error_fatal "dotfiles dir is not a git checkout: $dotfiles_dir" 1 + [[ -d "$dotfiles_dir/.git" ]] || error_fatal "dotfiles dir is not a git checkout: $dotfiles_dir" 1 "move that directory aside so archsetup can clone fresh, or point DOTFILES_DIR at a real clone" # root runs stow/restore against the user-owned clone; mark it safe. git config --global --add safe.directory "$dotfiles_dir" >> "$logfile" 2>&1 || true @@ -1274,10 +1288,10 @@ prune_waybar_battery() { action="removing waybar battery module (no battery detected)" && display "task" "$action" waybar_config="/home/$username/.config/waybar/config" # Remove "battery" from sysmonitor modules array and fix trailing comma - sed -i '/"battery"$/d' "$waybar_config" - sed -i 's/"custom\/disk",/"custom\/disk"/' "$waybar_config" + sed -i '/"battery"$/d' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" + sed -i 's/"custom\/disk",/"custom\/disk"/' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" # Remove the battery config block - sed -i '/"battery": {/,/^ },$/d' "$waybar_config" + sed -i '/"battery": {/,/^ },$/d' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" fi } @@ -1381,14 +1395,14 @@ aur_installer() { if ! (sudo -u "$username" git clone --depth 1 "$yay_repo" "$build_dir" >> "$logfile" 2>&1); then error_warn "cloning source code for yay - directory may exist, removing and retrying" "$?" (safe_rm_rf "$build_dir" "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "removing existing directory for yay" "$?" + error_fatal "removing existing directory for yay" "$?" "remove the build dir manually: rm -rf $source_dir/yay" (sudo -u "$username" git clone --depth 1 "$yay_repo" "$build_dir" >> "$logfile" 2>&1) || \ - error_fatal "re-cloning source code for yay after cleanup" "$?" + error_fatal "re-cloning source code for yay after cleanup" "$?" 
"check that aur.archlinux.org is reachable" fi action="packaging and installing yay"; display "task" "$action" (cd "$build_dir" && sudo -u "$username" makepkg --noconfirm -si >> "$logfile" 2>&1) || \ - error_fatal "$action" "$?" + error_fatal "$action" "$?" "confirm base-devel is installed (pacman -S base-devel), then re-run" } ### Essential Services @@ -1447,7 +1461,8 @@ EOF wireless_region="${current_lang:3:2}" # extract country code (positions 3-4) action="configuring wireless regulatory domain ($wireless_region)" && display "task" "$action" backup_system_file /etc/conf.d/wireless-regdom - sed -i "s|^#WIRELESS_REGDOM=\"${wireless_region}\"|WIRELESS_REGDOM=\"${wireless_region}\"|" /etc/conf.d/wireless-regdom + sed -i "s|^#WIRELESS_REGDOM=\"${wireless_region}\"|WIRELESS_REGDOM=\"${wireless_region}\"|" /etc/conf.d/wireless-regdom 2>> "$logfile" || \ + error_warn "$action" "$?" # Encrypted DNS (DNS over TLS) @@ -1577,7 +1592,7 @@ configure_firewall() { # netfilter modules. This is a test environment limitation, not a bug. # On real hardware with proper kernel support, UFW activates correctly. action="verifying firewall is active" && display "task" "$action" - if ! ufw status | grep -q "Status: active"; then + if ! ufw status 2>> "$logfile" | grep -q "Status: active"; then error_messages=("FIREWALL NOT ACTIVE - run: sudo ufw enable" "${error_messages[@]}") error_warn "$action" "1" fi @@ -1608,7 +1623,8 @@ configure_service_discovery() { action="configuring geoclue to use BeaconDB location service" && display "task" "$action" if grep -q '^#url=https://api.beacondb.net/v1/geolocate' /etc/geoclue/geoclue.conf 2>/dev/null; then backup_system_file /etc/geoclue/geoclue.conf - sed -i 's|^#url=https://api.beacondb.net/v1/geolocate|url=https://api.beacondb.net/v1/geolocate|' /etc/geoclue/geoclue.conf + sed -i 's|^#url=https://api.beacondb.net/v1/geolocate|url=https://api.beacondb.net/v1/geolocate|' /etc/geoclue/geoclue.conf 2>> "$logfile" || \ + error_warn "$action" "$?" 
fi # Whitelist gammastep in geoclue config (geoclue demo agent is started via hyprland.conf exec-once) @@ -1660,7 +1676,8 @@ configure_package_cache() { action="configuring paccache to keep 3 versions" && display "task" "$action" backup_system_file /etc/conf.d/pacman-contrib - sed -i 's/^PACCACHE_ARGS=.*/PACCACHE_ARGS=-k3/' /etc/conf.d/pacman-contrib + sed -i 's/^PACCACHE_ARGS=.*/PACCACHE_ARGS=-k3/' /etc/conf.d/pacman-contrib 2>> "$logfile" || \ + error_warn "$action" "$?" } @@ -2012,7 +2029,8 @@ hyprland() { # Apply camera settings when Logitech BRIO is connected ACTION=="add", SUBSYSTEM=="video4linux", ATTRS{idVendor}=="046d", ATTRS{idProduct}=="085e", ATTR{index}=="0", RUN+="/home/ARCHSETUP_USERNAME/.local/bin/logitech-brio-settings.sh /dev/%k" UDEVEOF - sed -i "s/ARCHSETUP_USERNAME/${username}/" /etc/udev/rules.d/99-logitech-brio.rules + sed -i "s/ARCHSETUP_USERNAME/${username}/" /etc/udev/rules.d/99-logitech-brio.rules 2>> "$logfile" || \ + error_warn "personalizing the Logitech BRIO udev rule" "$?" chmod 644 /etc/udev/rules.d/99-logitech-brio.rules fi @@ -2073,7 +2091,7 @@ display_server() { action="Skipping display server (DESKTOP_ENV=none)" && display "task" "$action" ;; *) - error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" + error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" "set DESKTOP_ENV in archsetup.conf (or --desktop-env) to one of the valid options" ;; esac } @@ -2092,7 +2110,7 @@ window_manager() { action="Skipping window manager (DESKTOP_ENV=none)" && display "task" "$action" ;; *) - error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" + error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" "set DESKTOP_ENV in archsetup.conf (or --desktop-env) to one of the valid options" ;; esac } @@ -2796,7 +2814,7 @@ tighten_efi_permissions() { && ! 
grep -E "^[^#].*[[:space:]]/efi[[:space:]]+vfat[[:space:]]" /etc/fstab | grep -q "fmask="; then action="tightening /efi mount permissions in fstab" && display "task" "$action" backup_system_file /etc/fstab - sed -i -E '/^[^#].*[[:space:]]\/efi[[:space:]]+vfat[[:space:]]/ s/([[:space:]]+vfat[[:space:]]+)([^[:space:]]+)/\1\2,fmask=0177,dmask=0077/' /etc/fstab \ + sed -i -E '/^[^#].*[[:space:]]\/efi[[:space:]]+vfat[[:space:]]/ s/([[:space:]]+vfat[[:space:]]+)([^[:space:]]+)/\1\2,fmask=0177,dmask=0077/' /etc/fstab 2>> "$logfile" \ || error_warn "$action" "$?" fi @@ -2838,7 +2856,7 @@ configure_initramfs_hook() { if ! is_zfs_root; then action="delegating fsck messages from udev to systemd" && display "task" "$action" backup_system_file /etc/mkinitcpio.conf - sed -i '/^HOOKS=/ s/\budev\b/systemd/' /etc/mkinitcpio.conf || error_warn "$action" "$?" + sed -i '/^HOOKS=/ s/\budev\b/systemd/' /etc/mkinitcpio.conf 2>> "$logfile" || error_warn "$action" "$?" mkinitcpio -P >> "$logfile" 2>&1 || error_warn "running mkinitcpio -P to silence fsck messages" "$?" fi @@ -2987,6 +3005,8 @@ outro() { for msg in "${error_messages[@]}"; do printf " - %s\n" "$msg" | tee -a "$logfile" done + printf " Each error's command output is in the log: grep -n 'ERROR:' %s\n" "$logfile" | tee -a "$logfile" + printf " Fix the cause and re-run archsetup for anything that must succeed.\n" | tee -a "$logfile" fi printf "\n" @@ -829,19 +829,19 @@ Some entries are libraries likely pulled in as dependencies (blas-openblas, open - [ ] webkit2gtk - [ ] whisper.cpp -** TODO [#B] All error messages should be actionable with recovery steps +** DONE [#B] All error messages should be actionable with recovery steps +CLOSED: [2026-07-02 Thu] :PROPERTIES: :LAST_REVIEWED: 2026-06-24 :END: -Currently just reports errors without guidance on how to fix them +Shipped 2026-07-02 (speedrun). 
Structural fix at the helper: =error_fatal= now takes an optional third recovery-hint arg and every fatal prints the last five log lines inline, the full log path, the per-site "Fix:" when given, and the resume pointer (step markers mean a re-run continues where it stopped) — so even a hint-less fatal is actionable. All 17 fatal call sites got specific hints (keyring reinit, mirrorlist switch, userdel/USERNAME conflict, base-devel for makepkg, DESKTOP_ENV values, dotfiles-dir cleanup, tmpfs sizing, aur.archlinux.org reachability). The end-of-run Error Summary now closes with the grep-the-log line and the fix-and-re-run pointer. =error_warn= already carried what-failed + exit code into the summary; unchanged. -** TODO [#B] Improve logging consistency +** DONE [#B] Improve logging consistency +CLOSED: [2026-07-02 Thu] :PROPERTIES: :LAST_REVIEWED: 2026-06-24 :END: -Some operations log to ~$logfile~, others don't - standardize logging -All package installs should log, all system modifications should log, all errors should log with context -Makes debugging failed installations easier +Shipped 2026-07-02 (speedrun), paired with the actionable-errors task. Audit result: the install helpers (pacman_install/aur_install/retry_install/run_task/git_install/pipx) and error helpers already tee/append everything to $logfile — the gaps were direct mutations whose stderr went to the console and vanished. Swept every =sed -i= and file-write mutation lacking capture (locale.gen uncomment, pacman.conf ParallelDownloads/Color + multilib, waybar battery removal x3, wireless-regdom, geoclue BeaconDB, paccache, BRIO udev rule, fstab fmask, mkinitcpio HOOKS, sudoers append, ufw status read): each now sends stderr to $logfile, and the previously-silent ones (locale.gen, pacman.conf, multilib, waybar, regdom, geoclue, paccache, udev) gained =error_warn= handlers so failures land in the summary instead of passing silently. 
Verified: bash -n clean, 10 unit suites green, shellcheck warning-diff vs HEAD empty (no new findings). ** TODO [#B] Security hardening + audit :security: :PROPERTIES: |
