diff options
| -rwxr-xr-x | archsetup | 84 |
| -rw-r--r-- | todo.org | 12 |
2 files changed, 58 insertions, 38 deletions
@@ -623,7 +623,7 @@ intro() { # count the arch packages before install pacman -Q > "$packages_before" || \ - error_fatal "generating pre-install package list" "$?" + error_fatal "generating pre-install package list" "$?" "confirm pacman works on this system: pacman -Q | head" } ### Error Handling @@ -639,10 +639,20 @@ error_warn() { } # Fatal error - log and exit -# Usage: error_fatal "what failed" "$?" +# Usage: error_fatal "what failed" "$?" ["recovery hint"] +# Every fatal points at the log (with its last lines inline), prints the +# per-site recovery hint when one is given, and names the resume path — +# archsetup's step markers mean a re-run continues where it stopped. error_fatal() { printf "CRASH: %s (error: %s) @ %s. Halting.\n" \ "$1" "$2" "$(date +'%T')" | tee -a "$logfile" + if [ -f "$logfile" ]; then + printf " Last log lines:\n" + tail -n 5 "$logfile" | sed 's/^/ | /' + printf " Full log: %s\n" "$logfile" + fi + [ -n "${3:-}" ] && printf " Fix: %s\n" "$3" | tee -a "$logfile" + printf " Then re-run archsetup - completed steps are tracked, so it resumes here.\n" | tee -a "$logfile" exit 1 } @@ -993,13 +1003,14 @@ bootstrap_pacman_keyring() { fi action="ensuring current Arch Linux keyring" && display "task" "$action" - (pacman -Syy) >> "$logfile" 2>&1 || error_fatal "$action" "$?" + (pacman -Syy) >> "$logfile" 2>&1 || error_fatal "$action" "$?" \ + "check network and mirrors: ping archlinux.org; cat /etc/pacman.d/mirrorlist" (pacman -S --noconfirm archlinux-keyring) >> "$logfile" 2>&1 || \ - error_fatal "$action" "$?" + error_fatal "$action" "$?" "reinitialize the keyring: pacman-key --init && pacman-key --populate archlinux" display "task" "verifying Arch Linux keys" (pacman-key --populate archlinux >> "$logfile" 2>&1) || \ - error_fatal "verifying Arch Linux keys" "$?" + error_fatal "verifying Arch Linux keys" "$?" 
"reinitialize the keyring: pacman-key --init && pacman-key --populate archlinux" # The bulk refresh gets the same retry budget as per-package installs — # a single slow mirror ("Operation too slow") halted a full install at @@ -1015,7 +1026,7 @@ bootstrap_pacman_keyring() { [ "$attempt" -lt "$MAX_INSTALL_RETRIES" ] && \ display "task" "retrying package cache refresh (attempt $((attempt + 1))/$MAX_INSTALL_RETRIES)" done - $refresh_ok || error_fatal "$action" "$?" + $refresh_ok || error_fatal "$action" "$?" "run pacman -Syu manually to see the failure, or switch mirrors in /etc/pacman.d/mirrorlist" } @@ -1040,7 +1051,8 @@ configure_build_environment() { # Uncomment the selected locale in locale.gen (format: "en_US.UTF-8 UTF-8") locale_entry="${locale} ${locale##*.}" # e.g., "en_US.UTF-8 UTF-8" backup_system_file /etc/locale.gen - sed -i "s|^#${locale_entry}|${locale_entry}|" /etc/locale.gen + sed -i "s|^#${locale_entry}|${locale_entry}|" /etc/locale.gen 2>> "$logfile" || \ + error_warn "uncommenting $locale_entry in locale.gen" "$?" (locale-gen >> "$logfile" 2>&1) || error_warn "$action" "$?" echo "LANG=$locale" > /etc/locale.conf export LANG="$locale" @@ -1071,11 +1083,13 @@ configure_build_environment() { backup_system_file /etc/pacman.conf # Match a commented OR already-uncommented ParallelDownloads: current Arch # ships it uncommented at 5, so a "^#"-only match silently leaves it at 5. - sed -i "s/^#\?ParallelDownloads.*$/ParallelDownloads = 10/;s/^#Color$/Color/" /etc/pacman.conf + sed -i "s/^#\?ParallelDownloads.*$/ParallelDownloads = 10/;s/^#Color$/Color/" /etc/pacman.conf 2>> "$logfile" || \ + error_warn "configuring pacman.conf (ParallelDownloads/Color)" "$?" # enable multilib repository (required for 32-bit libraries, Steam, etc.) 
action="enabling multilib repository" && display "task" "$action" - sed -i '/^#\[multilib\]/{s/^#//;n;s/^#//}' /etc/pacman.conf + sed -i '/^#\[multilib\]/{s/^#//;n;s/^#//}' /etc/pacman.conf 2>> "$logfile" || \ + error_warn "$action" "$?" # Keep pacman.conf world-readable. User-level makepkg/yay reads it to # resolve dependencies, so a root-only file makes every AUR build fail with @@ -1127,7 +1141,7 @@ EOF [ -f /etc/sudoers.pacnew ] && cp /etc/sudoers.pacnew /etc/sudoers >> "$logfile" 2>&1 action="creating a directory to build/install software from git/AUR." - (mkdir -p "$source_dir") || error_fatal "creating the directory $source_dir" "$?" + (mkdir -p "$source_dir") || error_fatal "creating the directory $source_dir" "$?" "check permissions and free space: df -h" } @@ -1138,12 +1152,12 @@ create_user() { display "task" "checking if user exists" # halt if $username exists ( id -u "$username" >/dev/null 2>&1; ) && \ - error_fatal "user '$username' already exists" "user exists" + error_fatal "user '$username' already exists" "user exists" "pick a different USERNAME in archsetup.conf, or remove the user first: userdel -r $username" # create $username with home, group, shell, password action="creating user and home directory" && display "task" "$action" (useradd -m -G wheel -s /bin/zsh "$username" >> "$logfile" 2>&1) || \ - error_fatal "adding user '$username'" "$?" + error_fatal "adding user '$username'" "$?" "run the useradd manually to see why it failed" display "task" "assigning the password" echo "$username:$password" | chpasswd # any text is allowable! be careful! @@ -1152,7 +1166,7 @@ create_user() { display "task" "adding to appropriate groups" (usermod -aG \ sys,adm,network,scanner,power,uucp,audio,lp,rfkill,video,storage,optical,users \ - "$username" >> "$logfile" 2>&1) || error_fatal "adding $username to groups" "$?" + "$username" >> "$logfile" 2>&1) || error_fatal "adding $username to groups" "$?" 
"confirm the groups exist: getent group network rfkill video" display "task" "configuring shell" # zsh cache required: $username will install via yay; zsh will run those commands @@ -1161,19 +1175,19 @@ create_user() { # give $username sudo nopasswd rights (required for aur installs) action="granting permissions" && display "task" "$action" backup_system_file /etc/sudoers - (echo "%$username ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers) \ + (echo "%$username ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers) 2>> "$logfile" \ || error_warn "$action" "$?" # mount as ramdisk to speed aur/git build/installs if ! mountpoint -q "$source_dir" 2>/dev/null; then (mount -t tmpfs -o size=4G archsetup "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "mounting the RAM disk for archsetup" "$?" + error_fatal "mounting the RAM disk for archsetup" "$?" "check available memory (free -h) or mount a smaller tmpfs at $source_dir by hand" else display "task" "ramdisk already mounted at $source_dir" fi (chown -R "$username":wheel "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "changing ownership of $source_dir" "$?" + error_fatal "changing ownership of $source_dir" "$?" "run the chown manually to see the failure" # Bootstrap DNS for git clones and AUR installs (full config in essential_services) if [ ! -L /etc/resolv.conf ] || [ "$(readlink /etc/resolv.conf)" != "/run/systemd/resolve/stub-resolv.conf" ]; then @@ -1227,7 +1241,7 @@ clone_user_repos() { # Q5: the --adopt/restore conflict handling below needs a real git checkout. # Refuse to continue if the clone didn't produce one (bad URL, network, a # tarball drop) rather than silently skipping the restore step. 
- [[ -d "$dotfiles_dir/.git" ]] || error_fatal "dotfiles dir is not a git checkout: $dotfiles_dir" 1 + [[ -d "$dotfiles_dir/.git" ]] || error_fatal "dotfiles dir is not a git checkout: $dotfiles_dir" 1 "move that directory aside so archsetup can clone fresh, or point DOTFILES_DIR at a real clone" # root runs stow/restore against the user-owned clone; mark it safe. git config --global --add safe.directory "$dotfiles_dir" >> "$logfile" 2>&1 || true @@ -1274,10 +1288,10 @@ prune_waybar_battery() { action="removing waybar battery module (no battery detected)" && display "task" "$action" waybar_config="/home/$username/.config/waybar/config" # Remove "battery" from sysmonitor modules array and fix trailing comma - sed -i '/"battery"$/d' "$waybar_config" - sed -i 's/"custom\/disk",/"custom\/disk"/' "$waybar_config" + sed -i '/"battery"$/d' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" + sed -i 's/"custom\/disk",/"custom\/disk"/' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" # Remove the battery config block - sed -i '/"battery": {/,/^ },$/d' "$waybar_config" + sed -i '/"battery": {/,/^ },$/d' "$waybar_config" 2>> "$logfile" || error_warn "$action" "$?" fi } @@ -1381,14 +1395,14 @@ aur_installer() { if ! (sudo -u "$username" git clone --depth 1 "$yay_repo" "$build_dir" >> "$logfile" 2>&1); then error_warn "cloning source code for yay - directory may exist, removing and retrying" "$?" (safe_rm_rf "$build_dir" "$source_dir" >> "$logfile" 2>&1) || \ - error_fatal "removing existing directory for yay" "$?" + error_fatal "removing existing directory for yay" "$?" "remove the build dir manually: rm -rf $source_dir/yay" (sudo -u "$username" git clone --depth 1 "$yay_repo" "$build_dir" >> "$logfile" 2>&1) || \ - error_fatal "re-cloning source code for yay after cleanup" "$?" + error_fatal "re-cloning source code for yay after cleanup" "$?" 
"check that aur.archlinux.org is reachable" fi action="packaging and installing yay"; display "task" "$action" (cd "$build_dir" && sudo -u "$username" makepkg --noconfirm -si >> "$logfile" 2>&1) || \ - error_fatal "$action" "$?" + error_fatal "$action" "$?" "confirm base-devel is installed (pacman -S base-devel), then re-run" } ### Essential Services @@ -1447,7 +1461,8 @@ EOF wireless_region="${current_lang:3:2}" # extract country code (positions 3-4) action="configuring wireless regulatory domain ($wireless_region)" && display "task" "$action" backup_system_file /etc/conf.d/wireless-regdom - sed -i "s|^#WIRELESS_REGDOM=\"${wireless_region}\"|WIRELESS_REGDOM=\"${wireless_region}\"|" /etc/conf.d/wireless-regdom + sed -i "s|^#WIRELESS_REGDOM=\"${wireless_region}\"|WIRELESS_REGDOM=\"${wireless_region}\"|" /etc/conf.d/wireless-regdom 2>> "$logfile" || \ + error_warn "$action" "$?" # Encrypted DNS (DNS over TLS) @@ -1577,7 +1592,7 @@ configure_firewall() { # netfilter modules. This is a test environment limitation, not a bug. # On real hardware with proper kernel support, UFW activates correctly. action="verifying firewall is active" && display "task" "$action" - if ! ufw status | grep -q "Status: active"; then + if ! ufw status 2>> "$logfile" | grep -q "Status: active"; then error_messages=("FIREWALL NOT ACTIVE - run: sudo ufw enable" "${error_messages[@]}") error_warn "$action" "1" fi @@ -1608,7 +1623,8 @@ configure_service_discovery() { action="configuring geoclue to use BeaconDB location service" && display "task" "$action" if grep -q '^#url=https://api.beacondb.net/v1/geolocate' /etc/geoclue/geoclue.conf 2>/dev/null; then backup_system_file /etc/geoclue/geoclue.conf - sed -i 's|^#url=https://api.beacondb.net/v1/geolocate|url=https://api.beacondb.net/v1/geolocate|' /etc/geoclue/geoclue.conf + sed -i 's|^#url=https://api.beacondb.net/v1/geolocate|url=https://api.beacondb.net/v1/geolocate|' /etc/geoclue/geoclue.conf 2>> "$logfile" || \ + error_warn "$action" "$?" 
fi # Whitelist gammastep in geoclue config (geoclue demo agent is started via hyprland.conf exec-once) @@ -1660,7 +1676,8 @@ configure_package_cache() { action="configuring paccache to keep 3 versions" && display "task" "$action" backup_system_file /etc/conf.d/pacman-contrib - sed -i 's/^PACCACHE_ARGS=.*/PACCACHE_ARGS=-k3/' /etc/conf.d/pacman-contrib + sed -i 's/^PACCACHE_ARGS=.*/PACCACHE_ARGS=-k3/' /etc/conf.d/pacman-contrib 2>> "$logfile" || \ + error_warn "$action" "$?" } @@ -2012,7 +2029,8 @@ hyprland() { # Apply camera settings when Logitech BRIO is connected ACTION=="add", SUBSYSTEM=="video4linux", ATTRS{idVendor}=="046d", ATTRS{idProduct}=="085e", ATTR{index}=="0", RUN+="/home/ARCHSETUP_USERNAME/.local/bin/logitech-brio-settings.sh /dev/%k" UDEVEOF - sed -i "s/ARCHSETUP_USERNAME/${username}/" /etc/udev/rules.d/99-logitech-brio.rules + sed -i "s/ARCHSETUP_USERNAME/${username}/" /etc/udev/rules.d/99-logitech-brio.rules 2>> "$logfile" || \ + error_warn "personalizing the Logitech BRIO udev rule" "$?" chmod 644 /etc/udev/rules.d/99-logitech-brio.rules fi @@ -2073,7 +2091,7 @@ display_server() { action="Skipping display server (DESKTOP_ENV=none)" && display "task" "$action" ;; *) - error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" + error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" "set DESKTOP_ENV in archsetup.conf (or --desktop-env) to one of the valid options" ;; esac } @@ -2092,7 +2110,7 @@ window_manager() { action="Skipping window manager (DESKTOP_ENV=none)" && display "task" "$action" ;; *) - error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" + error_fatal "Unknown DESKTOP_ENV: $desktop_env. Valid options: dwm, hyprland, none" "1" "set DESKTOP_ENV in archsetup.conf (or --desktop-env) to one of the valid options" ;; esac } @@ -2796,7 +2814,7 @@ tighten_efi_permissions() { && ! 
grep -E "^[^#].*[[:space:]]/efi[[:space:]]+vfat[[:space:]]" /etc/fstab | grep -q "fmask="; then action="tightening /efi mount permissions in fstab" && display "task" "$action" backup_system_file /etc/fstab - sed -i -E '/^[^#].*[[:space:]]\/efi[[:space:]]+vfat[[:space:]]/ s/([[:space:]]+vfat[[:space:]]+)([^[:space:]]+)/\1\2,fmask=0177,dmask=0077/' /etc/fstab \ + sed -i -E '/^[^#].*[[:space:]]\/efi[[:space:]]+vfat[[:space:]]/ s/([[:space:]]+vfat[[:space:]]+)([^[:space:]]+)/\1\2,fmask=0177,dmask=0077/' /etc/fstab 2>> "$logfile" \ || error_warn "$action" "$?" fi @@ -2838,7 +2856,7 @@ configure_initramfs_hook() { if ! is_zfs_root; then action="delegating fsck messages from udev to systemd" && display "task" "$action" backup_system_file /etc/mkinitcpio.conf - sed -i '/^HOOKS=/ s/\budev\b/systemd/' /etc/mkinitcpio.conf || error_warn "$action" "$?" + sed -i '/^HOOKS=/ s/\budev\b/systemd/' /etc/mkinitcpio.conf 2>> "$logfile" || error_warn "$action" "$?" mkinitcpio -P >> "$logfile" 2>&1 || error_warn "running mkinitcpio -P to silence fsck messages" "$?" fi @@ -2987,6 +3005,8 @@ outro() { for msg in "${error_messages[@]}"; do printf " - %s\n" "$msg" | tee -a "$logfile" done + printf " Each error's command output is in the log: grep -n 'ERROR:' %s\n" "$logfile" | tee -a "$logfile" + printf " Fix the cause and re-run archsetup for anything that must succeed.\n" | tee -a "$logfile" fi printf "\n" @@ -829,19 +829,19 @@ Some entries are libraries likely pulled in as dependencies (blas-openblas, open - [ ] webkit2gtk - [ ] whisper.cpp -** TODO [#B] All error messages should be actionable with recovery steps +** DONE [#B] All error messages should be actionable with recovery steps +CLOSED: [2026-07-02 Thu] :PROPERTIES: :LAST_REVIEWED: 2026-06-24 :END: -Currently just reports errors without guidance on how to fix them +Shipped 2026-07-02 (speedrun). 
Structural fix at the helper: =error_fatal= now takes an optional third recovery-hint arg and every fatal prints the last five log lines inline, the full log path, the per-site "Fix:" when given, and the resume pointer (step markers mean a re-run continues where it stopped) — so even a hint-less fatal is actionable. All 17 fatal call sites got specific hints (keyring reinit, mirrorlist switch, userdel/USERNAME conflict, base-devel for makepkg, DESKTOP_ENV values, dotfiles-dir cleanup, tmpfs sizing, aur.archlinux.org reachability). The end-of-run Error Summary now closes with the grep-the-log line and the fix-and-re-run pointer. =error_warn= already carried what-failed + exit code into the summary; unchanged. -** TODO [#B] Improve logging consistency +** DONE [#B] Improve logging consistency +CLOSED: [2026-07-02 Thu] :PROPERTIES: :LAST_REVIEWED: 2026-06-24 :END: -Some operations log to ~$logfile~, others don't - standardize logging -All package installs should log, all system modifications should log, all errors should log with context -Makes debugging failed installations easier +Shipped 2026-07-02 (speedrun), paired with the actionable-errors task. Audit result: the install helpers (pacman_install/aur_install/retry_install/run_task/git_install/pipx) and error helpers already tee/append everything to $logfile — the gaps were direct mutations whose stderr went to the console and vanished. Swept every =sed -i= and file-write mutation lacking capture (locale.gen uncomment, pacman.conf ParallelDownloads/Color + multilib, waybar battery removal x3, wireless-regdom, geoclue BeaconDB, paccache, BRIO udev rule, fstab fmask, mkinitcpio HOOKS, sudoers append, ufw status read): each now sends stderr to $logfile, and the previously-silent ones (locale.gen, pacman.conf, multilib, waybar, regdom, geoclue, paccache, udev) gained =error_warn= handlers so failures land in the summary instead of passing silently. 
Verified: bash -n clean, 10 unit suites green, shellcheck warning-diff vs HEAD empty (no new findings). ** TODO [#B] Security hardening + audit :security: :PROPERTIES: |
