1 files changed, 74 insertions, 22 deletions
diff --git a/scripts/testing/lib/network-diagnostics.sh b/scripts/testing/lib/network-diagnostics.sh
index 38788e5..dc54334 100644
--- a/scripts/testing/lib/network-diagnostics.sh
+++ b/scripts/testing/lib/network-diagnostics.sh
@@ -6,58 +6,110 @@
 
 # Note: logging.sh and vm-utils.sh should already be sourced by the calling script
 # Uses globals: ROOT_PASSWORD, SSH_PORT, SSH_OPTS, VM_IP (from vm-utils.sh or calling script)
+# Optional global: TEST_RESULTS_DIR (raw command outputs are saved there when set)
 
-# Run quick network diagnostics
+# Gather one read-only fact from the VM, log it via info, and save the raw output.
+# Best-effort by design: a failed remote command or an unwritable results dir
+# never aborts or gates the caller, so a failing install still leaves evidence.
+#   $1 label   human-readable label for the fact
+#   $2 slug    filename slug; output goes to $results_dir/netdiag-<slug>.txt
+#   $3 cmd     remote command to run over the shared ssh_base
+# Uses the caller's locals ssh_base and results_dir (dynamic scope). Returns 0.
+_netdiag_fact() {
+    local label="$1" slug="$2" cmd="$3" out
+    out="$($ssh_base "$cmd" 2>&1)" || true  # tolerate failure under errexit; keep stderr as evidence
+    info "${label}:"
+    printf '%s\n' "$out" | while IFS= read -r line; do
+        info "  $line"
+    done
+    [ -n "$results_dir" ] || return 0
+    # Group the write so 2>/dev/null also silences a failed '>' redirection.
+    { printf '%s\n' "$out" > "$results_dir/netdiag-${slug}.txt"; } 2>/dev/null || true
+}
+
+# Run quick network diagnostics.
+#
+# Evidence first: collect read-only facts (interfaces, route, resolver)
+# unconditionally, then run every reachability check and report all failures at
+# the end. A DNS failure is named as a DNS failure, not masked as a generic "no
+# internet" or misattributed to the Arch mirror. Returns 0 when all checks pass,
+# non-zero when any check fails, so callers keep their success/failure contract.
 run_network_diagnostics() {
     local password="${ROOT_PASSWORD:-archsetup}"
     local port="${SSH_PORT:-22}"
     local host="${VM_IP:-localhost}"
     local ssh_base="sshpass -p $password ssh $SSH_OPTS -p $port root@$host"
+    local results_dir="${TEST_RESULTS_DIR:-}"
+    local failures=()
 
     section "Pre-flight Network Diagnostics"
 
-    # Test 1: Basic connectivity (use curl instead of ping - SLIRP may not handle ICMP)
-    step "Testing internet connectivity"
-    if $ssh_base "curl -s --connect-timeout 5 -o /dev/null http://archlinux.org" 2>/dev/null; then
-        success "Internet connectivity OK"
-    else
-        error "No internet connectivity"
-        return 1
-    fi
+    # --- Phase 1: collect read-only facts, unconditionally ---
+    # These never add to failures[]; each fact is logged and, when
+    # TEST_RESULTS_DIR is set, its raw output is saved as netdiag-<slug>.txt there.
+    step "Collecting interface addresses"
+    _netdiag_fact "Interface addresses (ip -brief addr)" "ip-addr" "ip -brief addr"
+
+    step "Collecting default route"
+    _netdiag_fact "Default route (ip route show default)" "ip-route" "ip route show default"
 
-    # Test 2: DNS resolution (use getent which is always available, unlike nslookup/dig)
+    step "Reading resolver configuration"
+    _netdiag_fact "Resolver (/etc/resolv.conf)" "resolv-conf" "cat /etc/resolv.conf"
+
+    # --- Phase 2: generic connectivity checks (run all, don't short-circuit) ---
+    # Each failed check appends a label to failures[] instead of returning early.
+    # The curl checks use hostnames, so a DNS failure will usually fail them too.
     step "Testing DNS resolution"
     if $ssh_base "getent hosts archlinux.org >/dev/null 2>&1" 2>/dev/null; then
         success "DNS resolution OK"
     else
         error "DNS resolution failed"
-        return 1
+        failures+=("DNS resolution (getent hosts archlinux.org)")
     fi
 
-    # Test 3: Arch mirror accessibility
+    step "Testing HTTP egress"
+    if $ssh_base "curl -s --connect-timeout 5 -o /dev/null http://archlinux.org" 2>/dev/null; then
+        success "HTTP egress OK"
+    else
+        error "HTTP egress failed"
+        failures+=("HTTP egress (http://archlinux.org)")
+    fi
+
+    step "Testing TLS/HTTPS egress"
+    if $ssh_base "curl -s --connect-timeout 5 -o /dev/null https://archlinux.org" 2>/dev/null; then
+        success "TLS/HTTPS egress OK"
+    else
+        error "TLS/HTTPS egress failed"
+        failures+=("TLS/HTTPS egress (https://archlinux.org)")
+    fi
+
+    # --- Phase 3: Arch-specific HEAD checks (run all; NOTE(review): no --connect-timeout here) ---
     step "Testing Arch mirror access"
     if $ssh_base "curl -s -I https://geo.mirror.pkgbuild.com/ | head -1 | grep -qE '(200|301|302)'" 2>/dev/null; then
         success "Arch mirrors accessible"
     else
         error "Cannot reach Arch mirrors"
-        return 1
+        failures+=("Arch mirror (https://geo.mirror.pkgbuild.com/)")
     fi
 
-    # Test 4: AUR accessibility
     step "Testing AUR access"
     if $ssh_base "curl -s -I https://aur.archlinux.org/ | head -1 | grep -qE '(200|405)'" 2>/dev/null; then
         success "AUR accessible"
     else
         error "Cannot reach AUR"
-        return 1
+        failures+=("AUR (https://aur.archlinux.org/)")
     fi
 
-    # Show network info
-    info "Network configuration:"
-    $ssh_base "ip addr show | grep 'inet ' | grep -v '127.0.0.1'" 2>/dev/null | while IFS= read -r line; do
-        info "  $line"
-    done
+    # --- Summary: return 0 if failures[] is empty, else list every failure and return 1 ---
+    if [ ${#failures[@]} -eq 0 ]; then
+        success "Network diagnostics complete - all checks passed"
+        return 0
+    fi
 
-    success "Network diagnostics complete"
-    return 0
+    error "Network diagnostics found ${#failures[@]} failure(s):"
+    local f
+    for f in "${failures[@]}"; do
+        error "  - $f"
+    done
+    return 1
 }