aboutsummaryrefslogtreecommitdiff
path: root/scripts/testing/lib/validation.sh
blob: fa7ddcc9d2a17255d621e96ec4ac4c0afebb1788 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#!/bin/bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Validation utilities for archsetup testing
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
#
# This module provides comprehensive validation checks for archsetup installations.
# It captures pre-install state, runs post-install validations, and attributes
# issues to either archsetup or the base install (archzfs/vanilla Arch).

# Validation counters (reset to zero every time this module is sourced)
VALIDATION_PASSED=0
VALIDATION_FAILED=0
VALIDATION_WARNINGS=0

# Issue lists, one per attribution bucket (filled by attribute_issue).
# They are assigned empty arrays rather than declared with `declare -a`:
#   - `declare -a NAME` leaves an already-populated array untouched, so
#     re-sourcing this module would reset the counters above but keep stale
#     issues from the previous run;
#   - `declare` inside a function creates a function-local variable, so
#     sourcing this module from within a function would hide the arrays from
#     the rest of the script.
ARCHSETUP_ISSUES=()
BASE_INSTALL_ISSUES=()
UNKNOWN_ISSUES=()

# Run a command on the test VM as root over SSH, passing its stdout through.
# Globals (read):
#   ROOT_PASSWORD - root password handed to sshpass
#   VM_IP         - address of the VM
#   SSH_PORT      - optional port (defaults to 22)
#   SSH_KEY_OPT   - optional extra ssh options; word-split on purpose so it
#                   can carry several words
# Arguments:
#   $@ - remote command line, forwarded to ssh unchanged
# Returns:
#   exit status of the sshpass/ssh invocation; its stderr is discarded
ssh_cmd() {
    # The password is supplied through the SSHPASS environment variable
    # (sshpass -e) instead of `-p <password>`, so it does not appear in the
    # process argument list.
    # Host-key checking is disabled and no known_hosts file is kept.
    # shellcheck disable=SC2086  # SSH_KEY_OPT is intentionally unquoted
    SSHPASS="$ROOT_PASSWORD" sshpass -e ssh \
        -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -o ConnectTimeout=10 ${SSH_KEY_OPT:-} -p "${SSH_PORT:-22}" "root@$VM_IP" "$@" 2>/dev/null
}

# Record an issue in the list matching its origin.
# Arguments:
#   $1 - issue text
#   $2 - origin: "archsetup" or "base"; any other value is filed as unknown
# Globals (appended to):
#   ARCHSETUP_ISSUES, BASE_INSTALL_ISSUES, UNKNOWN_ISSUES
attribute_issue() {
    local text="$1"
    local origin="$2"

    if [[ "$origin" == "archsetup" ]]; then
        ARCHSETUP_ISSUES+=("$text")
    elif [[ "$origin" == "base" ]]; then
        BASE_INSTALL_ISSUES+=("$text")
    else
        # Anything unrecognised (including an empty origin) lands here
        UNKNOWN_ISSUES+=("$text")
    fi
}

#=============================================================================
# PRE-INSTALL LOG CAPTURE
#=============================================================================

# Snapshot the VM's state before archsetup runs.
# Arguments:
#   $1 - run directory; captures are written to <dir>/pre-install/
# Globals (written):
#   PRE_INSTALL_ERROR_COUNT - line count of the captured errors.log
# Every capture is best-effort: a failing remote command does not abort the
# function, and whatever it printed is still saved.
capture_pre_install_state() {
    local output_dir="$1"
    local capture_dir="$output_dir/pre-install"
    local spec

    # "<target file>|<remote command>", captured in this order
    local -a capture_specs=(
        "journal.log|journalctl -b --no-pager"
        "dmesg.log|dmesg"
        "packages.txt|pacman -Q"
        "services.txt|systemctl list-units --type=service --all"
        "failed-services.txt|systemctl --failed"
        "errors.log|journalctl -b -p err --no-pager"
    )

    section "Capturing Pre-Install State"
    mkdir -p "$capture_dir"

    step "Capturing system logs before archsetup"

    for spec in "${capture_specs[@]}"; do
        # Left of the first '|' is the file name, right of it the command
        ssh_cmd "${spec#*|}" > "$capture_dir/${spec%%|*}" 2>&1 || true
    done

    # Number of error-level journal lines that existed before archsetup ran
    PRE_INSTALL_ERROR_COUNT=$(wc -l < "$capture_dir/errors.log" 2>/dev/null || echo 0)

    success "Pre-install state captured ($PRE_INSTALL_ERROR_COUNT pre-existing error lines)"
}

#=============================================================================
# POST-INSTALL LOG CAPTURE
#=============================================================================

# Snapshot the VM's state after archsetup has run.
# Arguments:
#   $1 - run directory; captures are written to <dir>/post-install/
# Collects the same items as the pre-install capture plus the archsetup log
# read from /var/log/archsetup-*.log on the VM. Every capture is best-effort:
# a failing remote command does not abort the function.
capture_post_install_state() {
    local output_dir="$1"
    local capture_dir="$output_dir/post-install"
    local spec

    # "<target file>|<remote command>", captured in this order
    local -a capture_specs=(
        "journal.log|journalctl -b --no-pager"
        "dmesg.log|dmesg"
        "packages.txt|pacman -Q"
        "services.txt|systemctl list-units --type=service --all"
        "failed-services.txt|systemctl --failed"
        "errors.log|journalctl -b -p err --no-pager"
        "archsetup.log|cat /var/log/archsetup-*.log 2>/dev/null"
    )

    section "Capturing Post-Install State"
    mkdir -p "$capture_dir"

    step "Capturing system logs after archsetup"

    for spec in "${capture_specs[@]}"; do
        # Left of the first '|' is the file name, right of it the command
        ssh_cmd "${spec#*|}" > "$capture_dir/${spec%%|*}" 2>&1 || true
    done

    success "Post-install state captured"
}

#=============================================================================
# LOG DIFF ANALYSIS
#=============================================================================

# Compare the pre- and post-install captures of a run and write the
# differences to <output_dir>/analysis/.
# Arguments:
#   $1 - run directory containing pre-install/ and post-install/ captures
# Globals (written):
#   NEW_ERROR_COUNT - number of error lines found only in the post-install
#                     capture (set only when both errors.log files exist)
# Files written under <output_dir>/analysis/:
#   new-errors.log           - error lines present only after archsetup
#   failed-services-diff.txt - diff of the failed-service listings (only when
#                              a new failure is detected)
#   new-packages.txt         - package lines present only after archsetup
# Each comparison is skipped when one of its two input files is missing.
analyze_log_diff() {
    local output_dir="$1"
    local pre_dir="$output_dir/pre-install"
    local post_dir="$output_dir/post-install"
    local analysis_dir="$output_dir/analysis"

    section "Analyzing Log Differences"
    mkdir -p "$analysis_dir"

    step "Comparing pre and post install errors"

    # Find new errors (in post but not in pre)
    if [ -f "$pre_dir/errors.log" ] && [ -f "$post_dir/errors.log" ]; then
        comm -13 <(sort "$pre_dir/errors.log") <(sort "$post_dir/errors.log") \
            > "$analysis_dir/new-errors.log" 2>/dev/null || true

        NEW_ERROR_COUNT=$(wc -l < "$analysis_dir/new-errors.log" 2>/dev/null || echo 0)

        if [ "$NEW_ERROR_COUNT" -gt 0 ]; then
            warn "Found $NEW_ERROR_COUNT new error lines after archsetup"
            # Categorize errors
            categorize_errors "$analysis_dir/new-errors.log" "$analysis_dir"
        else
            success "No new errors introduced by archsetup"
        fi
    fi

    step "Checking for new failed services"

    # Compare failed services
    if [ -f "$pre_dir/failed-services.txt" ] && [ -f "$post_dir/failed-services.txt" ]; then
        local pre_failed post_failed new_failed
        pre_failed=$(grep -c "failed" "$pre_dir/failed-services.txt" 2>/dev/null | tr -d '[:space:]')
        post_failed=$(grep -c "failed" "$post_dir/failed-services.txt" 2>/dev/null | tr -d '[:space:]')
        # Default to 0 if empty
        pre_failed=${pre_failed:-0}
        post_failed=${post_failed:-0}

        # Comparing the two counts alone misses the case where one service
        # recovers while a different one starts failing (same count, new
        # failure). Compare the failed lines themselves instead. awk's
        # `$1 = $1` collapses runs of whitespace so that a change in column
        # alignment between the two listings is not mistaken for a new entry.
        new_failed=$(comm -13 \
            <(awk '/failed/ { $1 = $1; print }' "$pre_dir/failed-services.txt" 2>/dev/null | sort) \
            <(awk '/failed/ { $1 = $1; print }' "$post_dir/failed-services.txt" 2>/dev/null | sort) \
            2>/dev/null)

        if [ -n "$new_failed" ]; then
            warn "New failed services detected (before: $pre_failed, after: $post_failed)"
            diff "$pre_dir/failed-services.txt" "$post_dir/failed-services.txt" \
                > "$analysis_dir/failed-services-diff.txt" 2>/dev/null || true
        else
            success "No new service failures"
        fi
    fi

    step "Counting new packages installed"

    if [ -f "$pre_dir/packages.txt" ] && [ -f "$post_dir/packages.txt" ]; then
        comm -13 <(sort "$pre_dir/packages.txt") <(sort "$post_dir/packages.txt") \
            > "$analysis_dir/new-packages.txt" 2>/dev/null || true

        local new_pkg_count
        new_pkg_count=$(wc -l < "$analysis_dir/new-packages.txt" 2>/dev/null || echo 0)
        info "Installed $new_pkg_count new packages"
    fi
}

# Sort the lines of an error log into benign, archsetup-related and
# base-install buckets.
# Arguments:
#   $1 - error log to read, one error per line
#   $2 - existing directory that receives the per-bucket files
# Files appended to (never truncated) under $2:
#   benign-errors.log, archsetup-errors.log, base-install-errors.log
# Side effects:
#   calls attribute_issue for every non-benign line ("archsetup" or "base");
#   benign lines are only written to their file.
# Matching is case-insensitive extended regex. The benign list is checked
# first, so a line matching both lists is treated as benign.
categorize_errors() {
    local error_log="$1"
    local output_dir="$2"
    local line benign_re archsetup_re

    # Known benign errors/warnings to ignore
    local -a BENIGN_PATTERNS=(
        "SPL:.*module verification failed"
        "ZFS:.*module verification failed"
        "tainting kernel"
        "RAS:.*Correctable Errors"
        "ACPI.*AE_NOT_FOUND"
        "firmware.*regulatory"
        "Invalid user name.*in service file"  # dbus-broker timing during package install
    )

    # Patterns that indicate archsetup issues
    local -a ARCHSETUP_PATTERNS=(
        "archsetup"
        "stow"
        "yay"
        "makepkg"
        "pacman.*error"
    )

    # Join each list into a single alternation so that a line costs at most
    # two grep invocations instead of one per pattern. IFS is changed inside
    # the command substitution's subshell only.
    benign_re=$(IFS='|'; printf '%s' "${BENIGN_PATTERNS[*]}")
    archsetup_re=$(IFS='|'; printf '%s' "${ARCHSETUP_PATTERNS[*]}")

    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        if grep -qiE -- "$benign_re" <<< "$line"; then
            printf '%s\n' "$line" >> "$output_dir/benign-errors.log"
        elif grep -qiE -- "$archsetup_re" <<< "$line"; then
            printf '%s\n' "$line" >> "$output_dir/archsetup-errors.log"
            attribute_issue "$line" "archsetup"
        else
            # Anything not recognised is attributed to the base install
            printf '%s\n' "$line" >> "$output_dir/base-install-errors.log"
            attribute_issue "$line" "base"
        fi
    done < "$error_log"
}

#=============================================================================
# ISSUE REPORTING
#=============================================================================

# Append one titled issue list to the report file.
# Arguments:
#   $1  - report file to append to
#   $2  - section title (the issue count is added in parentheses)
#   $3… - the issues, one per argument
_append_issue_section() {
    local report_file="$1"
    local title="$2"
    shift 2
    local entry

    {
        printf '%s (%s)\n' "$title" "$#"
        printf '%s\n' "-------------------------------------------"
        for entry in "$@"; do
            printf '  - %s\n' "$entry"
        done
        printf '\n'
    } >> "$report_file"
}

# Write the issue attribution report for a run and announce the totals.
# Arguments:
#   $1 - run directory; the report is written to <dir>/issue-report.txt
#   $2 - optional archzfs inbox directory. When it is an existing directory
#        and base-install issues were found, they are also written to
#        <inbox>/<YYYY-MM-DD>-test-issues.txt. That file is overwritten, so a
#        second run on the same day replaces the first one's file.
# Globals (read):
#   VALIDATION_PASSED, VALIDATION_FAILED, VALIDATION_WARNINGS,
#   ARCHSETUP_ISSUES, BASE_INSTALL_ISSUES, UNKNOWN_ISSUES
generate_issue_report() {
    local output_dir="$1"
    local archzfs_inbox="$2"
    local report_file="$output_dir/issue-report.txt"
    # Declared local so the loop below does not clobber a caller's variable
    local issue issue_file

    section "Issue Attribution Report"

    cat > "$report_file" << EOF
========================================
Issue Attribution Report
Generated: $(date +'%Y-%m-%d %H:%M:%S')
========================================

VALIDATION SUMMARY
------------------
Passed:   $VALIDATION_PASSED
Failed:   $VALIDATION_FAILED
Warnings: $VALIDATION_WARNINGS

EOF

    if [ ${#ARCHSETUP_ISSUES[@]} -gt 0 ]; then
        _append_issue_section "$report_file" "ARCHSETUP ISSUES" "${ARCHSETUP_ISSUES[@]}"
        error "Found ${#ARCHSETUP_ISSUES[@]} archsetup issues"
    fi

    if [ ${#BASE_INSTALL_ISSUES[@]} -gt 0 ]; then
        _append_issue_section "$report_file" "BASE INSTALL ISSUES" "${BASE_INSTALL_ISSUES[@]}"
        warn "Found ${#BASE_INSTALL_ISSUES[@]} base install issues"

        # If archzfs inbox provided, create issue files
        if [ -n "$archzfs_inbox" ] && [ -d "$archzfs_inbox" ]; then
            issue_file="$archzfs_inbox/$(date +'%Y-%m-%d')-test-issues.txt"
            {
                printf '%s\n' "Base install issues from archsetup test run:"
                printf '%s\n' "Date: $(date +'%Y-%m-%d %H:%M:%S')"
                printf '\n'
                for issue in "${BASE_INSTALL_ISSUES[@]}"; do
                    printf '%s\n' "- $issue"
                done
            } > "$issue_file"
            info "Created archzfs issue file: $issue_file"
        fi
    fi

    if [ ${#UNKNOWN_ISSUES[@]} -gt 0 ]; then
        _append_issue_section "$report_file" "UNKNOWN/UNATTRIBUTED ISSUES" "${UNKNOWN_ISSUES[@]}"
        warn "Found ${#UNKNOWN_ISSUES[@]} unattributed issues"
    fi

    if [ ${#ARCHSETUP_ISSUES[@]} -eq 0 ] && [ ${#BASE_INSTALL_ISSUES[@]} -eq 0 ] && [ ${#UNKNOWN_ISSUES[@]} -eq 0 ]; then
        echo "No issues found!" >> "$report_file"
        success "No issues found!"
    fi

    info "Issue report saved: $report_file"
}