aboutsummaryrefslogtreecommitdiff
path: root/scripts/kb-hygiene.sh
blob: 0d42e2e47621eda2d5a274ee1d771cbe312401c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env bash
# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
#
# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
# the KB points at them), duplicate agent titles, and stray sync-conflict
# files. Writes an org report into the rulesets inbox; the next session
# processes it through the normal inbox flow and proposes deletions to Craig.
# The script only reads the KB — it never deletes anything itself.
#
# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
#
# Usage: kb-hygiene.sh [kb-path] [report-dir]
#        defaults: ~/org/roam  ~/code/rulesets/inbox

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Positional arguments, each falling back to its documented default.
kb=${1:-"$HOME/org/roam"}
outdir=${2:-"$HOME/code/rulesets/inbox"}

# Without a KB directory there is nothing to inspect; bail out early.
[[ -d $kb ]] || {
    printf '%s\n' "kb-hygiene: no KB at $kb" >&2
    exit 1
}
mkdir -p "$outdir"

# The report file name carries a minute-resolution timestamp.
stamp=$(date +%Y-%m-%d-%H%M)
report="$outdir/${stamp}-kb-hygiene-report.org"

# All agent nodes: files carrying the :agent: filetag.
#
# list_agent_files ROOT
#   Print, one per line and sorted, every *.org file under ROOT whose
#   filetags line contains the :agent: tag. Org keywords are
#   case-insensitive, so "#+FILETAGS:" is accepted as well as "#+filetags:";
#   the tag itself is still matched case-sensitively. Paths containing
#   "sync-conflict" are dropped. grep diagnostics are silenced on purpose:
#   an unreadable file simply does not show up in the inventory.
list_agent_files() {
    local root=$1
    grep -rlE --include='*.org' \
        -e '^#\+[Ff][Ii][Ll][Ee][Tt][Aa][Gg][Ss]:.*:agent:' -- "$root" 2>/dev/null \
        | grep -v 'sync-conflict' \
        | sort
}

mapfile -t agent_files < <(list_agent_files "$kb")

# Headline counts for the report: org files excluding conflict copies, and
# the conflict copies themselves (any extension).
total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)

# Orphans: agent nodes whose :ID: no other file references via id: link.
#
# has_inbound_link ROOT NODE ID
#   Succeed (status 0) when at least one *.org file under ROOT, other than
#   NODE itself and other than sync-conflict copies, contains the literal
#   text "id:ID"; return 1 otherwise.
#
#   Candidate paths are compared as plain strings inside the shell rather
#   than through a grep regex, so regex metacharacters in file names
#   ('.', '[', '+', ...) can neither hide a real referrer nor defeat the
#   self-exclusion. The candidates come from process substitution instead of
#   a pipeline, so an early return (or grep exiting 2 on an unreadable file)
#   cannot turn a found link into a failure under `set -o pipefail`.
has_inbound_link() {
    local root=$1 node=$2 node_id=$3 referrer
    while IFS= read -r referrer; do
        if [[ $referrer != "$node" && $referrer != *sync-conflict* ]]; then
            return 0
        fi
    done < <(grep -rlF --include='*.org' -e "id:$node_id" -- "$root" 2>/dev/null)
    return 1
}

orphans=()
for f in "${agent_files[@]}"; do
    # First :ID: property line in the file; nodes without one are skipped.
    id=$(awk '/^:ID:/ {print $2; exit}' "$f")
    [ -z "$id" ] && continue
    if ! has_inbound_link "$kb" "$f" "$id"; then
        orphans+=("$f")
    fi
done

# Duplicate titles among agent nodes.
#
# agent_title FILE
#   Print the value of the first title keyword line in FILE, or nothing when
#   the file has none. The keyword is matched case-insensitively ("#+title:"
#   and "#+TITLE:" are the same keyword to Org), and leading blanks plus
#   trailing blanks/CR are removed so that equal titles compare equal.
agent_title() {
    awk '
        tolower($0) ~ /^#[+]title:/ {
            sub(/^[^:]*:[ \t]*/, "")
            sub(/[ \t\r]+$/, "")
            print
            exit
        }
    ' "$1"
}

# duplicate_titles FILE...
#   Print each title that occurs in more than one of the given files, once
#   per title, in sort order. Prints nothing when called without files.
duplicate_titles() {
    local node
    for node in "$@"; do
        agent_title "$node"
    done | sort | uniq -d
}

dupes=$(duplicate_titles "${agent_files[@]}")

# render_report
#   Print the org-formatted hygiene report on stdout.
#   Reads the script-level variables kb, agent_files, total_org, conflicts,
#   orphans and dupes; writes nothing but stdout.
#
#   - The date is sampled once so the title and #+DATE line always agree.
#   - Orphan paths are shown relative to the KB root; a trailing slash on
#     the KB path is ignored so the prefix is stripped either way.
#   - printf '%s\n' is used throughout because the text is arbitrary data
#     (file names, titles) and several lines start with '-'.
render_report() {
    local today root node title
    today=$(date '+%Y-%m-%d')
    root=${kb%/}

    printf '%s\n' \
        "#+TITLE: KB hygiene report — $today" \
        "#+DATE: $today" \
        "" \
        "* Counts" \
        "- Agent nodes: ${#agent_files[@]}" \
        "- Total KB org files: $total_org" \
        "- Conflict files: $conflicts" \
        "" \
        "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
    if [ "${#orphans[@]}" -eq 0 ]; then
        printf '%s\n' "- none"
    else
        for node in "${orphans[@]}"; do
            printf '%s\n' "- ${node#"$root"/}"
        done
    fi

    printf '%s\n' "" "* Duplicate agent titles"
    if [ -z "$dupes" ]; then
        printf '%s\n' "- none"
    else
        # dupes holds one title per line; prefix each line individually.
        while IFS= read -r title; do
            printf '%s\n' "- duplicate: $title"
        done <<<"$dupes"
    fi

    printf '%s\n' \
        "" \
        "* Disposition" \
        "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
}

render_report > "$report"

echo "kb-hygiene: report written to $report"