diff options
| author | Craig Jennings <c@cjennings.net> | 2026-06-10 18:21:15 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-06-10 18:21:15 -0500 |
| commit | b0140951ebe0f0c2d33a868a2d1cda2eafd29044 (patch) | |
| tree | a259806847559e32153bbbb6f60bddfb4bb2c8fe /scripts/kb-hygiene.sh | |
| parent | 242b95ea44d4ba12a611a0b2acb3edc109ce74f5 (diff) | |
| download | rulesets-b0140951ebe0f0c2d33a868a2d1cda2eafd29044.tar.gz rulesets-b0140951ebe0f0c2d33a868a2d1cda2eafd29044.zip | |
feat(kb): monthly hygiene report for agent KB nodes
Phase 4 of the agent KB spec. kb-hygiene.sh inventories :agent: nodes, flags orphans (no id: link anywhere in the KB points at them), duplicate titles, and stray conflict files, then writes an org report into the rulesets inbox for the normal inbox flow to propose dispositions. Read-only by design — it never deletes. A monthly systemd user timer (Persistent=true) runs it; bats covers the counts, orphan detection, duplicates, conflict tally, and the missing-KB error path.
Diffstat (limited to 'scripts/kb-hygiene.sh')
| -rwxr-xr-x | scripts/kb-hygiene.sh | 79 |
1 files changed, 79 insertions, 0 deletions
#!/usr/bin/env bash
# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
#
# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
# the KB points at them), duplicate agent titles, and stray sync-conflict
# files. Writes an org report into the rulesets inbox; the next session
# processes it through the normal inbox flow and proposes deletions to Craig.
# The script only reads the KB — it never deletes anything itself.
#
# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
#
# Usage: kb-hygiene.sh [kb-path] [report-dir]
#   defaults: ~/org/roam ~/code/rulesets/inbox
#
# Exit status: 1 (with a message on stderr) when kb-path is not a directory;
# 0 after the report has been written.

set -euo pipefail

kb="${1:-$HOME/org/roam}"
outdir="${2:-$HOME/code/rulesets/inbox}"

if [ ! -d "$kb" ]; then
  echo "kb-hygiene: no KB at $kb" >&2
  exit 1
fi
mkdir -p "$outdir"

# Timestamped to the minute, so two runs within the same minute share a name.
report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"

#######################################
# Tell whether any other KB file links to a node.
# Globals:   kb (read)
# Arguments: $1 - path of the node's own file, exactly as grep -r prints it
#            $2 - the node's ID
# Returns:   0 if some .org file other than $1, and not a sync-conflict copy,
#            contains the literal text "id:<ID>"; 1 otherwise.
#######################################
has_inbound_link() {
  local self="$1" node_id="$2" referrer
  # The referrer list is read through process substitution and compared as
  # plain strings, not piped into `grep -q`. Under `set -o pipefail` an early
  # `grep -q` exit can kill the upstream grep with SIGPIPE, which makes the
  # pipeline "fail" and a well-linked node look like an orphan. Exact string
  # comparison also keeps characters such as `.` or `[` in a path from being
  # read as regex syntax.
  while IFS= read -r referrer; do
    [ "$referrer" = "$self" ] && continue
    case "$referrer" in
      *sync-conflict*) continue ;;
    esac
    return 0
  done < <(grep -rlF --include='*.org' -e "id:$node_id" -- "$kb" 2>/dev/null)
  return 1
}

# All agent nodes: files carrying the :agent: filetag.
mapfile -t agent_files < <(grep -rlE '^#\+filetags:.*:agent:' "$kb" --include='*.org' 2>/dev/null | grep -v 'sync-conflict' | sort)

total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)

# Orphans: agent nodes whose :ID: no other file references via id: link.
orphans=()
for f in "${agent_files[@]}"; do
  # First :ID: property line in the file; its second field is the ID.
  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
  # A file without an :ID: cannot be linked by id, so it is not classified.
  [ -z "$id" ] && continue
  if ! has_inbound_link "$f" "$id"; then
    orphans+=("$f")
  fi
done

# Duplicate titles among agent nodes: the first #+title: of each file, with
# any title that occurs more than once listed a single time.
dupes=$(for f in "${agent_files[@]}"; do
  awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
done | sort | uniq -d)

{
  echo "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')"
  echo "#+DATE: $(date '+%Y-%m-%d')"
  echo
  echo "* Counts"
  echo "- Agent nodes: ${#agent_files[@]}"
  echo "- Total KB org files: $total_org"
  echo "- Conflict files: $conflicts"
  echo
  echo "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
  if [ "${#orphans[@]}" -eq 0 ]; then
    echo "- none"
  else
    for f in "${orphans[@]}"; do
      # Print the path relative to the KB root.
      echo "- ${f#"$kb"/}"
    done
  fi
  echo
  echo "* Duplicate agent titles"
  if [ -z "$dupes" ]; then
    echo "- none"
  else
    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
    echo "$dupes" | sed 's/^/- duplicate: /'
  fi
  echo
  echo "* Disposition"
  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
} > "$report"

echo "kb-hygiene: report written to $report"
