From b0140951ebe0f0c2d33a868a2d1cda2eafd29044 Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Wed, 10 Jun 2026 18:21:15 -0500 Subject: feat(kb): monthly hygiene report for agent KB nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 4 of the agent KB spec. kb-hygiene.sh inventories :agent: nodes, flags orphans (no id: link anywhere in the KB points at them), duplicate titles, and stray conflict files, then writes an org report into the rulesets inbox for the normal inbox flow to propose dispositions. Read-only by design — it never deletes. A monthly systemd user timer (Persistent=true) runs it; bats covers the counts, orphan detection, duplicates, conflict tally, and the missing-KB error path. --- scripts/kb-hygiene.sh | 79 ++++++++++++++++++++++++++++++++ scripts/systemd/roam-hygiene.service | 9 ++++ scripts/systemd/roam-hygiene.timer | 10 +++++ scripts/tests/kb-hygiene.bats | 87 ++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+) create mode 100755 scripts/kb-hygiene.sh create mode 100644 scripts/systemd/roam-hygiene.service create mode 100644 scripts/systemd/roam-hygiene.timer create mode 100644 scripts/tests/kb-hygiene.bats diff --git a/scripts/kb-hygiene.sh b/scripts/kb-hygiene.sh new file mode 100755 index 0000000..0d42e2e --- /dev/null +++ b/scripts/kb-hygiene.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4). +# +# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in +# the KB points at them), duplicate agent titles, and stray sync-conflict +# files. Writes an org report into the rulesets inbox; the next session +# processes it through the normal inbox flow and proposes deletions to Craig. +# The script only reads the KB — it never deletes anything itself. +# +# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly. 
+#
+# Usage: kb-hygiene.sh [kb-path] [report-dir]
+#   defaults: ~/org/roam ~/code/rulesets/inbox
+
+set -euo pipefail
+
+# Positional arguments with defaults: $1 = KB root to scan, $2 = directory
+# the report is written into.
+kb="${1:-$HOME/org/roam}"
+outdir="${2:-$HOME/code/rulesets/inbox}"
+
+if [ ! -d "$kb" ]; then
+  echo "kb-hygiene: no KB at $kb" >&2
+  exit 1
+fi
+mkdir -p "$outdir"
+
+# Report name carries a minute-resolution timestamp, so two runs within the
+# same minute write to the same file.
+report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"
+
+# All agent nodes: files carrying the :agent: filetag. Sync-conflict copies
+# are excluded here and tallied separately below.
+mapfile -t agent_files < <(
+  grep -rlE --include='*.org' -- '^#\+filetags:.*:agent:' "$kb" 2>/dev/null |
+    grep -v 'sync-conflict' |
+    sort
+)
+
+total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
+conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)
+
+# Orphans: agent nodes whose :ID: no other file references via id: link.
+orphans=()
+for f in "${agent_files[@]}"; do
+  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
+  if [ -z "$id" ]; then
+    continue # no :ID: property, so nothing could link to this file by id
+  fi
+  # Collect every other non-conflict file that mentions "id:<ID>".
+  #  - The node itself is dropped with an exact whole-line fixed-string match
+  #    (-xF); treating the path as a regex would let '.' match any character
+  #    and would break on paths containing '[' or '*'.
+  #  - The whole list is captured instead of ending the pipeline in `grep -q`:
+  #    under pipefail an early-exiting `grep -q` can kill the upstream greps
+  #    with SIGPIPE and make a well-linked node look like an orphan.
+  #  - `|| true` absorbs the "no match" exit status so set -e does not abort.
+  # NOTE: "id:<ID>" is a substring match, so an ID that is a prefix of another
+  # ID is also counted as referenced.
+  refs=$(grep -rlF --include='*.org' -- "id:$id" "$kb" 2>/dev/null |
+    grep -vxF -- "$f" |
+    grep -v 'sync-conflict' || true)
+  if [ -z "$refs" ]; then
+    orphans+=("$f")
+  fi
+done
+
+# Duplicate titles among agent nodes.
+# Takes the first #+title: line of each agent file; `uniq -d` keeps only the
+# titles that occur more than once.
+dupes=$(
+  for node in "${agent_files[@]}"; do
+    awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$node"
+  done | sort | uniq -d
+)
+
+# Everything printed inside this group is redirected into the report file.
+{
+  printf '%s\n' \
+    "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')" \
+    "#+DATE: $(date '+%Y-%m-%d')" \
+    "" \
+    "* Counts" \
+    "- Agent nodes: ${#agent_files[@]}" \
+    "- Total KB org files: $total_org" \
+    "- Conflict files: $conflicts" \
+    "" \
+    "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
+  if ((${#orphans[@]} > 0)); then
+    for node in "${orphans[@]}"; do
+      # List each orphan relative to the KB root.
+      printf '%s\n' "- ${node#"$kb"/}"
+    done
+  else
+    printf '%s\n' "- none"
+  fi
+
+  printf '\n%s\n' "* Duplicate agent titles"
+  if [ -n "$dupes" ]; then
+    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
+    sed 's/^/- duplicate: /' <<<"$dupes"
+  else
+    printf '%s\n' "- none"
+  fi
+
+  printf '\n%s\n' "* Disposition"
+  printf '%s\n' "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
+} > "$report"
+
+printf '%s\n' "kb-hygiene: report written to $report"
diff --git a/scripts/systemd/roam-hygiene.service b/scripts/systemd/roam-hygiene.service
new file mode 100644
index 0000000..4966290
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.service
@@ -0,0 +1,9 @@
+# Monthly agent-KB hygiene report (spec Phase 4).
Install:
+#   cp scripts/systemd/roam-hygiene.* ~/.config/systemd/user/
+#   systemctl --user daemon-reload && systemctl --user enable --now roam-hygiene.timer
+[Unit]
+Description=Monthly hygiene report for the agent knowledge base
+
+[Service]
+Type=oneshot
+ExecStart=%h/code/rulesets/scripts/kb-hygiene.sh %h/org/roam %h/code/rulesets/inbox
diff --git a/scripts/systemd/roam-hygiene.timer b/scripts/systemd/roam-hygiene.timer
new file mode 100644
index 0000000..bef56d1
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run the agent-KB hygiene report monthly
+
+[Timer]
+OnCalendar=monthly
+Persistent=true
+RandomizedDelaySec=1h
+
+[Install]
+WantedBy=timers.target
diff --git a/scripts/tests/kb-hygiene.bats b/scripts/tests/kb-hygiene.bats
new file mode 100644
index 0000000..a9f9e58
--- /dev/null
+++ b/scripts/tests/kb-hygiene.bats
@@ -0,0 +1,87 @@
+#!/usr/bin/env bats
+# Tests for scripts/kb-hygiene.sh — the monthly agent-KB hygiene report.
+
+# Builds the fixture KB and the report directory under $BATS_TEST_TMPDIR:
+# one hand-authored note plus two agent nodes, one linked and one not.
+setup() {
+  SCRIPT="$BATS_TEST_DIRNAME/../kb-hygiene.sh"
+  KB="$BATS_TEST_TMPDIR/kb"
+  OUT="$BATS_TEST_TMPDIR/inbox"
+  mkdir -p "$KB/agents" "$OUT"
+
+  # A hand-authored node that links to agent node AAA.
+  cat > "$KB/20240101000000-craig-note.org" << 'EOF'
+:PROPERTIES:
+:ID: craig-1111
+:END:
+#+title: Craig's note
+
+See [[id:agent-aaa][the agent fact]].
+EOF
+
+  # Agent node AAA — linked from Craig's note (not an orphan).
+  cat > "$KB/agents/20250101000000-linked-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-aaa
+:END:
+#+title: Linked fact
+#+filetags: :agent:reference:
+
+A fact someone links to.
+EOF
+
+  # Agent node BBB — nothing links to it (orphan).
+  cat > "$KB/agents/20250102000000-orphan-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-bbb
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+
+Nobody links here.
+EOF
+}
+
+# Error path: a KB directory that does not exist.
+@test "missing KB path: exits nonzero with a message" {
+  run "$SCRIPT" "$BATS_TEST_TMPDIR/nope" "$OUT"
+  [ "$status" -ne 0 ]
+  [[ "$output" == *"no KB"* ]]
+}
+
+# Happy path: the run succeeds and leaves a report file in the report dir.
+@test "writes a dated report file into the report dir" {
+  run "$SCRIPT" "$KB" "$OUT"
+  [ "$status" -eq 0 ]
+  ls "$OUT" | grep -q "kb-hygiene-report.org"
+}
+
+# The fixture holds two :agent: nodes (linked-fact and orphan-fact).
+@test "counts agent nodes correctly" {
+  run "$SCRIPT" "$KB" "$OUT"
+  report=$(ls "$OUT"/*kb-hygiene-report.org)
+  grep -q "Agent nodes: 2" "$report"
+}
+
+@test "flags the orphan and not the linked node" {
+  run "$SCRIPT" "$KB" "$OUT"
+  report=$(ls "$OUT"/*kb-hygiene-report.org)
+  grep -q "orphan-fact" "$report"
+  # -A2 inspects only the heading line and the two lines that follow it.
+  ! grep -A2 "Orphans" "$report" | grep -q "linked-fact"
+}
+
+@test "flags duplicate agent titles" {
+  # Third agent node that reuses the title of the orphan-fact fixture.
+  cat > "$KB/agents/20250103000000-dupe.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-ccc
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+EOF
+  run "$SCRIPT" "$KB" "$OUT"
+  report=$(ls "$OUT"/*kb-hygiene-report.org)
+  grep -qi "duplicate" "$report"
+  # Passes when the count of lines containing the title is anything but 0.
+  grep -c "Orphan fact" "$report" | grep -qv '^0$'
+}
+
+@test "reports sync-conflict file count" {
+  # A single conflict-named file in the KB root should be tallied as 1.
+  touch "$KB/junk.sync-conflict-20260101-000000-XXXX.org"
+  run "$SCRIPT" "$KB" "$OUT"
+  report=$(ls "$OUT"/*kb-hygiene-report.org)
+  grep -q "Conflict files: 1" "$report"
+}
-- cgit v1.2.3