about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-06-10 18:21:15 -0500
committer Craig Jennings <c@cjennings.net> 2026-06-10 18:21:15 -0500
commit b0140951ebe0f0c2d33a868a2d1cda2eafd29044 (patch)
tree a259806847559e32153bbbb6f60bddfb4bb2c8fe /scripts
parent 242b95ea44d4ba12a611a0b2acb3edc109ce74f5 (diff)
download rulesets-b0140951ebe0f0c2d33a868a2d1cda2eafd29044.tar.gz
rulesets-b0140951ebe0f0c2d33a868a2d1cda2eafd29044.zip
feat(kb): monthly hygiene report for agent KB nodes
Phase 4 of the agent KB spec. kb-hygiene.sh inventories :agent: nodes, flags orphans (no id: link anywhere in the KB points at them), duplicate titles, and stray conflict files, then writes an org report into the rulesets inbox for the normal inbox flow to propose dispositions. Read-only by design — it never deletes. A monthly systemd user timer (Persistent=true) runs it; bats covers the counts, orphan detection, duplicates, conflict tally, and the missing-KB error path.
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/kb-hygiene.sh 79
-rw-r--r-- scripts/systemd/roam-hygiene.service 9
-rw-r--r-- scripts/systemd/roam-hygiene.timer 10
-rw-r--r-- scripts/tests/kb-hygiene.bats 87
4 files changed, 185 insertions, 0 deletions
diff --git a/scripts/kb-hygiene.sh b/scripts/kb-hygiene.sh
new file mode 100755
index 0000000..0d42e2e
--- /dev/null
+++ b/scripts/kb-hygiene.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
+#
+# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
+# the KB points at them), duplicate agent titles, and stray sync-conflict
+# files. Writes an org report into the rulesets inbox; the next session
+# processes it through the normal inbox flow and proposes deletions to Craig.
+# The script only reads the KB — it never deletes anything itself.
+#
+# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
+#
+# Usage: kb-hygiene.sh [kb-path] [report-dir]
+# defaults: ~/org/roam ~/code/rulesets/inbox
+
+set -euo pipefail
+
+kb="${1:-$HOME/org/roam}"
+outdir="${2:-$HOME/code/rulesets/inbox}"
+
+if [ ! -d "$kb" ]; then
+  echo "kb-hygiene: no KB at $kb" >&2
+  exit 1
+fi
+mkdir -p "$outdir"
+
+# Read the clock once so the file name and the report header always agree.
+stamp=$(date +%Y-%m-%d-%H%M)
+today=${stamp%-*}
+report="$outdir/$stamp-kb-hygiene-report.org"
+
+# All agent nodes: files carrying the :agent: filetag.
+mapfile -t agent_files < <(grep -rlE '^#\+filetags:.*:agent:' "$kb" --include='*.org' 2>/dev/null | grep -v 'sync-conflict' | sort)
+
+# -type f: count regular files only, never a directory whose name matches.
+total_org=$(find "$kb" -type f -name '*.org' ! -name '*sync-conflict*' | wc -l)
+conflicts=$(find "$kb" -type f -name '*sync-conflict*' | wc -l)
+
+# Orphans: agent nodes whose :ID: no other file references via id: link.
+orphans=()
+for f in "${agent_files[@]}"; do
+  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
+  [ -z "$id" ] && continue
+  # Match the ID exactly: escape regex metacharacters and require a non-ID
+  # character (or end of line) after it, so "id:abc" ignores links to "id:abcd".
+  id_re=$(printf '%s' "$id" | sed 's/[[\.*^$(){?+|]/\\&/g')
+  # No `grep -q` here: quitting early can SIGPIPE the upstream greps, which
+  # pipefail reports as failure and would mark a heavily linked node orphaned.
+  refs=$(grep -rlE "id:${id_re}([^[:alnum:]_-]|\$)" "$kb" --include='*.org' 2>/dev/null | grep -vxF -- "$f" | grep -v 'sync-conflict' || true)
+  [ -n "$refs" ] || orphans+=("$f")
+done
+
+# Duplicate titles among agent nodes.
+dupes=$(for f in "${agent_files[@]}"; do
+  awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
+done | sort | uniq -d)
+
+{
+  printf '%s\n' "#+TITLE: KB hygiene report — $today" "#+DATE: $today" "" "* Counts"
+  echo "- Agent nodes: ${#agent_files[@]}"
+  echo "- Total KB org files: $total_org"
+  echo "- Conflict files: $conflicts"
+  printf '\n%s\n' "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
+  if [ "${#orphans[@]}" -eq 0 ]; then
+    echo "- none"
+  else
+    printf -- '- %s\n' "${orphans[@]#"$kb"/}" # one bullet per orphan, path relative to the KB
+  fi
+  printf '\n%s\n' "* Duplicate agent titles"
+  if [ -z "$dupes" ]; then
+    echo "- none"
+  else
+    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
+    echo "$dupes" | sed 's/^/- duplicate: /'
+  fi
+  printf '\n%s\n' "* Disposition"
+  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
+} > "$report"
+
+echo "kb-hygiene: report written to $report"
diff --git a/scripts/systemd/roam-hygiene.service b/scripts/systemd/roam-hygiene.service
new file mode 100644
index 0000000..4966290
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.service
@@ -0,0 +1,9 @@
+# Monthly agent-KB hygiene report (spec Phase 4). Install:
+#   cp scripts/systemd/roam-hygiene.* ~/.config/systemd/user/
+#   systemctl --user daemon-reload && systemctl --user enable --now roam-hygiene.timer
+[Unit]
+Description=Monthly hygiene report for the agent knowledge base
+# Started by roam-hygiene.timer, hence no [Install] section; %h below is the user's home directory.
+[Service]
+Type=oneshot
+ExecStart=%h/code/rulesets/scripts/kb-hygiene.sh %h/org/roam %h/code/rulesets/inbox
diff --git a/scripts/systemd/roam-hygiene.timer b/scripts/systemd/roam-hygiene.timer
new file mode 100644
index 0000000..bef56d1
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run the agent-KB hygiene report monthly
+# monthly = 00:00 on the 1st; Persistent=true catches up a missed run; the delay adds up to 1h of jitter.
+[Timer]
+OnCalendar=monthly
+Persistent=true
+RandomizedDelaySec=1h
+# No Unit= above, so this timer activates the service sharing its name (roam-hygiene.service).
+[Install]
+WantedBy=timers.target
diff --git a/scripts/tests/kb-hygiene.bats b/scripts/tests/kb-hygiene.bats
new file mode 100644
index 0000000..a9f9e58
--- /dev/null
+++ b/scripts/tests/kb-hygiene.bats
@@ -0,0 +1,87 @@
+#!/usr/bin/env bats
+# Tests for scripts/kb-hygiene.sh — the monthly agent-KB hygiene report.
+
+setup() {
+ SCRIPT="$BATS_TEST_DIRNAME/../kb-hygiene.sh"
+ KB="$BATS_TEST_TMPDIR/kb"
+ OUT="$BATS_TEST_TMPDIR/inbox"
+ mkdir -p "$KB/agents" "$OUT"
+ # Fixture KB built before each test: one untagged note plus two :agent: nodes under agents/.
+ # A hand-authored node (no :agent: filetag) that links to agent node AAA.
+ cat > "$KB/20240101000000-craig-note.org" << 'EOF'
+:PROPERTIES:
+:ID: craig-1111
+:END:
+#+title: Craig's note
+
+See [[id:agent-aaa][the agent fact]].
+EOF
+
+ # Agent node AAA — its ID is the target of the link in Craig's note (not an orphan).
+ cat > "$KB/agents/20250101000000-linked-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-aaa
+:END:
+#+title: Linked fact
+#+filetags: :agent:reference:
+
+A fact someone links to.
+EOF
+
+ # Agent node BBB — no file in the fixture contains id:agent-bbb (orphan).
+ cat > "$KB/agents/20250102000000-orphan-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-bbb
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+
+Nobody links here.
+EOF
+}
+
+@test "missing KB path: exits nonzero with a message" {
+  run "$SCRIPT" "$BATS_TEST_TMPDIR/nope" "$OUT" # setup never creates "nope"
+  [ "$status" != 0 ]
+  grep -qF -- "no KB" <<< "$output"
+}
+
+@test "writes a dated report file into the report dir" {
+  run "$SCRIPT" "$KB" "$OUT"
+  [ "$status" -eq 0 ]
+  compgen -G "$OUT/????-??-??-????-kb-hygiene-report.org" > /dev/null # glob, not ls; name must carry the YYYY-MM-DD-HHMM stamp
+}
+
+@test "counts agent nodes correctly" {
+  run "$SCRIPT" "$KB" "$OUT"
+  report=("$OUT"/*kb-hygiene-report.org) # glob into an array instead of parsing ls
+  grep -qxF -- "- Agent nodes: 2" "${report[0]}" # whole-line match: "20" must not pass
+}
+
+@test "flags the orphan and not the linked node" {
+  run "$SCRIPT" "$KB" "$OUT"
+  report=("$OUT"/*kb-hygiene-report.org) # glob into an array instead of parsing ls
+  grep -qxF -- "- agents/20250102000000-orphan-fact.org" "${report[0]}" # bullet is KB-relative
+  ! grep -q "linked-fact" "${report[0]}" # file paths are only printed under Orphans, so scan the whole report
+}
+
+@test "flags duplicate agent titles" {
+  cat > "$KB/agents/20250103000000-dupe.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-ccc
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+EOF
+  run "$SCRIPT" "$KB" "$OUT"
+  report=("$OUT"/*kb-hygiene-report.org)
+  # Match the bullet itself: the "* Duplicate agent titles" heading is always present.
+  grep -qxF -- "- duplicate: Orphan fact" "${report[0]}"
+}
+
+@test "reports sync-conflict file count" {
+  touch "$KB/junk.sync-conflict-20260101-000000-XXXX.org"
+  run "$SCRIPT" "$KB" "$OUT"
+  report=("$OUT"/*kb-hygiene-report.org) # glob into an array instead of parsing ls
+  grep -qxF -- "- Conflict files: 1" "${report[0]}" # whole-line match: "10" must not pass
+}