aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xscripts/kb-hygiene.sh79
-rw-r--r--scripts/systemd/roam-hygiene.service9
-rw-r--r--scripts/systemd/roam-hygiene.timer10
-rw-r--r--scripts/tests/kb-hygiene.bats87
4 files changed, 185 insertions, 0 deletions
diff --git a/scripts/kb-hygiene.sh b/scripts/kb-hygiene.sh
new file mode 100755
index 0000000..0d42e2e
--- /dev/null
+++ b/scripts/kb-hygiene.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
+#
+# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
+# the KB points at them), duplicate agent titles, and stray sync-conflict
+# files. Writes an org report into the rulesets inbox; the next session
+# processes it through the normal inbox flow and proposes deletions to Craig.
+# The script only reads the KB — it never deletes anything itself.
+#
+# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
+#
+# Usage: kb-hygiene.sh [kb-path] [report-dir]
+# defaults: ~/org/roam ~/code/rulesets/inbox
+
+set -euo pipefail
+
+kb="${1:-$HOME/org/roam}"
+outdir="${2:-$HOME/code/rulesets/inbox}"
+
+if [ ! -d "$kb" ]; then
+ echo "kb-hygiene: no KB at $kb" >&2
+ exit 1
+fi
+mkdir -p "$outdir"
+
+report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"
+
+# All agent nodes: files carrying the :agent: filetag.
+mapfile -t agent_files < <(grep -rlE '^#\+filetags:.*:agent:' "$kb" --include='*.org' 2>/dev/null | grep -v 'sync-conflict' | sort)
+
+total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
+conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)
+
+# Orphans: agent nodes whose :ID: no other file references via id: link.
+# Sync-conflict copies do not count as referrers.
+orphans=()
+for f in "${agent_files[@]}"; do
+  # First :ID: property in the file; with no ID there is nothing to search for.
+  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
+  [ -z "$id" ] && continue
+  # Collect the referrers into a variable instead of testing with `grep -q`:
+  # under pipefail, grep -q exiting early can SIGPIPE the upstream greps and
+  # make the whole pipeline look failed, which would misreport a linked node
+  # as an orphan. -x -F drops the node's own path as an exact literal rather
+  # than interpreting the path as a regex.
+  refs=$(grep -rlF "id:$id" "$kb" --include='*.org' 2>/dev/null | grep -vxF -- "$f" | grep -v 'sync-conflict' || true)
+  [ -n "$refs" ] || orphans+=("$f")
+done
+
+# Duplicate titles among agent nodes.
+dupes=$(for f in "${agent_files[@]}"; do
+ awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
+done | sort | uniq -d)
+
+# Build the org report; everything printed inside the braces goes to $report.
+{
+  printf '%s\n' "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')"
+  printf '%s\n' "#+DATE: $(date '+%Y-%m-%d')"
+  printf '\n'
+
+  # Headline numbers gathered earlier in the script.
+  printf '%s\n' \
+    "* Counts" \
+    "- Agent nodes: ${#agent_files[@]}" \
+    "- Total KB org files: $total_org" \
+    "- Conflict files: $conflicts" \
+    ""
+
+  # Orphan paths are listed relative to the KB root, one bullet each.
+  printf '%s\n' "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
+  if [ "${#orphans[@]}" -gt 0 ]; then
+    printf -- '- %s\n' "${orphans[@]#"$kb"/}"
+  else
+    printf '%s\n' "- none"
+  fi
+  printf '\n'
+
+  # One bullet per title that more than one agent node shares.
+  printf '%s\n' "* Duplicate agent titles"
+  if [ -n "$dupes" ]; then
+    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
+    echo "$dupes" | sed 's/^/- duplicate: /'
+  else
+    printf '%s\n' "- none"
+  fi
+  printf '\n'
+
+  printf '%s\n' \
+    "* Disposition" \
+    "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
+} > "$report"
+
+# Final status line on stdout.
+printf '%s\n' "kb-hygiene: report written to $report"
diff --git a/scripts/systemd/roam-hygiene.service b/scripts/systemd/roam-hygiene.service
new file mode 100644
index 0000000..4966290
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.service
@@ -0,0 +1,9 @@
+# Monthly agent-KB hygiene report (spec Phase 4). Install:
+# cp scripts/systemd/roam-hygiene.* ~/.config/systemd/user/
+# systemctl --user daemon-reload && systemctl --user enable --now roam-hygiene.timer
+# There is no [Install] section here: the service is meant to be started by
+# roam-hygiene.timer, which is the unit the install command enables.
+[Unit]
+Description=Monthly hygiene report for the agent knowledge base
+
+[Service]
+# oneshot: the script runs to completion and exits; nothing stays resident.
+Type=oneshot
+# %h expands to the home directory of the user running the service manager.
+ExecStart=%h/code/rulesets/scripts/kb-hygiene.sh %h/org/roam %h/code/rulesets/inbox
diff --git a/scripts/systemd/roam-hygiene.timer b/scripts/systemd/roam-hygiene.timer
new file mode 100644
index 0000000..bef56d1
--- /dev/null
+++ b/scripts/systemd/roam-hygiene.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run the agent-KB hygiene report monthly
+
+[Timer]
+# "monthly" is systemd shorthand for *-*-01 00:00:00.
+OnCalendar=monthly
+# Catch up after downtime: if a scheduled run was missed while the timer was
+# inactive, trigger once as soon as the timer is activated again.
+Persistent=true
+# Delay each run by a random amount of up to one hour.
+RandomizedDelaySec=1h
+
+[Install]
+WantedBy=timers.target
diff --git a/scripts/tests/kb-hygiene.bats b/scripts/tests/kb-hygiene.bats
new file mode 100644
index 0000000..a9f9e58
--- /dev/null
+++ b/scripts/tests/kb-hygiene.bats
@@ -0,0 +1,87 @@
+#!/usr/bin/env bats
+# Tests for scripts/kb-hygiene.sh — the monthly agent-KB hygiene report.
+
+# bats runs setup before every test. It builds a three-node fixture KB under
+# $BATS_TEST_TMPDIR: one untagged note plus two :agent:-tagged nodes, only one
+# of which is the target of an id: link.
+setup() {
+ SCRIPT="$BATS_TEST_DIRNAME/../kb-hygiene.sh"
+ KB="$BATS_TEST_TMPDIR/kb"
+ OUT="$BATS_TEST_TMPDIR/inbox"
+ mkdir -p "$KB/agents" "$OUT"
+
+ # A hand-authored node (no :agent: filetag) that links to agent node AAA.
+ cat > "$KB/20240101000000-craig-note.org" << 'EOF'
+:PROPERTIES:
+:ID: craig-1111
+:END:
+#+title: Craig's note
+
+See [[id:agent-aaa][the agent fact]].
+EOF
+
+ # Agent node AAA — linked from Craig's note (not an orphan).
+ cat > "$KB/agents/20250101000000-linked-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-aaa
+:END:
+#+title: Linked fact
+#+filetags: :agent:reference:
+
+A fact someone links to.
+EOF
+
+ # Agent node BBB — nothing links to it (orphan).
+ cat > "$KB/agents/20250102000000-orphan-fact.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-bbb
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+
+Nobody links here.
+EOF
+}
+
+@test "missing KB path: exits nonzero with a message" {
+  # "nope" is never created, so the script's directory check must reject it.
+  missing_kb="$BATS_TEST_TMPDIR/nope"
+  run "$SCRIPT" "$missing_kb" "$OUT"
+  [ "$status" != 0 ]
+  [[ "$output" == *"no KB"* ]]
+}
+
+@test "writes a dated report file into the report dir" {
+ run "$SCRIPT" "$KB" "$OUT"
+ [ "$status" -eq 0 ]
+ ls "$OUT" | grep -q "kb-hygiene-report.org"
+}
+
+@test "counts agent nodes correctly" {
+  run "$SCRIPT" "$KB" "$OUT"
+  # The fixture KB holds two :agent:-tagged nodes: linked-fact and orphan-fact.
+  grep -q "Agent nodes: 2" "$OUT"/*kb-hygiene-report.org
+}
+
+@test "flags the orphan and not the linked node" {
+ run "$SCRIPT" "$KB" "$OUT"
+ report=$(ls "$OUT"/*kb-hygiene-report.org)
+ grep -q "orphan-fact" "$report"
+ ! grep -A2 "Orphans" "$report" | grep -q "linked-fact"
+}
+
+@test "flags duplicate agent titles" {
+  # A third agent node that reuses the title of the orphan-fact fixture.
+  cat > "$KB/agents/20250103000000-dupe.org" << 'EOF'
+:PROPERTIES:
+:ID: agent-ccc
+:END:
+#+title: Orphan fact
+#+filetags: :agent:reference:
+EOF
+  run "$SCRIPT" "$KB" "$OUT"
+  report=$(ls "$OUT"/*kb-hygiene-report.org)
+  # Match the exact bullet the script emits. A bare case-insensitive search for
+  # "duplicate" also hits the "* Duplicate agent titles" heading, which is
+  # printed even when no duplicates exist.
+  grep -qxF -- "- duplicate: Orphan fact" "$report"
+}
+
+@test "reports sync-conflict file count" {
+  # An empty file whose name contains the sync-conflict marker.
+  : > "$KB/junk.sync-conflict-20260101-000000-XXXX.org"
+  run "$SCRIPT" "$KB" "$OUT"
+  grep -q "Conflict files: 1" "$OUT"/*kb-hygiene-report.org
+}