aboutsummaryrefslogtreecommitdiff
path: root/scripts/kb-hygiene.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/kb-hygiene.sh')
-rwxr-xr-x scripts/kb-hygiene.sh 79
1 files changed, 79 insertions, 0 deletions
diff --git a/scripts/kb-hygiene.sh b/scripts/kb-hygiene.sh
new file mode 100755
index 0000000..0d42e2e
--- /dev/null
+++ b/scripts/kb-hygiene.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
+#
+# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
+# the KB points at them), duplicate agent titles, and stray sync-conflict
+# files. Writes an org report into the rulesets inbox; the next session
+# processes it through the normal inbox flow and proposes deletions to Craig.
+# The script only reads the KB — it never deletes anything itself.
+#
+# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
+#
+# Usage: kb-hygiene.sh [kb-path] [report-dir]
+# defaults: ~/org/roam ~/code/rulesets/inbox
+
+set -euo pipefail
+
+kb="${1:-$HOME/org/roam}"
+outdir="${2:-$HOME/code/rulesets/inbox}"
+
+if [ ! -d "$kb" ]; then
+ echo "kb-hygiene: no KB at $kb" >&2
+ exit 1
+fi
+mkdir -p "$outdir"
+
+report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"
+
+# All agent nodes: files carrying the :agent: filetag.
+mapfile -t agent_files < <(grep -rlE '^#\+filetags:.*:agent:' "$kb" --include='*.org' 2>/dev/null | grep -v 'sync-conflict' | sort)
+
+total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
+conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)
+
+# Orphans: agent nodes whose :ID: no other file references via id: link.
+orphans=()
+for f in "${agent_files[@]}"; do
+ id=$(awk '/^:ID:/ {print $2; exit}' "$f")
+ [ -z "$id" ] && continue
+ if ! grep -rlF "id:$id" "$kb" --include='*.org' 2>/dev/null | grep -v "^$f$" | grep -qv 'sync-conflict'; then
+ orphans+=("$f")
+ fi
+done
+
+# Duplicate titles among agent nodes.
+dupes=$(for f in "${agent_files[@]}"; do
+ awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
+done | sort | uniq -d)
+
+# Write the org report in one redirected group. Orphan paths are shown relative to the
+# KB root; ${kb%/} makes the prefix strip work even when kb-path has a trailing slash.
+{
+  echo "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')"
+  echo "#+DATE: $(date '+%Y-%m-%d')"
+  echo
+  echo "* Counts"
+  echo "- Agent nodes: ${#agent_files[@]}"
+  echo "- Total KB org files: $total_org"
+  echo "- Conflict files: $conflicts"
+  echo
+  echo "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
+  if [ "${#orphans[@]}" -eq 0 ]; then
+    echo "- none"
+  else
+    printf -- '- %s\n' "${orphans[@]#"${kb%/}"/}"
+  fi
+  echo
+  echo "* Duplicate agent titles"
+  if [ -z "$dupes" ]; then
+    echo "- none"
+  else
+    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
+    echo "$dupes" | sed 's/^/- duplicate: /'
+  fi
+  echo
+  echo "* Disposition"
+  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
+} > "$report"
+
+echo "kb-hygiene: report written to $report"