From b0140951ebe0f0c2d33a868a2d1cda2eafd29044 Mon Sep 17 00:00:00 2001
From: Craig Jennings
Date: Wed, 10 Jun 2026 18:21:15 -0500
Subject: feat(kb): monthly hygiene report for agent KB nodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 of the agent KB spec. kb-hygiene.sh inventories :agent: nodes,
flags orphans (no id: link anywhere in the KB points at them), duplicate
titles, and stray conflict files, then writes an org report into the
rulesets inbox for the normal inbox flow to propose dispositions.
Read-only by design — it never deletes.

A monthly systemd user timer (Persistent=true) runs it; bats covers the
counts, orphan detection, duplicates, conflict tally, and the missing-KB
error path.
---
 scripts/kb-hygiene.sh | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100755 scripts/kb-hygiene.sh

(limited to 'scripts/kb-hygiene.sh')

diff --git a/scripts/kb-hygiene.sh b/scripts/kb-hygiene.sh
new file mode 100755
index 0000000..0d42e2e
--- /dev/null
+++ b/scripts/kb-hygiene.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
+#
+# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
+# the KB points at them), duplicate agent titles, and stray sync-conflict
+# files. Writes an org report into the rulesets inbox; the next session
+# processes it through the normal inbox flow and proposes deletions to Craig.
+# The script only reads the KB — it never deletes anything itself.
+#
+# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
+#
+# Usage: kb-hygiene.sh [kb-path] [report-dir]
+#   defaults: ~/org/roam ~/code/rulesets/inbox
+
+set -euo pipefail
+
+kb="${1:-$HOME/org/roam}"
+outdir="${2:-$HOME/code/rulesets/inbox}"
+
+if [ ! -d "$kb" ]; then
+  echo "kb-hygiene: no KB at $kb" >&2
+  exit 1
+fi
+
+# Drop trailing slashes so the paths grep and find print begin with exactly
+# "$kb/", which the report strips to show KB-relative names.
+while [[ "$kb" == */ && "$kb" != / ]]; do
+  kb="${kb%/}"
+done
+
+mkdir -p "$outdir"
+
+report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"
+
+# All agent nodes: files carrying the :agent: filetag.
+mapfile -t agent_files < <(grep -rlE '^#\+filetags:.*:agent:' "$kb" --include='*.org' 2>/dev/null | grep -v 'sync-conflict' | sort)
+
+total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
+conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)
+
+# Orphans: agent nodes whose :ID: no other file references via id: link.
+orphans=()
+for f in "${agent_files[@]}"; do
+  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
+  [ -z "$id" ] && continue
+  # Collect every referrer rather than probing with `grep -q`: under pipefail
+  # an early-exiting grep -q can SIGPIPE the upstream greps, which fails the
+  # pipeline and misreports a linked node as an orphan. -xF drops the node's
+  # own file by literal whole-line comparison instead of treating its path as
+  # a regex. `|| true`: finding no referrer is the orphan case, not an error.
+  referrers=$(grep -rlF "id:$id" "$kb" --include='*.org' 2>/dev/null | grep -vxF -- "$f" | grep -v 'sync-conflict' || true)
+  if [ -z "$referrers" ]; then
+    orphans+=("$f")
+  fi
+done
+
+# Duplicate titles among agent nodes.
+dupes=$(for f in "${agent_files[@]}"; do
+  awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
+done | sort | uniq -d)
+
+{
+  echo "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')"
+  echo "#+DATE: $(date '+%Y-%m-%d')"
+  echo
+  echo "* Counts"
+  echo "- Agent nodes: ${#agent_files[@]}"
+  echo "- Total KB org files: $total_org"
+  echo "- Conflict files: $conflicts"
+  echo
+  echo "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
+  if [ "${#orphans[@]}" -eq 0 ]; then
+    echo "- none"
+  else
+    for f in "${orphans[@]}"; do
+      echo "- ${f#"$kb"/}"
+    done
+  fi
+  echo
+  echo "* Duplicate agent titles"
+  if [ -z "$dupes" ]; then
+    echo "- none"
+  else
+    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
+    echo "$dupes" | sed 's/^/- duplicate: /'
+  fi
+  echo
+  echo "* Disposition"
+  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
+} > "$report"
+
+echo "kb-hygiene: report written to $report"
-- 
cgit v1.2.3