diff options
Diffstat (limited to 'scripts/kb-hygiene.sh')
| -rwxr-xr-x | scripts/kb-hygiene.sh | 79 |
1 files changed, 79 insertions, 0 deletions
#!/usr/bin/env bash
# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
#
# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
# the KB points at them), duplicate agent titles, and stray sync-conflict
# files. Writes an org report into the rulesets inbox; the next session
# processes it through the normal inbox flow and proposes deletions to Craig.
# The script only reads the KB — it never deletes anything itself.
#
# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
#
# Usage: kb-hygiene.sh [kb-path] [report-dir]
#   defaults: ~/org/roam  ~/code/rulesets/inbox
#
# Exit status: 1 when kb-path is not a directory; otherwise 0 once the report
# has been written (any unexpected command failure aborts via `set -e`).

set -euo pipefail

kb="${1:-$HOME/org/roam}"
outdir="${2:-$HOME/code/rulesets/inbox}"

if [ ! -d "$kb" ]; then
  echo "kb-hygiene: no KB at $kb" >&2
  exit 1
fi
mkdir -p "$outdir"

report="$outdir/$(date +%Y-%m-%d-%H%M)-kb-hygiene-report.org"

#######################################
# Tell whether any other KB file mentions a node's id.
# Globals:   kb (read)
# Arguments: $1 - node id (the value of the node's :ID: property)
#            $2 - path of the node's own file, exactly as grep reports it
# Returns:   0 if some .org file under $kb, other than $2 and not a
#            sync-conflict copy, contains the literal text "id:<id>";
#            1 otherwise.
#######################################
has_inbound_link() {
  local id=$1 self=$2 hit
  # The candidate list is read through process substitution rather than a
  # `grep | grep -q` pipeline: under `pipefail` an early-exiting `grep -q` can
  # SIGPIPE the producer (or a read error can make it exit 2), which would flip
  # the pipeline status and misreport a linked node as an orphan.
  while IFS= read -r hit; do
    # Exact string comparison: the path must not be interpreted as a regex.
    if [[ "$hit" == "$self" ]]; then
      continue
    fi
    if [[ "$hit" == *sync-conflict* ]]; then
      continue
    fi
    return 0
  done < <(grep -rlF --include='*.org' -- "id:$id" "$kb" 2>/dev/null)
  return 1
}

# All agent nodes: files carrying the :agent: filetag. Org keywords are
# case-insensitive, so #+FILETAGS: is accepted as well; the tag itself stays
# case-sensitive. Sync-conflict copies are excluded from the inventory.
mapfile -t agent_files < <(
  grep -rlE --include='*.org' -- \
    '^#\+[Ff][Ii][Ll][Ee][Tt][Aa][Gg][Ss]:.*:agent:' "$kb" 2>/dev/null \
    | grep -v 'sync-conflict' \
    | sort
)

total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)

# Orphans: agent nodes whose :ID: no other file references via id: link.
# Files without an :ID: line cannot be linked to by id and are skipped.
orphans=()
for f in "${agent_files[@]}"; do
  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
  if [ -z "$id" ]; then
    continue
  fi
  if ! has_inbound_link "$id" "$f"; then
    orphans+=("$f")
  fi
done

# Duplicate titles among agent nodes: take the first #+title: line of each
# file (keyword matched case-insensitively) and keep titles seen more than once.
dupes=$(
  for f in "${agent_files[@]}"; do
    awk 'tolower($0) ~ /^#\+title:/ { sub(/^[^:]*: */, ""); print; exit }' "$f"
  done | sort | uniq -d
)

{
  echo "#+TITLE: KB hygiene report — $(date '+%Y-%m-%d')"
  echo "#+DATE: $(date '+%Y-%m-%d')"
  echo
  echo "* Counts"
  echo "- Agent nodes: ${#agent_files[@]}"
  echo "- Total KB org files: $total_org"
  echo "- Conflict files: $conflicts"
  echo
  echo "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
  if [ "${#orphans[@]}" -eq 0 ]; then
    echo "- none"
  else
    for f in "${orphans[@]}"; do
      # Report paths relative to the KB root.
      echo "- ${f#"$kb"/}"
    done
  fi
  echo
  echo "* Duplicate agent titles"
  if [ -z "$dupes" ]; then
    echo "- none"
  else
    # sed prefixes every line of the multi-line list; ${var//} cannot anchor
    # a replacement at the start of each line.
    printf '%s\n' "$dupes" | sed 's/^/- duplicate: /'
  fi
  echo
  echo "* Disposition"
  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
} > "$report"

echo "kb-hygiene: report written to $report"
