#!/usr/bin/env bash
# kb-hygiene.sh — monthly hygiene report for the agent knowledge base (spec Phase 4).
#
# Inventories :agent:-tagged nodes, flags orphans (no [[id:]] link anywhere in
# the KB points at them), duplicate agent titles, and stray sync-conflict
# files. Writes an org report into the rulesets inbox; the next session
# processes it through the normal inbox flow and proposes deletions to Craig.
# The script only reads the KB — it never deletes anything itself.
#
# Run by the roam-hygiene systemd user timer (scripts/systemd/) monthly.
#
# Usage: kb-hygiene.sh [kb-path] [report-dir]
#   defaults: ~/org/roam  ~/code/rulesets/inbox
#
# Exit status: 0 once the report is written; 1 if kb-path is not a directory;
# non-zero (via set -e) if any other required step fails.

set -euo pipefail

kb="${1:-$HOME/org/roam}"
outdir="${2:-$HOME/code/rulesets/inbox}"

if [[ ! -d "$kb" ]]; then
  echo "kb-hygiene: no KB at $kb" >&2
  exit 1
fi

mkdir -p "$outdir"

# Read the clock once so the report filename and its headers always agree,
# even when the run straddles midnight.
stamp=$(date +%Y-%m-%d-%H%M)
today=${stamp%-*}
report="$outdir/${stamp}-kb-hygiene-report.org"

# All agent nodes: files carrying the :agent: filetag.
# grep's stderr is dropped on purpose: unreadable files should not abort or
# clutter a read-only inventory run.
mapfile -t agent_files < <(
  grep -rlE --include='*.org' -- '^#\+filetags:.*:agent:' "$kb" 2>/dev/null \
    | grep -v 'sync-conflict' \
    | sort
)

total_org=$(find "$kb" -name '*.org' ! -name '*sync-conflict*' | wc -l)
conflicts=$(find "$kb" -name '*sync-conflict*' | wc -l)

# Orphans: agent nodes whose :ID: no other file references via id: link.
orphans=()
for f in "${agent_files[@]}"; do
  id=$(awk '/^:ID:/ {print $2; exit}' "$f")
  # A node without an :ID: cannot be the target of an id: link; skip it.
  [[ -n "$id" ]] || continue

  # Collect every other non-conflict file that mentions id:<ID>.
  # The decision is made from the captured output, not the pipeline status:
  # ending the pipeline in `grep -q` under pipefail lets an upstream SIGPIPE
  # (or grep's exit 2 on an unreadable file) masquerade as "no referrers" and
  # wrongly flag a linked node. -xF excludes the node's own path as a literal
  # whole line, so regex metacharacters in the path cannot skew the match.
  referrers=$(
    grep -rlF --include='*.org' -- "id:$id" "$kb" 2>/dev/null \
      | grep -vxF -- "$f" \
      | grep -v 'sync-conflict' || true
  )
  if [[ -z "$referrers" ]]; then
    orphans+=("$f")
  fi
done

# Duplicate titles among agent nodes: first #+title: of each file, then the
# titles that occur more than once.
dupes=$(
  for f in "${agent_files[@]}"; do
    awk -F'#\\+title: *' '/^#\+title:/ {print $2; exit}' "$f"
  done | sort | uniq -d
)

{
  echo "#+TITLE: KB hygiene report — $today"
  echo "#+DATE: $today"
  echo
  echo "* Counts"
  echo "- Agent nodes: ${#agent_files[@]}"
  echo "- Total KB org files: $total_org"
  echo "- Conflict files: $conflicts"
  echo
  echo "* Orphans (no [[id:]] link points at them — review for pruning or linking)"
  if [[ "${#orphans[@]}" -eq 0 ]]; then
    echo "- none"
  else
    for f in "${orphans[@]}"; do
      # Show the path relative to the KB root. Strip the root and the
      # separator in two steps so a kb-path given with a trailing slash
      # still yields a relative path.
      rel=${f#"$kb"}
      rel=${rel#/}
      printf '%s\n' "- $rel"
    done
  fi
  echo
  echo "* Duplicate agent titles"
  if [[ -z "$dupes" ]]; then
    echo "- none"
  else
    # shellcheck disable=SC2001 # multiline per-line prefix; ${var//} can't do anchored-per-line
    echo "$dupes" | sed 's/^/- duplicate: /'
  fi
  echo
  echo "* Disposition"
  echo "Propose deletions or merges to Craig per the inbox flow; auto-cleanup is allowed only for :agent:-tagged nodes after approval."
} > "$report"

echo "kb-hygiene: report written to $report"