5 files changed, 465 insertions, 1 deletions
diff --git a/.ai/scripts/task-review-staleness.sh b/.ai/scripts/task-review-staleness.sh
new file mode 100755
index 0000000..b52cd3d
--- /dev/null
+++ b/.ai/scripts/task-review-staleness.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+#
+# task-review-staleness.sh — count top-level todo.org tasks whose review
+# has gone stale.
+#
+# Usage: task-review-staleness.sh <todo-file> <threshold-days>
+#
+# Prints a single integer to stdout: the number of qualifying tasks that
+# are stale. Shared by the wrap-up health check (threshold 30) and the
+# startup reminder (threshold 7) so both count the same way.
+#
+# A qualifying task is a depth-2 (**) heading carrying a TODO/DOING/VERIFY
+# keyword and an [#A]/[#B]/[#C] priority cookie. DONE/CANCELLED tasks,
+# deeper headings, and cookie-less headings are not review units.
+#
+# A qualifying task is stale when its :LAST_REVIEWED: property is missing
+# or unparseable (NIL sorts oldest), or when its age strictly exceeds the
+# threshold (age > N days; age == N is still fresh).
+
+set -euo pipefail
+
+die() { echo "task-review-staleness: $*" >&2; exit 2; }
+
+[ "$#" -eq 2 ] || die "usage: $(basename "$0") <todo-file> <threshold-days>"
+
+todo_file="$1"
+threshold="$2"
+
+[ -f "$todo_file" ] || die "no such file: $todo_file"
+[[ "$threshold" =~ ^[0-9]+$ ]] || die "threshold must be a non-negative integer: $threshold"
+
+# Emit one line per qualifying top-level task: its LAST_REVIEWED value, or
+# "NONE" when the task has no such property. Body prose is ignored; only
+# the property drawer between a qualifying heading and the next heading is
+# scanned.
+extract_review_dates() {
+  awk '
+    function flush() {
+      if (in_task) print (have_lr ? lr : "NONE")
+    }
+    /^\*+ / {
+      flush()
+      have_lr = 0; lr = ""
+      in_task = ($0 ~ /^\*\* (TODO|DOING|VERIFY) \[#[ABC]\]/)
+      next
+    }
+    in_task && /^[ \t]*:LAST_REVIEWED:[ \t]*/ {
+      line = $0
+      sub(/^[ \t]*:LAST_REVIEWED:[ \t]*/, "", line)
+      sub(/[ \t]*$/, "", line)
+      lr = line; have_lr = 1
+      next
+    }
+    END { flush() }
+  ' "$todo_file"
+}
+
+# Normalize "today" to local midnight so a task reviewed exactly N days
+# ago measures as N, not N-and-a-fraction. LAST_REVIEWED dates parse to
+# midnight already, so both ends of the diff sit on day boundaries.
+today_epoch=$(date -d "$(date +%F)" +%s)
+count=0
+
+while IFS= read -r value; do
+  if [ "$value" = "NONE" ]; then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Unparseable date → treat as NIL (stale).
+  if ! rev_epoch=$(date -d "$value" +%s 2>/dev/null); then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Round to nearest day so a DST hour can't shift a boundary task.
+  age_days=$(( (today_epoch - rev_epoch + 43200) / 86400 ))
+  if [ "$age_days" -gt "$threshold" ]; then
+    count=$((count + 1))
+  fi
+done < <(extract_review_dates)
+
+echo "$count"
diff --git a/.ai/scripts/tests/task-review-staleness.bats b/.ai/scripts/tests/task-review-staleness.bats
new file mode 100644
index 0000000..abb7585
--- /dev/null
+++ b/.ai/scripts/tests/task-review-staleness.bats
@@ -0,0 +1,149 @@
+#!/usr/bin/env bats
+#
+# Tests for claude-templates/.ai/scripts/task-review-staleness.sh —
+# counts top-level todo.org tasks whose review has gone stale.
+#
+# Strategy: write a synthetic todo.org into a temp dir per test, with
+# LAST_REVIEWED dates generated relative to the real `date` (never
+# hardcoded). Run the real script against it and assert the count it
+# prints on stdout.
+#
+# Staleness rule under test:
+#   - A qualifying task is a depth-2 (**) heading with a TODO/DOING/VERIFY
+#     keyword and an [#A]/[#B]/[#C] priority cookie.
+#   - It is stale when LAST_REVIEWED is missing/malformed (NIL → oldest),
+#     or when its age strictly exceeds the threshold (age > N days).
+#   - age == N exactly is fresh (the spec's wording is ">N days").
+
+# The script under test is always the sibling-of-parent of this test file
+# (scripts/task-review-staleness.sh next to scripts/tests/). This holds in
+# both the canonical claude-templates/ tree and the rsync'd project mirror,
+# so the suite runs identically from either location.
+SCRIPT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)/task-review-staleness.sh"  # absolute path via cd + pwd
+
+setup() {
+  TEST_DIR="$(mktemp -d -t task-review-bats.XXXXXX)"  # scratch dir, removed in teardown
+  TODO="$TEST_DIR/todo.org"  # fixture file the helpers append to
+
+  TODAY="$(date +%F)"  # YYYY-MM-DD, the format written into LAST_REVIEWED
+  D5="$(date -d '5 days ago' +%F)"    # fresh under both 7 and 30
+  D30="$(date -d '30 days ago' +%F)"  # exactly at the 30-day cutoff
+  D31="$(date -d '31 days ago' +%F)"  # one day past the 30-day cutoff
+  D40="$(date -d '40 days ago' +%F)"  # well past the 30-day cutoff
+}
+
+teardown() {
+  rm -rf "$TEST_DIR"  # drop the scratch dir created by setup
+}
+
+# Emit a qualifying task with an explicit LAST_REVIEWED date.
+task_reviewed() {
+  local keyword="$1" prio="$2" title="$3" date="$4"  # prio is the cookie letter; date is written verbatim
+  printf '** %s [#%s] %s\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\nBody.\n\n' \
+    "$keyword" "$prio" "$title" "$date" >> "$TODO"  # appends, so repeated calls accumulate tasks
+}
+
+# Emit a qualifying task with no PROPERTIES drawer at all.
+task_unreviewed() {
+  local keyword="$1" prio="$2" title="$3"  # prio is the cookie letter
+  printf '** %s [#%s] %s\nBody.\n\n' "$keyword" "$prio" "$title" >> "$TODO"  # heading + body only, appended
+}
+
+# ---- Normal cases ----------------------------------------------------
+
+@test "staleness: empty file reports zero" {
+  : > "$TODO"  # create the fixture as a zero-byte file
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # no headings, so nothing qualifies
+}
+
+@test "staleness: all tasks fresh reports zero" {
+  task_reviewed TODO A "Fresh one" "$D5"
+  task_reviewed TODO B "Fresh two" "$D5"
+  task_reviewed DOING A "Fresh three" "$TODAY"  # age 0
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # ages 5, 5 and 0 are all within 30
+}
+
+@test "staleness: all tasks stale reports full count" {
+  task_reviewed TODO A "Stale one" "$D40"
+  task_reviewed TODO B "Stale two" "$D40"
+  task_reviewed VERIFY C "Stale three" "$D40"  # also covers the VERIFY keyword and the C cookie
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "3" ]  # every task is 40 days old, past the 30-day cutoff
+}
+
+@test "staleness: mixed fresh, stale, and unreviewed counts only the latter two" {
+  task_reviewed TODO A "Fresh" "$D5"
+  task_reviewed TODO B "Stale" "$D40"
+  task_unreviewed DOING A "Never reviewed"  # no LAST_REVIEWED at all
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "2" ]  # the 40-day task plus the unreviewed one
+}
+
+# ---- Boundary cases --------------------------------------------------
+
+@test "staleness: age exactly equal to threshold is fresh" {
+  task_reviewed TODO A "Exactly at cutoff" "$D30"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # age 30 is not > 30, so the task is still fresh
+}
+
+@test "staleness: age one day past threshold is stale" {
+  task_reviewed TODO A "One day over" "$D31"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # age 31 > 30
+}
+
+@test "staleness: unreviewed task (no drawer) counts as stale" {
+  task_unreviewed TODO A "Never reviewed"  # heading and body only, no PROPERTIES drawer
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # a missing LAST_REVIEWED counts as stale
+}
+
+@test "staleness: threshold of 7 is softer than 30 on the same list" {
+  task_reviewed TODO A "Reviewed five days ago" "$D5"  # fresh under both thresholds
+  task_reviewed TODO B "Reviewed ten days ago" "$(date -d '10 days ago' +%F)"  # stale at 7, fresh at 30
+  run bash "$SCRIPT" "$TODO" 7
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # would be 0 under threshold 30, so the argument is really exercised
+}
+
+# ---- Error / exclusion cases -----------------------------------------
+
+@test "staleness: DONE and CANCELLED tasks are excluded even when old" {
+  task_reviewed DONE A "Shipped long ago" "$D40"
+  task_reviewed CANCELLED B "Abandoned long ago" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # neither keyword is TODO/DOING/VERIFY, so age is never examined
+}
+
+@test "staleness: deeper headings and cookie-less headings are excluded" {
+  # Depth-3 child with an old review date — not a review unit.
+  printf '*** TODO [#A] Child task\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\n\n' "$D40" >> "$TODO"
+  # Depth-2 but no priority cookie — not a review unit.
+  printf '** TODO Cookie-less task\nBody.\n\n' >> "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # neither heading qualifies, despite the 40-day-old date
+}
+
+@test "staleness: malformed LAST_REVIEWED is treated as stale" {
+  task_reviewed TODO A "Bad date" "not-a-date"  # a value the script is expected to fail to parse
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]  # a bad date must not abort the run
+  [ "$output" = "1" ]
+}
+
+@test "staleness: missing todo file exits non-zero" {
+  run bash "$SCRIPT" "$TEST_DIR/does-not-exist.org" 30  # path is never created
+  [ "$status" -ne 0 ]  # only the failure is asserted, not the specific exit code
+}
diff --git a/Makefile b/Makefile
index 34b6e56..8d433cf 100644
--- a/Makefile
+++ b/Makefile
@@ -423,7 +423,7 @@ test: ## Run all test suites (pytest + ERT + bats)
 		echo "ert: $$(basename "$$f")"; \
 		emacs --batch -q -L .ai/scripts -l ert -l "$$f" -f ert-run-tests-batch-and-exit; \
 	done
-	@set -e; for f in scripts/tests/*.bats; do \
+	@set -e; for f in scripts/tests/*.bats .ai/scripts/tests/*.bats; do \
 		[ -e "$$f" ] || continue; \
 		echo "bats: $$(basename "$$f")"; \
 		bats "$$f"; \
diff --git a/claude-templates/.ai/scripts/task-review-staleness.sh b/claude-templates/.ai/scripts/task-review-staleness.sh
new file mode 100755
index 0000000..b52cd3d
--- /dev/null
+++ b/claude-templates/.ai/scripts/task-review-staleness.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+#
+# task-review-staleness.sh — count top-level todo.org tasks whose review
+# has gone stale.
+#
+# Usage: task-review-staleness.sh <todo-file> <threshold-days>
+#
+# Prints a single integer to stdout: the number of qualifying tasks that
+# are stale. Shared by the wrap-up health check (threshold 30) and the
+# startup reminder (threshold 7) so both count the same way.
+#
+# A qualifying task is a depth-2 (**) heading carrying a TODO/DOING/VERIFY
+# keyword and an [#A]/[#B]/[#C] priority cookie. DONE/CANCELLED tasks,
+# deeper headings, and cookie-less headings are not review units.
+#
+# A qualifying task is stale when its :LAST_REVIEWED: property is missing
+# or unparseable (NIL sorts oldest), or when its age strictly exceeds the
+# threshold (age > N days; age == N is still fresh).
+
+set -euo pipefail
+
+die() { echo "task-review-staleness: $*" >&2; exit 2; }
+
+[ "$#" -eq 2 ] || die "usage: $(basename "$0") <todo-file> <threshold-days>"
+
+todo_file="$1"
+threshold="$2"
+
+[ -f "$todo_file" ] || die "no such file: $todo_file"
+[[ "$threshold" =~ ^[0-9]+$ ]] || die "threshold must be a non-negative integer: $threshold"
+
+# Emit one line per qualifying top-level task: its LAST_REVIEWED value, or
+# "NONE" when the task has no such property. Body prose is ignored; only
+# the property drawer between a qualifying heading and the next heading is
+# scanned.
+extract_review_dates() {
+  awk '
+    function flush() {
+      if (in_task) print (have_lr ? lr : "NONE")
+    }
+    /^\*+ / {
+      flush()
+      have_lr = 0; lr = ""
+      in_task = ($0 ~ /^\*\* (TODO|DOING|VERIFY) \[#[ABC]\]/)
+      next
+    }
+    in_task && /^[ \t]*:LAST_REVIEWED:[ \t]*/ {
+      line = $0
+      sub(/^[ \t]*:LAST_REVIEWED:[ \t]*/, "", line)
+      sub(/[ \t]*$/, "", line)
+      lr = line; have_lr = 1
+      next
+    }
+    END { flush() }
+  ' "$todo_file"
+}
+
+# Normalize "today" to local midnight so a task reviewed exactly N days
+# ago measures as N, not N-and-a-fraction. LAST_REVIEWED dates parse to
+# midnight already, so both ends of the diff sit on day boundaries.
+today_epoch=$(date -d "$(date +%F)" +%s)
+count=0
+
+while IFS= read -r value; do
+  if [ "$value" = "NONE" ]; then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Unparseable date → treat as NIL (stale).
+  if ! rev_epoch=$(date -d "$value" +%s 2>/dev/null); then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Round to nearest day so a DST hour can't shift a boundary task.
+  age_days=$(( (today_epoch - rev_epoch + 43200) / 86400 ))
+  if [ "$age_days" -gt "$threshold" ]; then
+    count=$((count + 1))
+  fi
+done < <(extract_review_dates)
+
+echo "$count"
diff --git a/claude-templates/.ai/scripts/tests/task-review-staleness.bats b/claude-templates/.ai/scripts/tests/task-review-staleness.bats
new file mode 100644
index 0000000..abb7585
--- /dev/null
+++ b/claude-templates/.ai/scripts/tests/task-review-staleness.bats
@@ -0,0 +1,149 @@
+#!/usr/bin/env bats
+#
+# Tests for claude-templates/.ai/scripts/task-review-staleness.sh —
+# counts top-level todo.org tasks whose review has gone stale.
+#
+# Strategy: write a synthetic todo.org into a temp dir per test, with
+# LAST_REVIEWED dates generated relative to the real `date` (never
+# hardcoded). Run the real script against it and assert the count it
+# prints on stdout.
+#
+# Staleness rule under test:
+#   - A qualifying task is a depth-2 (**) heading with a TODO/DOING/VERIFY
+#     keyword and an [#A]/[#B]/[#C] priority cookie.
+#   - It is stale when LAST_REVIEWED is missing/malformed (NIL → oldest),
+#     or when its age strictly exceeds the threshold (age > N days).
+#   - age == N exactly is fresh (the spec's wording is ">N days").
+
+# The script under test is always the sibling-of-parent of this test file
+# (scripts/task-review-staleness.sh next to scripts/tests/). This holds in
+# both the canonical claude-templates/ tree and the rsync'd project mirror,
+# so the suite runs identically from either location.
+SCRIPT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)/task-review-staleness.sh"  # absolute path via cd + pwd
+
+setup() {
+  TEST_DIR="$(mktemp -d -t task-review-bats.XXXXXX)"  # scratch dir, removed in teardown
+  TODO="$TEST_DIR/todo.org"  # fixture file the helpers append to
+
+  TODAY="$(date +%F)"  # YYYY-MM-DD, the format written into LAST_REVIEWED
+  D5="$(date -d '5 days ago' +%F)"    # fresh under both 7 and 30
+  D30="$(date -d '30 days ago' +%F)"  # exactly at the 30-day cutoff
+  D31="$(date -d '31 days ago' +%F)"  # one day past the 30-day cutoff
+  D40="$(date -d '40 days ago' +%F)"  # well past the 30-day cutoff
+}
+
+teardown() {
+  rm -rf "$TEST_DIR"  # drop the scratch dir created by setup
+}
+
+# Emit a qualifying task with an explicit LAST_REVIEWED date.
+task_reviewed() {
+  local keyword="$1" prio="$2" title="$3" date="$4"  # prio is the cookie letter; date is written verbatim
+  printf '** %s [#%s] %s\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\nBody.\n\n' \
+    "$keyword" "$prio" "$title" "$date" >> "$TODO"  # appends, so repeated calls accumulate tasks
+}
+
+# Emit a qualifying task with no PROPERTIES drawer at all.
+task_unreviewed() {
+  local keyword="$1" prio="$2" title="$3"  # prio is the cookie letter
+  printf '** %s [#%s] %s\nBody.\n\n' "$keyword" "$prio" "$title" >> "$TODO"  # heading + body only, appended
+}
+
+# ---- Normal cases ----------------------------------------------------
+
+@test "staleness: empty file reports zero" {
+  : > "$TODO"  # create the fixture as a zero-byte file
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # no headings, so nothing qualifies
+}
+
+@test "staleness: all tasks fresh reports zero" {
+  task_reviewed TODO A "Fresh one" "$D5"
+  task_reviewed TODO B "Fresh two" "$D5"
+  task_reviewed DOING A "Fresh three" "$TODAY"  # age 0
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # ages 5, 5 and 0 are all within 30
+}
+
+@test "staleness: all tasks stale reports full count" {
+  task_reviewed TODO A "Stale one" "$D40"
+  task_reviewed TODO B "Stale two" "$D40"
+  task_reviewed VERIFY C "Stale three" "$D40"  # also covers the VERIFY keyword and the C cookie
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "3" ]  # every task is 40 days old, past the 30-day cutoff
+}
+
+@test "staleness: mixed fresh, stale, and unreviewed counts only the latter two" {
+  task_reviewed TODO A "Fresh" "$D5"
+  task_reviewed TODO B "Stale" "$D40"
+  task_unreviewed DOING A "Never reviewed"  # no LAST_REVIEWED at all
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "2" ]  # the 40-day task plus the unreviewed one
+}
+
+# ---- Boundary cases --------------------------------------------------
+
+@test "staleness: age exactly equal to threshold is fresh" {
+  task_reviewed TODO A "Exactly at cutoff" "$D30"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # age 30 is not > 30, so the task is still fresh
+}
+
+@test "staleness: age one day past threshold is stale" {
+  task_reviewed TODO A "One day over" "$D31"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # age 31 > 30
+}
+
+@test "staleness: unreviewed task (no drawer) counts as stale" {
+  task_unreviewed TODO A "Never reviewed"  # heading and body only, no PROPERTIES drawer
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # a missing LAST_REVIEWED counts as stale
+}
+
+@test "staleness: threshold of 7 is softer than 30 on the same list" {
+  task_reviewed TODO A "Reviewed five days ago" "$D5"  # fresh under both thresholds
+  task_reviewed TODO B "Reviewed ten days ago" "$(date -d '10 days ago' +%F)"  # stale at 7, fresh at 30
+  run bash "$SCRIPT" "$TODO" 7
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]  # would be 0 under threshold 30, so the argument is really exercised
+}
+
+# ---- Error / exclusion cases -----------------------------------------
+
+@test "staleness: DONE and CANCELLED tasks are excluded even when old" {
+  task_reviewed DONE A "Shipped long ago" "$D40"
+  task_reviewed CANCELLED B "Abandoned long ago" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # neither keyword is TODO/DOING/VERIFY, so age is never examined
+}
+
+@test "staleness: deeper headings and cookie-less headings are excluded" {
+  # Depth-3 child with an old review date — not a review unit.
+  printf '*** TODO [#A] Child task\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\n\n' "$D40" >> "$TODO"
+  # Depth-2 but no priority cookie — not a review unit.
+  printf '** TODO Cookie-less task\nBody.\n\n' >> "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]  # neither heading qualifies, despite the 40-day-old date
+}
+
+@test "staleness: malformed LAST_REVIEWED is treated as stale" {
+  task_reviewed TODO A "Bad date" "not-a-date"  # a value the script is expected to fail to parse
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]  # a bad date must not abort the run
+  [ "$output" = "1" ]
+}
+
+@test "staleness: missing todo file exits non-zero" {
+  run bash "$SCRIPT" "$TEST_DIR/does-not-exist.org" 30  # path is never created
+  [ "$status" -ne 0 ]  # only the failure is asserted, not the specific exit code
+}