test(scripts): add task-review-staleness.sh + bats harness

First component of the daily task-review habit from docs/design/task-review.org. The staleness count is the shared primitive both the wrap-up health check (threshold 30) and the startup reminder (threshold 7) call, so it lives in one tested script rather than being reimplemented in each workflow. The script counts top-level todo.org tasks whose review has gone stale: depth-2 headings with a TODO/DOING/VERIFY keyword and an [#A]/[#B]/[#C] cookie, where LAST_REVIEWED is missing, unparseable, or older than the threshold. Age uses a strict greater-than, so a task reviewed exactly N days ago is still fresh. Today normalizes to local midnight before the diff, and the day count rounds to the nearest day, so a DST hour can't push a boundary task across the line. Twelve bats cases cover the normal, boundary, and error categories. Dates are generated relative to the current date rather than hardcoded. The script path resolves as the sibling-of-parent of the test file, so the suite runs identically from the canonical claude-templates tree and the rsync'd project mirror. Makefile test target now globs .ai/scripts/tests for bats alongside scripts/tests.
author: Craig Jennings <c@cjennings.net> 2026-05-20 13:37:39 -0400
committer: Craig Jennings <c@cjennings.net> 2026-05-20 13:37:39 -0400
commit: 64b617e58a5e95c01b9c9662c8c8f5ba35909299 (patch)
tree: dcb9844091c218a3f1800eac1e402a853dd02611
parent: b3186104b1496a52b655ad1d2fd5c2d44445d572 (diff)
download: rulesets-64b617e58a5e95c01b9c9662c8c8f5ba35909299.tar.gz
rulesets-64b617e58a5e95c01b9c9662c8c8f5ba35909299.zip
5 files changed, 465 insertions, 1 deletions
diff --git a/.ai/scripts/task-review-staleness.sh b/.ai/scripts/task-review-staleness.sh
new file mode 100755
index 0000000..b52cd3d
--- /dev/null
+++ b/.ai/scripts/task-review-staleness.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+#
+# task-review-staleness.sh — count top-level todo.org tasks whose review
+# has gone stale.
+#
+# Usage: task-review-staleness.sh <todo-file> <threshold-days>
+#
+# Prints a single integer to stdout: the number of qualifying tasks that
+# are stale. Shared by the wrap-up health check (threshold 30) and the
+# startup reminder (threshold 7) so both count the same way.
+#
+# A qualifying task is a depth-2 (**) heading carrying a TODO/DOING/VERIFY
+# keyword and an [#A]/[#B]/[#C] priority cookie. DONE/CANCELLED tasks,
+# deeper headings, and cookie-less headings are not review units.
+#
+# A qualifying task is stale when its :LAST_REVIEWED: property is missing
+# or unparseable (NIL sorts oldest), or when its age strictly exceeds the
+# threshold (age > N days; age == N is still fresh).
+
+set -euo pipefail
+# die MSG...: print MSG to stderr behind a "task-review-staleness: " prefix, exit 2.
+die() { echo "task-review-staleness: $*" >&2; exit 2; }
+# Exactly two arguments are required; any other count is reported as a usage error.
+[ "$#" -eq 2 ] || die "usage: $(basename "$0") <todo-file> <threshold-days>"
+
+todo_file="$1"
+threshold="$2"
+# Validate up front: the todo file must exist as a regular file, the threshold be digits only.
+[ -f "$todo_file" ] || die "no such file: $todo_file"
+[[ "$threshold" =~ ^[0-9]+$ ]] || die "threshold must be a non-negative integer: $threshold"
+
+# Emit one line per qualifying top-level task: its LAST_REVIEWED value, or
+# "NONE" when that property is absent or blank (GNU date would read a blank
+# value as today, i.e. fresh). Reads $todo_file; scans every line from the
+# qualifying heading to the next heading, so drawer limits are not tracked.
+extract_review_dates() {
+  awk '
+    function flush() {                      # report the task just finished
+      if (in_task) print (have_lr ? lr : "NONE")
+    }
+    /^\*+ / {                               # any heading closes the open task
+      flush()
+      have_lr = 0; lr = ""
+      in_task = ($0 ~ /^\*\* (TODO|DOING|VERIFY) \[#[ABC]\]/)
+      next
+    }
+    in_task && /^[ \t]*:LAST_REVIEWED:[ \t]*/ {
+      line = $0
+      sub(/^[ \t]*:LAST_REVIEWED:[ \t]*/, "", line)
+      sub(/[ \t]*$/, "", line)
+      if (line != "") { lr = line; have_lr = 1 }   # blank value counts as unset
+      next
+    }
+    END { flush() }
+  ' "$todo_file"
+}
+
+# Normalize "today" to local midnight so a task reviewed exactly N days
+# ago measures as N, not N-and-a-fraction. Date-only LAST_REVIEWED values
+# parse to midnight too, so both ends sit on day boundaries (`date -d`: GNU).
+today_epoch=$(date -d "$(date +%F)" +%s)
+count=0
+# Tally stale tasks; each input line is one task's LAST_REVIEWED value or "NONE".
+while IFS= read -r value; do
+  if [ "$value" = "NONE" ]; then  # sentinel: no LAST_REVIEWED was found
+    count=$((count + 1))
+    continue
+  fi
+
+  # Unparseable date → treat as NIL (stale).
+  if ! rev_epoch=$(date -d "$value" +%s 2>/dev/null); then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Round to nearest day (+43200 s is half a day) so a DST hour can't shift a boundary task.
+  age_days=$(( (today_epoch - rev_epoch + 43200) / 86400 ))
+  if [ "$age_days" -gt "$threshold" ]; then  # strict: age == threshold stays fresh
+    count=$((count + 1))
+  fi
+done < <(extract_review_dates)  # process substitution keeps the loop, and count, in this shell
+
+echo "$count"
diff --git a/.ai/scripts/tests/task-review-staleness.bats b/.ai/scripts/tests/task-review-staleness.bats
new file mode 100644
index 0000000..abb7585
--- /dev/null
+++ b/.ai/scripts/tests/task-review-staleness.bats
@@ -0,0 +1,149 @@
+#!/usr/bin/env bats
+#
+# Tests for claude-templates/.ai/scripts/task-review-staleness.sh —
+# counts top-level todo.org tasks whose review has gone stale.
+#
+# Strategy: write a synthetic todo.org into a temp dir per test, with
+# LAST_REVIEWED dates generated relative to the real `date` (never
+# hardcoded). Run the real script against it and assert the count it
+# prints on stdout.
+#
+# Staleness rule under test:
+#   - A qualifying task is a depth-2 (**) heading with a TODO/DOING/VERIFY
+#     keyword and an [#A]/[#B]/[#C] priority cookie.
+#   - It is stale when LAST_REVIEWED is missing/malformed (NIL → oldest),
+#     or when its age strictly exceeds the threshold (age > N days).
+#   - age == N exactly is fresh (the spec's wording is ">N days").
+
+# The script under test is always the sibling-of-parent of this test file
+# (scripts/task-review-staleness.sh next to scripts/tests/). This holds in
+# both the canonical claude-templates/ tree and the rsync'd project mirror,
+# so the suite runs identically from either location.
+SCRIPT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)/task-review-staleness.sh"
+
+setup() {
+  TEST_DIR="$(mktemp -d -t task-review-bats.XXXXXX)"
+  TODO="$TEST_DIR/todo.org"
+
+  TODAY="$(date +%F)"
+  D5="$(date -d '5 days ago' +%F)"
+  D30="$(date -d '30 days ago' +%F)"
+  D31="$(date -d '31 days ago' +%F)"
+  D40="$(date -d '40 days ago' +%F)"
+}
+
+teardown() {
+  rm -rf "$TEST_DIR"
+}
+
+# Emit a qualifying task with an explicit LAST_REVIEWED date.
+task_reviewed() {
+  local keyword="$1" prio="$2" title="$3" date="$4"
+  printf '** %s [#%s] %s\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\nBody.\n\n' \
+    "$keyword" "$prio" "$title" "$date" >> "$TODO"
+}
+
+# Emit a qualifying task with no PROPERTIES drawer at all.
+task_unreviewed() {
+  local keyword="$1" prio="$2" title="$3"
+  printf '** %s [#%s] %s\nBody.\n\n' "$keyword" "$prio" "$title" >> "$TODO"
+}
+
+# ---- Normal cases ----------------------------------------------------
+
+@test "staleness: empty file reports zero" {
+  : > "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: all tasks fresh reports zero" {
+  task_reviewed TODO A "Fresh one" "$D5"
+  task_reviewed TODO B "Fresh two" "$D5"
+  task_reviewed DOING A "Fresh three" "$TODAY"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: all tasks stale reports full count" {
+  task_reviewed TODO A "Stale one" "$D40"
+  task_reviewed TODO B "Stale two" "$D40"
+  task_reviewed VERIFY C "Stale three" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "3" ]
+}
+
+@test "staleness: mixed fresh, stale, and unreviewed counts only the latter two" {
+  task_reviewed TODO A "Fresh" "$D5"
+  task_reviewed TODO B "Stale" "$D40"
+  task_unreviewed DOING A "Never reviewed"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "2" ]
+}
+
+# ---- Boundary cases --------------------------------------------------
+
+@test "staleness: age exactly equal to threshold is fresh" {
+  task_reviewed TODO A "Exactly at cutoff" "$D30"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: age one day past threshold is stale" {
+  task_reviewed TODO A "One day over" "$D31"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: unreviewed task (no drawer) counts as stale" {
+  task_unreviewed TODO A "Never reviewed"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: threshold of 7 flags a task that threshold 30 leaves fresh" {
+  task_reviewed TODO A "Reviewed five days ago" "$D5"       # fresh at 7 and at 30
+  task_reviewed TODO B "Reviewed thirty days ago" "$D30"    # fresh at 30, stale at 7
+  run bash "$SCRIPT" "$TODO" 7
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+# ---- Error / exclusion cases -----------------------------------------
+
+@test "staleness: DONE and CANCELLED tasks are excluded even when old" {
+  task_reviewed DONE A "Shipped long ago" "$D40"
+  task_reviewed CANCELLED B "Abandoned long ago" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: deeper headings and cookie-less headings are excluded" {
+  # Depth-3 child with an old review date — not a review unit.
+  printf '*** TODO [#A] Child task\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\n\n' "$D40" >> "$TODO"
+  # Depth-2 but no priority cookie — not a review unit.
+  printf '** TODO Cookie-less task\nBody.\n\n' >> "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: malformed LAST_REVIEWED is treated as stale" {
+  task_reviewed TODO A "Bad date" "not-a-date"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: missing todo file exits non-zero" {
+  run bash "$SCRIPT" "$TEST_DIR/does-not-exist.org" 30
+  [ "$status" -ne 0 ]
+}
diff --git a/Makefile b/Makefile
index 34b6e56..8d433cf 100644
--- a/Makefile
+++ b/Makefile
@@ -423,7 +423,7 @@ test: ## Run all test suites (pytest + ERT + bats)
 		echo "ert: $$(basename "$$f")"; \
 		emacs --batch -q -L .ai/scripts -l ert -l "$$f" -f ert-run-tests-batch-and-exit; \
 	done
-	@set -e; for f in scripts/tests/*.bats; do \
+	@set -e; for f in scripts/tests/*.bats .ai/scripts/tests/*.bats; do \
 		[ -e "$$f" ] || continue; \
 		echo "bats: $$(basename "$$f")"; \
 		bats "$$f"; \
diff --git a/claude-templates/.ai/scripts/task-review-staleness.sh b/claude-templates/.ai/scripts/task-review-staleness.sh
new file mode 100755
index 0000000..b52cd3d
--- /dev/null
+++ b/claude-templates/.ai/scripts/task-review-staleness.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+#
+# task-review-staleness.sh — count top-level todo.org tasks whose review
+# has gone stale.
+#
+# Usage: task-review-staleness.sh <todo-file> <threshold-days>
+#
+# Prints a single integer to stdout: the number of qualifying tasks that
+# are stale. Shared by the wrap-up health check (threshold 30) and the
+# startup reminder (threshold 7) so both count the same way.
+#
+# A qualifying task is a depth-2 (**) heading carrying a TODO/DOING/VERIFY
+# keyword and an [#A]/[#B]/[#C] priority cookie. DONE/CANCELLED tasks,
+# deeper headings, and cookie-less headings are not review units.
+#
+# A qualifying task is stale when its :LAST_REVIEWED: property is missing
+# or unparseable (NIL sorts oldest), or when its age strictly exceeds the
+# threshold (age > N days; age == N is still fresh).
+
+set -euo pipefail
+# die MSG...: print MSG to stderr behind a "task-review-staleness: " prefix, exit 2.
+die() { echo "task-review-staleness: $*" >&2; exit 2; }
+# Exactly two arguments are required; any other count is reported as a usage error.
+[ "$#" -eq 2 ] || die "usage: $(basename "$0") <todo-file> <threshold-days>"
+
+todo_file="$1"
+threshold="$2"
+# Validate up front: the todo file must exist as a regular file, the threshold be digits only.
+[ -f "$todo_file" ] || die "no such file: $todo_file"
+[[ "$threshold" =~ ^[0-9]+$ ]] || die "threshold must be a non-negative integer: $threshold"
+
+# Emit one line per qualifying top-level task: its LAST_REVIEWED value, or
+# "NONE" when that property is absent or blank (GNU date would read a blank
+# value as today, i.e. fresh). Reads $todo_file; scans every line from the
+# qualifying heading to the next heading, so drawer limits are not tracked.
+extract_review_dates() {
+  awk '
+    function flush() {                      # report the task just finished
+      if (in_task) print (have_lr ? lr : "NONE")
+    }
+    /^\*+ / {                               # any heading closes the open task
+      flush()
+      have_lr = 0; lr = ""
+      in_task = ($0 ~ /^\*\* (TODO|DOING|VERIFY) \[#[ABC]\]/)
+      next
+    }
+    in_task && /^[ \t]*:LAST_REVIEWED:[ \t]*/ {
+      line = $0
+      sub(/^[ \t]*:LAST_REVIEWED:[ \t]*/, "", line)
+      sub(/[ \t]*$/, "", line)
+      if (line != "") { lr = line; have_lr = 1 }   # blank value counts as unset
+      next
+    }
+    END { flush() }
+  ' "$todo_file"
+}
+
+# Normalize "today" to local midnight so a task reviewed exactly N days
+# ago measures as N, not N-and-a-fraction. Date-only LAST_REVIEWED values
+# parse to midnight too, so both ends sit on day boundaries (`date -d`: GNU).
+today_epoch=$(date -d "$(date +%F)" +%s)
+count=0
+# Tally stale tasks; each input line is one task's LAST_REVIEWED value or "NONE".
+while IFS= read -r value; do
+  if [ "$value" = "NONE" ]; then  # sentinel: no LAST_REVIEWED was found
+    count=$((count + 1))
+    continue
+  fi
+
+  # Unparseable date → treat as NIL (stale).
+  if ! rev_epoch=$(date -d "$value" +%s 2>/dev/null); then
+    count=$((count + 1))
+    continue
+  fi
+
+  # Round to nearest day (+43200 s is half a day) so a DST hour can't shift a boundary task.
+  age_days=$(( (today_epoch - rev_epoch + 43200) / 86400 ))
+  if [ "$age_days" -gt "$threshold" ]; then  # strict: age == threshold stays fresh
+    count=$((count + 1))
+  fi
+done < <(extract_review_dates)  # process substitution keeps the loop, and count, in this shell
+
+echo "$count"
diff --git a/claude-templates/.ai/scripts/tests/task-review-staleness.bats b/claude-templates/.ai/scripts/tests/task-review-staleness.bats
new file mode 100644
index 0000000..abb7585
--- /dev/null
+++ b/claude-templates/.ai/scripts/tests/task-review-staleness.bats
@@ -0,0 +1,149 @@
+#!/usr/bin/env bats
+#
+# Tests for claude-templates/.ai/scripts/task-review-staleness.sh —
+# counts top-level todo.org tasks whose review has gone stale.
+#
+# Strategy: write a synthetic todo.org into a temp dir per test, with
+# LAST_REVIEWED dates generated relative to the real `date` (never
+# hardcoded). Run the real script against it and assert the count it
+# prints on stdout.
+#
+# Staleness rule under test:
+#   - A qualifying task is a depth-2 (**) heading with a TODO/DOING/VERIFY
+#     keyword and an [#A]/[#B]/[#C] priority cookie.
+#   - It is stale when LAST_REVIEWED is missing/malformed (NIL → oldest),
+#     or when its age strictly exceeds the threshold (age > N days).
+#   - age == N exactly is fresh (the spec's wording is ">N days").
+
+# The script under test is always the sibling-of-parent of this test file
+# (scripts/task-review-staleness.sh next to scripts/tests/). This holds in
+# both the canonical claude-templates/ tree and the rsync'd project mirror,
+# so the suite runs identically from either location.
+SCRIPT="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)/task-review-staleness.sh"
+
+setup() {
+  TEST_DIR="$(mktemp -d -t task-review-bats.XXXXXX)"
+  TODO="$TEST_DIR/todo.org"
+
+  TODAY="$(date +%F)"
+  D5="$(date -d '5 days ago' +%F)"
+  D30="$(date -d '30 days ago' +%F)"
+  D31="$(date -d '31 days ago' +%F)"
+  D40="$(date -d '40 days ago' +%F)"
+}
+
+teardown() {
+  rm -rf "$TEST_DIR"
+}
+
+# Emit a qualifying task with an explicit LAST_REVIEWED date.
+task_reviewed() {
+  local keyword="$1" prio="$2" title="$3" date="$4"
+  printf '** %s [#%s] %s\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\nBody.\n\n' \
+    "$keyword" "$prio" "$title" "$date" >> "$TODO"
+}
+
+# Emit a qualifying task with no PROPERTIES drawer at all.
+task_unreviewed() {
+  local keyword="$1" prio="$2" title="$3"
+  printf '** %s [#%s] %s\nBody.\n\n' "$keyword" "$prio" "$title" >> "$TODO"
+}
+
+# ---- Normal cases ----------------------------------------------------
+
+@test "staleness: empty file reports zero" {
+  : > "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: all tasks fresh reports zero" {
+  task_reviewed TODO A "Fresh one" "$D5"
+  task_reviewed TODO B "Fresh two" "$D5"
+  task_reviewed DOING A "Fresh three" "$TODAY"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: all tasks stale reports full count" {
+  task_reviewed TODO A "Stale one" "$D40"
+  task_reviewed TODO B "Stale two" "$D40"
+  task_reviewed VERIFY C "Stale three" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "3" ]
+}
+
+@test "staleness: mixed fresh, stale, and unreviewed counts only the latter two" {
+  task_reviewed TODO A "Fresh" "$D5"
+  task_reviewed TODO B "Stale" "$D40"
+  task_unreviewed DOING A "Never reviewed"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "2" ]
+}
+
+# ---- Boundary cases --------------------------------------------------
+
+@test "staleness: age exactly equal to threshold is fresh" {
+  task_reviewed TODO A "Exactly at cutoff" "$D30"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: age one day past threshold is stale" {
+  task_reviewed TODO A "One day over" "$D31"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: unreviewed task (no drawer) counts as stale" {
+  task_unreviewed TODO A "Never reviewed"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: threshold of 7 flags a task that threshold 30 leaves fresh" {
+  task_reviewed TODO A "Reviewed five days ago" "$D5"       # fresh at 7 and at 30
+  task_reviewed TODO B "Reviewed thirty days ago" "$D30"    # fresh at 30, stale at 7
+  run bash "$SCRIPT" "$TODO" 7
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+# ---- Error / exclusion cases -----------------------------------------
+
+@test "staleness: DONE and CANCELLED tasks are excluded even when old" {
+  task_reviewed DONE A "Shipped long ago" "$D40"
+  task_reviewed CANCELLED B "Abandoned long ago" "$D40"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: deeper headings and cookie-less headings are excluded" {
+  # Depth-3 child with an old review date — not a review unit.
+  printf '*** TODO [#A] Child task\n:PROPERTIES:\n:LAST_REVIEWED: %s\n:END:\n\n' "$D40" >> "$TODO"
+  # Depth-2 but no priority cookie — not a review unit.
+  printf '** TODO Cookie-less task\nBody.\n\n' >> "$TODO"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "0" ]
+}
+
+@test "staleness: malformed LAST_REVIEWED is treated as stale" {
+  task_reviewed TODO A "Bad date" "not-a-date"
+  run bash "$SCRIPT" "$TODO" 30
+  [ "$status" -eq 0 ]
+  [ "$output" = "1" ]
+}
+
+@test "staleness: missing todo file exits non-zero" {
+  run bash "$SCRIPT" "$TEST_DIR/does-not-exist.org" 30
+  [ "$status" -ne 0 ]
+}
author	Craig Jennings <c@cjennings.net>	2026-05-20 13:37:39 -0400
committer	Craig Jennings <c@cjennings.net>	2026-05-20 13:37:39 -0400
commit	64b617e58a5e95c01b9c9662c8c8f5ba35909299 (patch)
tree	dcb9844091c218a3f1800eac1e402a853dd02611
parent	b3186104b1496a52b655ad1d2fd5c2d44445d572 (diff)
download	rulesets-64b617e58a5e95c01b9c9662c8c8f5ba35909299.tar.gz rulesets-64b617e58a5e95c01b9c9662c8c8f5ba35909299.zip