chore(scripts): add drill-deck stats, diff-ids, and sync wrapper

I incorporated the flashcard-tooling bundle from the work project's deck-review workflow, validated there against a 93-card deck. Three scripts now live under .ai/scripts/: drill-deck-stats.py (pre-rewrite inventory plus a gate that warns on stray *** Answer headers, missing :ID:, non-prompt headings, and #+TITLE jargon like "org-drill"), drill-deck-diff-ids.py (SRS-state preservation check that flags any :ID: lost across a rewrite), and drill-deck-sync (bash wrapper chaining stats, optional diff-ids, then drill-to-anki, writing to ~/sync/phone/anki/ only when the gates pass). The drill-deck-review.org workflow gains a Helper Scripts section and references the scripts from its phases. I reconciled its output-path prose with the drill-to-anki default that just moved to ~/sync/phone/anki/, so it no longer claims the script still defaults to ~/sync/org/drill/. I added tests for both Python scripts (pure logic plus CLI gate behavior) and a bats suite for the wrapper's guard paths. The clean end-to-end sync path stays uncovered since it needs uv-resolved genanki.
author: Craig Jennings <c@cjennings.net> 2026-05-30 13:17:47 -0500
committer: Craig Jennings <c@cjennings.net> 2026-05-30 13:17:47 -0500
commit: 0234e52b727b34ade93961eb05b5638685f4406f (patch)
tree: b7ee5f66a9fceb3fd4d9b1d2ba8c44e89dde76c5 /.ai/scripts
parent: 038d59b7e548d2323f43dcd92ba14cba876d840d (diff)
download: rulesets-0234e52b727b34ade93961eb05b5638685f4406f.tar.gz
rulesets-0234e52b727b34ade93961eb05b5638685f4406f.zip
6 files changed, 570 insertions, 0 deletions
diff --git a/.ai/scripts/drill-deck-diff-ids.py b/.ai/scripts/drill-deck-diff-ids.py
new file mode 100755
index 0000000..bd2c4cc
--- /dev/null
+++ b/.ai/scripts/drill-deck-diff-ids.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+"""SRS-state preservation check between two versions of an org-drill deck.
+
+Extracts every :ID: from each version and reports IDs that disappeared
+or appeared. Disappeared IDs lose org-drill SRS state (review history,
+ease, intervals) and are the worst-case bug from a deck rewrite. Appeared
+IDs are usually fine (new cards added on purpose) but worth surfacing.
+
+Exits 0 when clean, 1 when any IDs disappeared or appeared.
+
+Usage:
+  drill-deck-diff-ids.py <before.org> <after.org>
+"""
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:(?:[\w@#%]+:)*drill:(?:[\w@#%]+:)*\s*$")  # drill may sit among other tags, e.g. :drill:leech:
+ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")  # a property line of the form ":ID: <value>"
+
+
+def card_id_map(path: Path) -> dict[str, str]:
+    """Return {id -> heading} for every :drill: card in path that has an :ID:.
+
+    A card's body ends at the next depth-1 or depth-2 heading, drill or not,
+    so a later sibling's :ID: is never credited to an ID-less card. Only the
+    first :ID: found in a card is recorded; when two cards share an ID the
+    heading of the later card wins.
+    """
+    result: dict[str, str] = {}
+    heading: str | None = None  # card still waiting for its :ID:, if any
+    for line in path.read_text(encoding="utf-8").splitlines():
+        m = CARD_RE.match(line)
+        if m:
+            heading = m.group(1).strip()
+        elif line.startswith(("* ", "** ")):
+            heading = None  # section or non-drill sibling: the card is over
+        elif heading is not None:
+            mid = ID_RE.match(line)
+            if mid:
+                result[mid.group(1)] = heading
+                heading = None  # any later :ID: inside this card is ignored
+    return result
+
+
+def main() -> int:
+    """Compare the card IDs of the two deck versions named on the command line.
+
+    Prints a summary plus every disappeared or appeared ID with its heading.
+    Returns 0 when the ID sets match, 1 when any ID disappeared or appeared,
+    and 2 on a usage error or when either path is not an existing file.
+    """
+    if len(sys.argv) != 3:
+        print(f"usage: {sys.argv[0]} <before.org> <after.org>", file=sys.stderr)
+        return 2
+
+    before_path, after_path = (Path(arg).expanduser().resolve() for arg in sys.argv[1:])
+    for deck in (before_path, after_path):
+        if not deck.is_file():
+            print(f"error: {deck} not found", file=sys.stderr)
+            return 2
+
+    before = card_id_map(before_path)
+    after = card_id_map(after_path)
+
+    # Dict key views support set algebra; sort once for a stable report order.
+    kept = before.keys() & after.keys()
+    lost = sorted(before.keys() - after.keys())
+    gained = sorted(after.keys() - before.keys())
+
+    # Summary counts first, then the per-ID detail.
+    print(f"drill-deck-diff-ids: {before_path.name} → {after_path.name}")
+    print()
+    print(f"IDs in BEFORE: {len(before)}")
+    print(f"IDs in AFTER: {len(after)}")
+    print(f"Preserved: {len(kept)}")
+    print(f"Disappeared: {len(lost)}")
+    print(f"Appeared: {len(gained)}")
+    print()
+
+    if lost:
+        print(f"WARN: {len(lost)} card IDs disappeared (SRS state lost)")
+        for cid in lost:
+            print(f"      - {cid} (was: {before[cid]!r})")
+    if gained:
+        print(f"NOTE: {len(gained)} new card IDs appeared")
+        for cid in gained:
+            print(f"      - {cid} (now: {after[cid]!r})")
+
+    if lost or gained:
+        return 1
+    print("clean — SRS state preserved")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py
new file mode 100755
index 0000000..72d1cde
--- /dev/null
+++ b/.ai/scripts/drill-deck-stats.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""Inventory + workflow-violation warnings for an org-drill deck source file.
+
+Reports counts and flags violations:
+- Total cards (depth-2 `:drill:` headings), plus a #+TITLE check (missing, or naming org-drill)
+- PROPERTIES drawer count (should match card count)
+- `*** Answer` sub-header count (should be 0 per drill-deck-review.org)
+- Cards missing :ID: (loses identity across versions, risks SRS-state loss)
+- Cards whose heading has neither `?` nor a leading imperative verb (likely a topic-as-heading not yet rewritten)
+
+Exits 0 when clean, 1 when any warnings are present. Use as a gate before
+regenerating the Anki deck or running drill-deck-sync.
+
+Usage:
+  drill-deck-stats.py <file.org>
+"""
+from __future__ import annotations
+
+import re
+import sys
+from pathlib import Path
+
+CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:(?:[\w@#%]+:)*drill:(?:[\w@#%]+:)*\s*$")  # drill may sit among other tags, e.g. :drill:leech:
+ANSWER_RE = re.compile(r"^\*\*\*\s+Answer\b")
+PROP_START_RE = re.compile(r"^\s*:PROPERTIES:\s*$")
+PROP_END_RE = re.compile(r"^\s*:END:\s*$")
+ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
+TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
+SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
+
+# A heading qualifies as "prompt form" if it contains `?` or starts with
+# one of these imperative verbs (directive prompts like "Spell these out"
+# and "Introduce yourself" are valid even without `?`).
+IMPERATIVE_VERBS = frozenset({
+    "spell", "describe", "explain", "name", "list", "give",
+    "show", "tell", "define", "compare", "identify", "outline",
+    "introduce", "walk", "state", "recite", "recall", "summarize",
+})
+
+
+def is_prompt_form(heading: str) -> bool:
+    """True if heading has a `?` or starts with an imperative verb; an empty heading is not a prompt."""
+    if "?" in heading:
+        return True
+    words = heading.split(None, 1)  # [] for an empty or whitespace-only heading
+    return bool(words) and words[0].lower().rstrip(":,;") in IMPERATIVE_VERBS
+
+
+def main() -> int:
+    """Print the deck inventory and warnings; return 0 if clean, 1 if warned, 2 on usage or missing file."""
+    if len(sys.argv) != 2:
+        print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
+        return 2
+
+    path = Path(sys.argv[1]).expanduser().resolve()
+    if not path.is_file():
+        print(f"error: {path} not found", file=sys.stderr)
+        return 2
+
+    lines = path.read_text(encoding="utf-8").splitlines()
+
+    title: str | None = None
+    for line in lines[:20]:  # the title is only searched for in the first 20 lines
+        m = TITLE_RE.match(line)
+        if m:
+            title = m.group(1).strip()
+            break
+
+    cards: list[tuple[str, bool, bool]] = []  # (heading, has_id, has_answer_subheader)
+    answer_count = prop_count = 0
+    i = 0
+    while i < len(lines):
+        m = CARD_RE.match(lines[i])
+        if m:
+            heading = m.group(1).strip()
+            i += 1
+            has_id = False
+            has_answer = False
+            in_drawer = False
+            while i < len(lines):
+                line = lines[i]
+                # A depth-1/2 heading, drill or not, ends the card (keeps a sibling's drawer and :ID: out).
+                if line.startswith(("* ", "** ")) or CARD_RE.match(line):
+                    break
+                if PROP_START_RE.match(line):
+                    prop_count += 1
+                    in_drawer = True
+                elif in_drawer and PROP_END_RE.match(line):
+                    in_drawer = False
+                elif in_drawer and ID_RE.match(line):
+                    has_id = True
+                elif ANSWER_RE.match(line):
+                    answer_count += 1
+                    has_answer = True
+                i += 1
+            cards.append((heading, has_id, has_answer))
+            continue
+        i += 1
+
+    not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)]
+    no_id = [h for h, has_id, _ in cards if not has_id]
+    answer_cards = sum(1 for _, _, flagged in cards if flagged)  # cards affected, not sub-headers seen
+
+    print(f"{path.name} — drill deck stats")
+    print()
+    print(f"Deck title: {title if title else '(no #+TITLE)'}")
+    print(f"Cards: {len(cards)}")
+    drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
+    print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
+    answer_status = "clean" if answer_count == 0 else "workflow violation"
+    print(f"*** Answer sub-headers: {answer_count} ({answer_status})")
+    print(f"Cards missing :ID:: {len(no_id)}")
+    print(f"Cards with non-prompt heading: {len(not_prompt)}")
+    print()
+
+    warnings = 0
+    if title is None:
+        warnings += 1
+        print("WARN: no #+TITLE: line found; deck name will fall back to the file basename")
+    elif SOURCE_TOOL_RE.search(title):
+        warnings += 1
+        print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
+    if answer_count:
+        warnings += 1
+        print(f"WARN: {answer_cards} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
+    if prop_count != len(cards):
+        warnings += 1
+        print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}")
+    if no_id:
+        warnings += 1
+        print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites")
+        for h in no_id[:5]:
+            print(f"      - {h}")
+        if len(no_id) > 5:
+            print(f"      - ... and {len(no_id) - 5} more")
+    if not_prompt:
+        warnings += 1
+        print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten")
+        for h in not_prompt[:5]:
+            print(f"      - {h}")
+        if len(not_prompt) > 5:
+            print(f"      - ... and {len(not_prompt) - 5} more")
+
+    if warnings == 0:
+        print("clean")
+        return 0
+    return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/.ai/scripts/drill-deck-sync b/.ai/scripts/drill-deck-sync
new file mode 100755
index 0000000..8e51cdd
--- /dev/null
+++ b/.ai/scripts/drill-deck-sync
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+# drill-deck-sync: stats check + regenerate Anki apkg + place at ~/sync/phone/anki/
+#
+# Wraps drill-deck-stats.py + drill-to-anki.py (and optionally
+# drill-deck-diff-ids.py) for the canonical "rewrote the deck, now ship
+# it" step in the drill-deck-review workflow.
+#
+# Usage:
+#   drill-deck-sync <source.org>
+#   drill-deck-sync <source.org> --diff-against <previous-version.org>
+#
+# Exits non-zero when the stats check warns, when --diff-against shows
+# any disappeared / appeared IDs, or when drill-to-anki.py fails. The
+# Anki apkg is not written when any gate fails.
+
+set -euo pipefail
+
+usage() {  # print the usage line on stderr, then exit 2
+    cat >&2 <<'EOF'
+usage: drill-deck-sync <source.org> [--diff-against <previous-version.org>]
+EOF
+    exit 2
+}
+
+# A leading -h/--help asks for usage; it must not be taken as <source.org>.
+if [[ $# -lt 1 || "${1:-}" == -h || "${1:-}" == --help ]]; then
+    usage
+fi
+SOURCE="$1"
+shift
+
+DIFF_AGAINST=""
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --diff-against)
+            [[ $# -ge 2 && -n "$2" ]] || usage  # an empty value would silently skip the ID gate
+            DIFF_AGAINST="$2"
+            shift 2
+            ;;
+        -h|--help)
+            usage
+            ;;
+        *)
+            echo "unknown arg: $1" >&2
+            usage
+            ;;
+    esac
+done
+
+if [[ ! -f "$SOURCE" ]]; then  # the source deck must be an existing regular file
+    echo "error: $SOURCE not found" >&2
+    exit 2
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  # absolute directory of this script
+STATS="$SCRIPT_DIR/drill-deck-stats.py"
+DIFF_IDS="$SCRIPT_DIR/drill-deck-diff-ids.py"
+TO_ANKI="$SCRIPT_DIR/drill-to-anki.py"
+
+for helper in "$STATS" "$DIFF_IDS" "$TO_ANKI"; do  # all three must exist, even without --diff-against
+    if [[ ! -f "$helper" ]]; then
+        echo "error: helper $helper not found" >&2
+        exit 2
+    fi
+done
+
+echo "=== drill-deck-sync: $SOURCE ==="
+echo
+echo "--- stats ---"
+if ! python3 "$STATS" "$SOURCE"; then  # gate 1: any non-zero exit from the stats script aborts the sync
+    echo
+    echo "stats check failed — fix warnings before sync, or call drill-to-anki.py directly to override" >&2
+    exit 1
+fi
+echo
+
+if [[ -n "$DIFF_AGAINST" ]]; then  # gate 2 runs only when --diff-against was given
+    if [[ ! -f "$DIFF_AGAINST" ]]; then
+        echo "error: $DIFF_AGAINST not found" >&2
+        exit 2
+    fi
+    echo "--- ID preservation ---"
+    if ! python3 "$DIFF_IDS" "$DIFF_AGAINST" "$SOURCE"; then  # argument order: previous version, then rewritten deck
+        echo
+        echo "ID preservation check failed — SRS state may have been lost" >&2
+        exit 1
+    fi
+    echo
+fi
+
+BASENAME="$(basename "$SOURCE" .org)"  # the output file is named after the source deck
+OUTPUT="$HOME/sync/phone/anki/${BASENAME}.apkg"
+
+echo "--- regenerate apkg ---"
+mkdir -p "$(dirname "$OUTPUT")"
+"$TO_ANKI" "$SOURCE" --output "$OUTPUT"  # executed directly (not via python3); under set -e a failure stops here
+echo
+echo "deck synced to $OUTPUT"
diff --git a/.ai/scripts/tests/drill-deck-sync.bats b/.ai/scripts/tests/drill-deck-sync.bats
new file mode 100644
index 0000000..e141cab
--- /dev/null
+++ b/.ai/scripts/tests/drill-deck-sync.bats
@@ -0,0 +1,38 @@
+#!/usr/bin/env bats
+# Tests for the drill-deck-sync wrapper: argument handling + the stats gate.
+# The clean end-to-end path runs drill-to-anki.py (uv-resolved genanki) and is
+# not exercised here; these cover the guard paths that stop before that step.
+
+setup() {
+    SYNC="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)/drill-deck-sync"
+    TMP="$(mktemp -d)"
+    export HOME="$TMP/home"  # sandbox HOME so no test reads or writes the real ~/sync
+}
+
+teardown() {
+    rm -rf "$TMP"
+}
+
+@test "drill-deck-sync: no args exits 2" {
+    run "$SYNC"
+    [ "$status" -eq 2 ]
+}
+
+@test "drill-deck-sync: missing source file exits 2" {
+    run "$SYNC" "$TMP/nope.org"
+    [ "$status" -eq 2 ]
+}
+
+@test "drill-deck-sync: stats gate failure exits 1 and writes no apkg" {
+    cat > "$TMP/dirty.org" <<'EOF'
+#+TITLE: DeepSat Org-Drill Flashcards
+
+* Section
+** DeepSat :drill:
+*** Answer
+A satellite company.
+EOF
+    run "$SYNC" "$TMP/dirty.org"
+    [ "$status" -eq 1 ]
+    [ ! -f "$HOME/sync/phone/anki/dirty.apkg" ]
+}
diff --git a/.ai/scripts/tests/test_drill_deck_diff_ids.py b/.ai/scripts/tests/test_drill_deck_diff_ids.py
new file mode 100644
index 0000000..9cd8305
--- /dev/null
+++ b/.ai/scripts/tests/test_drill_deck_diff_ids.py
@@ -0,0 +1,88 @@
+"""Tests for drill-deck-diff-ids.py: :ID: extraction + SRS-state diff CLI.
+
+Plain python3 script (no third-party deps), so card_id_map imports directly;
+the disappeared/appeared reporting is exercised through the CLI.
+"""
+from __future__ import annotations
+
+import importlib.util
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPT = Path(__file__).resolve().parents[1] / "drill-deck-diff-ids.py"
+
+
+@pytest.fixture(scope="module")
+def diff_ids():  # the script's file name contains hyphens, so it is loaded by path
+    spec = importlib.util.spec_from_file_location("drill_deck_diff_ids", SCRIPT)
+    assert spec and spec.loader
+    loaded = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(loaded)
+    return loaded
+
+
+DECK_A = """* Section
+** What is DeepSat? :drill:
+:PROPERTIES:
+:ID: id-1
+:END:
+Body.
+** Who founded it? :drill:
+:PROPERTIES:
+:ID: id-2
+:END:
+Body.
+"""
+
+# id-2 dropped, id-3 added relative to DECK_A
+DECK_B = """* Section
+** What is DeepSat? :drill:
+:PROPERTIES:
+:ID: id-1
+:END:
+Body.
+** When was it founded? :drill:
+:PROPERTIES:
+:ID: id-3
+:END:
+Body.
+"""
+
+
+def test_card_id_map_extracts_id_to_heading(diff_ids, tmp_path):  # DECK_A yields both IDs; id-1 maps to its heading
+    deck = tmp_path / "a.org"
+    deck.write_text(DECK_A)
+    found = diff_ids.card_id_map(deck)
+    assert sorted(found) == ["id-1", "id-2"]
+    assert found["id-1"] == "What is DeepSat?"
+
+
+def _run(before, after):
+    """Run the CLI on two deck paths; the caller inspects returncode and stdout."""
+    argv = [sys.executable, str(SCRIPT), str(before), str(after)]
+    completed = subprocess.run(argv, capture_output=True, text=True)
+    return completed
+
+
+def test_cli_identical_decks_exit_zero(tmp_path):
+    """Identical decks exit 0 and end with the "clean" verdict line."""
+    a, b = tmp_path / "a.org", tmp_path / "b.org"
+    for deck in (a, b):
+        deck.write_text(DECK_A)
+    r = _run(a, b)
+    assert r.returncode == 0
+    assert r.stdout.splitlines()[-1].startswith("clean")  # "Preserved: N" prints on every run, so it proves nothing
+
+
+def test_cli_dropped_id_warns_and_exits_one(tmp_path):
+    """Dropping id-2 between versions exits 1 and emits the WARN line naming it."""
+    a = tmp_path / "a.org"
+    a.write_text(DECK_A)
+    b = tmp_path / "b.org"
+    b.write_text(DECK_B)
+    r = _run(a, b)
+    assert r.returncode == 1
+    assert "WARN: 1 card IDs disappeared" in r.stdout and "id-2" in r.stdout  # "Disappeared: N" prints on every run
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
new file mode 100644
index 0000000..02d9c4e
--- /dev/null
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -0,0 +1,96 @@
+"""Tests for drill-deck-stats.py: prompt-form heuristic + CLI inventory/gate.
+
+Plain python3 script (no third-party deps), so the pure helper imports directly;
+the inventory/gate behavior is exercised through the CLI.
+"""
+from __future__ import annotations
+
+import importlib.util
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPT = Path(__file__).resolve().parents[1] / "drill-deck-stats.py"
+
+
+@pytest.fixture(scope="module")
+def stats():  # the script's file name contains hyphens, so it is loaded by path
+    spec = importlib.util.spec_from_file_location("drill_deck_stats", SCRIPT)
+    assert spec and spec.loader
+    loaded = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(loaded)
+    return loaded
+
+
+# --- is_prompt_form (pure) ---
+
+def test_is_prompt_form_question_mark(stats):  # a `?` anywhere in the heading qualifies
+    assert stats.is_prompt_form("What is DeepSat?") is True
+
+
+def test_is_prompt_form_imperative_verb(stats):  # first word "Spell" is in IMPERATIVE_VERBS
+    assert stats.is_prompt_form("Spell out the orbital regimes") is True
+
+
+def test_is_prompt_form_imperative_is_case_insensitive(stats):  # the first word is lower-cased before lookup
+    assert stats.is_prompt_form("introduce yourself") is True
+
+
+def test_is_prompt_form_topic_heading_is_not_a_prompt(stats):  # no `?`, and "deepsat" is not a listed verb
+    assert stats.is_prompt_form("DeepSat") is False
+
+
+def test_is_prompt_form_strips_trailing_punctuation_off_first_word(stats):  # trailing ":,;" is stripped from word one
+    assert stats.is_prompt_form("List: the founders") is True
+
+
+# --- CLI inventory + gate (integration) ---
+
+CLEAN_DECK = """#+TITLE: DeepSat Flashcards
+
+* Section
+** What is DeepSat? :drill:
+:PROPERTIES:
+:ID: card-1
+:END:
+A satellite company.
+"""
+
+DIRTY_DECK = """#+TITLE: DeepSat Org-Drill Flashcards
+
+* Section
+** DeepSat :drill:
+*** Answer
+A satellite company.
+"""
+
+
+def _run(path):
+    """Run the stats CLI on one deck path; the caller inspects returncode and stdout."""
+    argv = [sys.executable, str(SCRIPT), str(path)]
+    completed = subprocess.run(argv, capture_output=True, text=True)
+    return completed
+
+
+def test_cli_clean_deck_exits_zero(tmp_path):
+    f = tmp_path / "clean.org"
+    f.write_text(CLEAN_DECK)
+    r = _run(f)
+    assert r.returncode == 0
+    assert r.stdout.splitlines()[-1] == "clean"  # the verdict line; the printed file name also contains "clean"
+
+
+def test_cli_dirty_deck_warns_and_exits_one(tmp_path):
+    f = tmp_path / "dirty.org"
+    f.write_text(DIRTY_DECK)
+    r = _run(f)
+    assert r.returncode == 1
+    assert "WARN" in r.stdout
+    assert "source-tool jargon" in r.stdout  # title audit fired; "org-drill" alone is echoed by the Deck title line
+
+
+def test_cli_missing_file_exits_two(tmp_path):
+    """A path that does not exist is a usage-level failure: exit code 2."""
+    assert _run(tmp_path / "nope.org").returncode == 2
author	Craig Jennings <c@cjennings.net>	2026-05-30 13:17:47 -0500
committer	Craig Jennings <c@cjennings.net>	2026-05-30 13:17:47 -0500
commit	0234e52b727b34ade93961eb05b5638685f4406f (patch)
tree	b7ee5f66a9fceb3fd4d9b1d2ba8c44e89dde76c5 /.ai/scripts
parent	038d59b7e548d2323f43dcd92ba14cba876d840d (diff)
download	rulesets-0234e52b727b34ade93961eb05b5638685f4406f.tar.gz rulesets-0234e52b727b34ade93961eb05b5638685f4406f.zip