diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-30 13:17:47 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-30 13:17:47 -0500 |
| commit | 0234e52b727b34ade93961eb05b5638685f4406f (patch) | |
| tree | b7ee5f66a9fceb3fd4d9b1d2ba8c44e89dde76c5 /.ai/scripts | |
| parent | 038d59b7e548d2323f43dcd92ba14cba876d840d (diff) | |
| download | rulesets-0234e52b727b34ade93961eb05b5638685f4406f.tar.gz rulesets-0234e52b727b34ade93961eb05b5638685f4406f.zip | |
chore(scripts): add drill-deck stats, diff-ids, and sync wrapper
I incorporated the flashcard-tooling bundle from the work project's deck-review workflow, validated there against a 93-card deck. Three scripts now live under .ai/scripts/: drill-deck-stats.py (pre-rewrite inventory plus a gate that warns on stray *** Answer headers, missing :ID:, PROPERTIES-drawer counts that do not match the card count, non-prompt headings, and a missing #+TITLE or #+TITLE jargon like "org-drill"), drill-deck-diff-ids.py (SRS-state preservation check that flags any :ID: lost across a rewrite, and also reports — and exits non-zero on — any newly appeared :ID:), and drill-deck-sync (bash wrapper chaining stats, optional diff-ids, then drill-to-anki, writing to ~/sync/phone/anki/ only when the gates pass).
The drill-deck-review.org workflow gains a Helper Scripts section and references the scripts from its phases. I reconciled its output-path prose with the drill-to-anki default that just moved to ~/sync/phone/anki/, so it no longer claims the script still defaults to ~/sync/org/drill/. I added tests for both Python scripts (pure logic plus CLI gate behavior) and a bats suite for the wrapper's guard paths. The clean end-to-end sync path stays uncovered since it needs uv-resolved genanki.
Diffstat (limited to '.ai/scripts')
| -rwxr-xr-x | .ai/scripts/drill-deck-diff-ids.py | 99 | ||||
| -rwxr-xr-x | .ai/scripts/drill-deck-stats.py | 151 | ||||
| -rwxr-xr-x | .ai/scripts/drill-deck-sync | 98 | ||||
| -rw-r--r-- | .ai/scripts/tests/drill-deck-sync.bats | 38 | ||||
| -rw-r--r-- | .ai/scripts/tests/test_drill_deck_diff_ids.py | 88 | ||||
| -rw-r--r-- | .ai/scripts/tests/test_drill_deck_stats.py | 96 |
6 files changed, 570 insertions, 0 deletions
diff --git a/.ai/scripts/drill-deck-diff-ids.py b/.ai/scripts/drill-deck-diff-ids.py new file mode 100755 index 0000000..bd2c4cc --- /dev/null +++ b/.ai/scripts/drill-deck-diff-ids.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +"""SRS-state preservation check between two versions of an org-drill deck. + +Extracts every :ID: from each version and reports IDs that disappeared +or appeared. Disappeared IDs lose org-drill SRS state (review history, +ease, intervals) and are the worst-case bug from a deck rewrite. Appeared +IDs are usually fine (new cards added on purpose) but worth surfacing. + +Exits 0 when clean, 1 when any IDs disappeared or appeared. + +Usage: + drill-deck-diff-ids.py <before.org> <after.org> +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:drill:\s*$") +ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$") + + +def card_id_map(path: Path) -> dict[str, str]: + """Return {id -> heading} for every :drill: card in path.""" + result: dict[str, str] = {} + lines = path.read_text(encoding="utf-8").splitlines() + i = 0 + while i < len(lines): + m = CARD_RE.match(lines[i]) + if m: + heading = m.group(1).strip() + i += 1 + while i < len(lines): + line = lines[i] + if line.startswith("* ") or CARD_RE.match(line): + break + mid = ID_RE.match(line) + if mid: + result[mid.group(1)] = heading + break + i += 1 + continue + i += 1 + return result + + +def main() -> int: + if len(sys.argv) != 3: + print(f"usage: {sys.argv[0]} <before.org> <after.org>", file=sys.stderr) + return 2 + + before_path = Path(sys.argv[1]).expanduser().resolve() + after_path = Path(sys.argv[2]).expanduser().resolve() + + for p in (before_path, after_path): + if not p.is_file(): + print(f"error: {p} not found", file=sys.stderr) + return 2 + + before = card_id_map(before_path) + after = card_id_map(after_path) + + before_ids = set(before) + after_ids = set(after) + + preserved = before_ids & after_ids + disappeared = 
before_ids - after_ids + appeared = after_ids - before_ids + + print(f"drill-deck-diff-ids: {before_path.name} → {after_path.name}") + print() + print(f"IDs in BEFORE: {len(before_ids)}") + print(f"IDs in AFTER: {len(after_ids)}") + print(f"Preserved: {len(preserved)}") + print(f"Disappeared: {len(disappeared)}") + print(f"Appeared: {len(appeared)}") + print() + + warnings = 0 + if disappeared: + warnings += 1 + print(f"WARN: {len(disappeared)} card IDs disappeared (SRS state lost)") + for cid in sorted(disappeared): + print(f" - {cid} (was: {before[cid]!r})") + if appeared: + warnings += 1 + print(f"NOTE: {len(appeared)} new card IDs appeared") + for cid in sorted(appeared): + print(f" - {cid} (now: {after[cid]!r})") + + if warnings == 0: + print("clean — SRS state preserved") + return 0 + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py new file mode 100755 index 0000000..72d1cde --- /dev/null +++ b/.ai/scripts/drill-deck-stats.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +"""Inventory + workflow-violation warnings for an org-drill deck source file. + +Reports counts and flags violations: +- Total cards (depth-2 `:drill:` headings) +- PROPERTIES drawer count (should match card count) +- `*** Answer` sub-header count (should be 0 per drill-deck-review.org) +- Cards missing :ID: (loses identity across versions, risks SRS-state loss) +- Cards whose heading lacks `?` (likely a topic-as-heading not yet rewritten) + +Exits 0 when clean, 1 when any warnings are present. Use as a gate before +regenerating the Anki deck or running drill-deck-sync. 
+ +Usage: + drill-deck-stats.py <file.org> +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:drill:\s*$") +ANSWER_RE = re.compile(r"^\*\*\*\s+Answer\b") +PROP_START_RE = re.compile(r"^\s*:PROPERTIES:\s*$") +PROP_END_RE = re.compile(r"^\s*:END:\s*$") +ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$") +TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE) +SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE) + +# A heading qualifies as "prompt form" if it contains `?` or starts with +# one of these imperative verbs (directive prompts like "Spell these out" +# and "Introduce yourself" are valid even without `?`). +IMPERATIVE_VERBS = frozenset({ + "spell", "describe", "explain", "name", "list", "give", + "show", "tell", "define", "compare", "identify", "outline", + "introduce", "walk", "state", "recite", "recall", "summarize", +}) + + +def is_prompt_form(heading: str) -> bool: + """True if the heading reads as a question or imperative prompt.""" + if "?" 
in heading: + return True + first_word = heading.split(None, 1)[0].lower().rstrip(":,;") + return first_word in IMPERATIVE_VERBS + + +def main() -> int: + if len(sys.argv) != 2: + print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr) + return 2 + + path = Path(sys.argv[1]).expanduser().resolve() + if not path.is_file(): + print(f"error: {path} not found", file=sys.stderr) + return 2 + + lines = path.read_text(encoding="utf-8").splitlines() + + title: str | None = None + for line in lines[:20]: + m = TITLE_RE.match(line) + if m: + title = m.group(1).strip() + break + + cards: list[tuple[str, bool, bool]] = [] # (heading, has_id, has_answer_subheader) + answer_count = 0 + prop_count = 0 + + i = 0 + while i < len(lines): + m = CARD_RE.match(lines[i]) + if m: + heading = m.group(1).strip() + i += 1 + has_id = False + has_answer = False + in_drawer = False + while i < len(lines): + line = lines[i] + if line.startswith("* ") or CARD_RE.match(line): + break + if PROP_START_RE.match(line): + prop_count += 1 + in_drawer = True + elif in_drawer and PROP_END_RE.match(line): + in_drawer = False + elif in_drawer and ID_RE.match(line): + has_id = True + elif ANSWER_RE.match(line): + answer_count += 1 + has_answer = True + i += 1 + cards.append((heading, has_id, has_answer)) + continue + i += 1 + + not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)] + no_id = [h for h, has_id, _ in cards if not has_id] + + print(f"{path.name} — drill deck stats") + print() + title_display = title if title else "(no #+TITLE)" + print(f"Deck title: {title_display}") + print(f"Cards: {len(cards)}") + drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})" + print(f"PROPERTIES drawers: {prop_count} ({drawer_status})") + answer_status = "clean" if answer_count == 0 else "workflow violation" + print(f"*** Answer sub-headers: {answer_count} ({answer_status})") + print(f"Cards missing :ID:: {len(no_id)}") + print(f"Cards with non-prompt heading: 
{len(not_prompt)}") + print() + + warnings = 0 + if title is None: + warnings += 1 + print("WARN: no #+TITLE: line found; deck name will fall back to the file basename") + elif SOURCE_TOOL_RE.search(title): + warnings += 1 + print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side") + if answer_count: + warnings += 1 + print(f"WARN: {answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)") + if prop_count != len(cards): + warnings += 1 + print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}") + if no_id: + warnings += 1 + print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites") + for h in no_id[:5]: + print(f" - {h}") + if len(no_id) > 5: + print(f" - ... and {len(no_id) - 5} more") + if not_prompt: + warnings += 1 + print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten") + for h in not_prompt[:5]: + print(f" - {h}") + if len(not_prompt) > 5: + print(f" - ... and {len(not_prompt) - 5} more") + + if warnings == 0: + print("clean") + return 0 + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.ai/scripts/drill-deck-sync b/.ai/scripts/drill-deck-sync new file mode 100755 index 0000000..8e51cdd --- /dev/null +++ b/.ai/scripts/drill-deck-sync @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# drill-deck-sync: stats check + regenerate Anki apkg + place at ~/sync/phone/anki/ +# +# Wraps drill-deck-stats.py + drill-to-anki.py (and optionally +# drill-deck-diff-ids.py) for the canonical "rewrote the deck, now ship +# it" step in the drill-deck-review workflow. 
+# +# Usage: +# drill-deck-sync <source.org> +# drill-deck-sync <source.org> --diff-against <previous-version.org> +# +# Exits non-zero when the stats check warns, when --diff-against shows +# any disappeared / appeared IDs, or when drill-to-anki.py fails. The +# Anki apkg is not written when any gate fails. + +set -euo pipefail + +usage() { + cat >&2 <<'EOF' +usage: drill-deck-sync <source.org> [--diff-against <previous-version.org>] +EOF + exit 2 +} + +if [[ $# -lt 1 ]]; then + usage +fi + +SOURCE="$1" +shift + +DIFF_AGAINST="" +while [[ $# -gt 0 ]]; do + case "$1" in + --diff-against) + [[ $# -ge 2 ]] || usage + DIFF_AGAINST="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + echo "unknown arg: $1" >&2 + usage + ;; + esac +done + +if [[ ! -f "$SOURCE" ]]; then + echo "error: $SOURCE not found" >&2 + exit 2 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +STATS="$SCRIPT_DIR/drill-deck-stats.py" +DIFF_IDS="$SCRIPT_DIR/drill-deck-diff-ids.py" +TO_ANKI="$SCRIPT_DIR/drill-to-anki.py" + +for helper in "$STATS" "$DIFF_IDS" "$TO_ANKI"; do + if [[ ! -f "$helper" ]]; then + echo "error: helper $helper not found" >&2 + exit 2 + fi +done + +echo "=== drill-deck-sync: $SOURCE ===" +echo +echo "--- stats ---" +if ! python3 "$STATS" "$SOURCE"; then + echo + echo "stats check failed — fix warnings before sync, or call drill-to-anki.py directly to override" >&2 + exit 1 +fi +echo + +if [[ -n "$DIFF_AGAINST" ]]; then + if [[ ! -f "$DIFF_AGAINST" ]]; then + echo "error: $DIFF_AGAINST not found" >&2 + exit 2 + fi + echo "--- ID preservation ---" + if ! 
python3 "$DIFF_IDS" "$DIFF_AGAINST" "$SOURCE"; then + echo + echo "ID preservation check failed — SRS state may have been lost" >&2 + exit 1 + fi + echo +fi + +BASENAME="$(basename "$SOURCE" .org)" +OUTPUT="$HOME/sync/phone/anki/${BASENAME}.apkg" + +echo "--- regenerate apkg ---" +mkdir -p "$(dirname "$OUTPUT")" +"$TO_ANKI" "$SOURCE" --output "$OUTPUT" +echo +echo "deck synced to $OUTPUT" diff --git a/.ai/scripts/tests/drill-deck-sync.bats b/.ai/scripts/tests/drill-deck-sync.bats new file mode 100644 index 0000000..e141cab --- /dev/null +++ b/.ai/scripts/tests/drill-deck-sync.bats @@ -0,0 +1,38 @@ +#!/usr/bin/env bats +# Tests for the drill-deck-sync wrapper: argument handling + the stats gate. +# The clean end-to-end path runs drill-to-anki.py (uv-resolved genanki) and is +# not exercised here; these cover the guard paths that stop before that step. + +setup() { + SCRIPT_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)" + SYNC="$SCRIPT_DIR/drill-deck-sync" + TMP="$(mktemp -d)" +} + +teardown() { + rm -rf "$TMP" +} + +@test "drill-deck-sync: no args exits 2" { + run "$SYNC" + [ "$status" -eq 2 ] +} + +@test "drill-deck-sync: missing source file exits 2" { + run "$SYNC" "$TMP/nope.org" + [ "$status" -eq 2 ] +} + +@test "drill-deck-sync: stats gate failure exits 1 and writes no apkg" { + cat > "$TMP/dirty.org" <<'EOF' +#+TITLE: DeepSat Org-Drill Flashcards + +* Section +** DeepSat :drill: +*** Answer +A satellite company. +EOF + run "$SYNC" "$TMP/dirty.org" + [ "$status" -eq 1 ] + [ ! -f "$HOME/sync/phone/anki/dirty.apkg" ] +} diff --git a/.ai/scripts/tests/test_drill_deck_diff_ids.py b/.ai/scripts/tests/test_drill_deck_diff_ids.py new file mode 100644 index 0000000..9cd8305 --- /dev/null +++ b/.ai/scripts/tests/test_drill_deck_diff_ids.py @@ -0,0 +1,88 @@ +"""Tests for drill-deck-diff-ids.py: :ID: extraction + SRS-state diff CLI. 
+ +Plain python3 script (no third-party deps), so card_id_map imports directly; +the disappeared/appeared reporting is exercised through the CLI. +""" +from __future__ import annotations + +import importlib.util +import subprocess +import sys +from pathlib import Path + +import pytest + +SCRIPT = Path(__file__).resolve().parents[1] / "drill-deck-diff-ids.py" + + +@pytest.fixture(scope="module") +def diff_ids(): + spec = importlib.util.spec_from_file_location("drill_deck_diff_ids", SCRIPT) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +DECK_A = """* Section +** What is DeepSat? :drill: +:PROPERTIES: +:ID: id-1 +:END: +Body. +** Who founded it? :drill: +:PROPERTIES: +:ID: id-2 +:END: +Body. +""" + +# id-2 dropped, id-3 added relative to DECK_A +DECK_B = """* Section +** What is DeepSat? :drill: +:PROPERTIES: +:ID: id-1 +:END: +Body. +** When was it founded? :drill: +:PROPERTIES: +:ID: id-3 +:END: +Body. +""" + + +def test_card_id_map_extracts_id_to_heading(diff_ids, tmp_path): + f = tmp_path / "a.org" + f.write_text(DECK_A) + m = diff_ids.card_id_map(f) + assert set(m) == {"id-1", "id-2"} + assert m["id-1"] == "What is DeepSat?" 
+ + +def _run(before, after): + return subprocess.run( + [sys.executable, str(SCRIPT), str(before), str(after)], + capture_output=True, text=True, + ) + + +def test_cli_identical_decks_exit_zero(tmp_path): + a = tmp_path / "a.org" + a.write_text(DECK_A) + b = tmp_path / "b.org" + b.write_text(DECK_A) + r = _run(a, b) + assert r.returncode == 0 + assert "preserved" in r.stdout.lower() + + +def test_cli_dropped_id_warns_and_exits_one(tmp_path): + a = tmp_path / "a.org" + a.write_text(DECK_A) + b = tmp_path / "b.org" + b.write_text(DECK_B) + r = _run(a, b) + assert r.returncode == 1 + assert "disappeared" in r.stdout.lower() + assert "id-2" in r.stdout diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py new file mode 100644 index 0000000..02d9c4e --- /dev/null +++ b/.ai/scripts/tests/test_drill_deck_stats.py @@ -0,0 +1,96 @@ +"""Tests for drill-deck-stats.py: prompt-form heuristic + CLI inventory/gate. + +Plain python3 script (no third-party deps), so the pure helper imports directly; +the inventory/gate behavior is exercised through the CLI. 
+""" +from __future__ import annotations + +import importlib.util +import subprocess +import sys +from pathlib import Path + +import pytest + +SCRIPT = Path(__file__).resolve().parents[1] / "drill-deck-stats.py" + + +@pytest.fixture(scope="module") +def stats(): + spec = importlib.util.spec_from_file_location("drill_deck_stats", SCRIPT) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +# --- is_prompt_form (pure) --- + +def test_is_prompt_form_question_mark(stats): + assert stats.is_prompt_form("What is DeepSat?") is True + + +def test_is_prompt_form_imperative_verb(stats): + assert stats.is_prompt_form("Spell out the orbital regimes") is True + + +def test_is_prompt_form_imperative_is_case_insensitive(stats): + assert stats.is_prompt_form("introduce yourself") is True + + +def test_is_prompt_form_topic_heading_is_not_a_prompt(stats): + assert stats.is_prompt_form("DeepSat") is False + + +def test_is_prompt_form_strips_trailing_punctuation_off_first_word(stats): + assert stats.is_prompt_form("List: the founders") is True + + +# --- CLI inventory + gate (integration) --- + +CLEAN_DECK = """#+TITLE: DeepSat Flashcards + +* Section +** What is DeepSat? :drill: +:PROPERTIES: +:ID: card-1 +:END: +A satellite company. +""" + +DIRTY_DECK = """#+TITLE: DeepSat Org-Drill Flashcards + +* Section +** DeepSat :drill: +*** Answer +A satellite company. 
+""" + + +def _run(path): + return subprocess.run( + [sys.executable, str(SCRIPT), str(path)], + capture_output=True, text=True, + ) + + +def test_cli_clean_deck_exits_zero(tmp_path): + f = tmp_path / "clean.org" + f.write_text(CLEAN_DECK) + r = _run(f) + assert r.returncode == 0 + assert "clean" in r.stdout + + +def test_cli_dirty_deck_warns_and_exits_one(tmp_path): + f = tmp_path / "dirty.org" + f.write_text(DIRTY_DECK) + r = _run(f) + assert r.returncode == 1 + assert "WARN" in r.stdout + assert "org-drill" in r.stdout.lower() # title-jargon audit fired + + +def test_cli_missing_file_exits_two(tmp_path): + r = _run(tmp_path / "nope.org") + assert r.returncode == 2 |
