aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-05-30 13:55:05 -0500
committer Craig Jennings <c@cjennings.net> 2026-05-30 13:55:05 -0500
commit 5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1 (patch)
tree d25e5300cfa05d272efb124af2dfd6470fed8e55 /.ai/scripts
parent 82e99ff8a4eb6d5aaba6ee02da3b5318a73b2125 (diff)
download rulesets-5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1.tar.gz
rulesets-5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1.zip
feat(drill-deck): add authoring-quality checks and a card-authoring section
I researched spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki and FSRS docs) and folded the findings into the drill-deck pipeline. drill-deck-stats.py now checks authoring quality on top of structure. Two checks block: answer leakage (a card where >= 80% of the question's content words reappear in its own answer tests recognition, not recall) and duplicate / near-duplicate fronts (headings that match once case and punctuation are normalized; confusable cards interfere). Three checks warn without blocking, surfacing rewrite candidates without failing the gate: overloaded backs, list-shaped backs, and binary yes/no prompts. The fuzzy thresholds live in constants at the top of the script, so a real deck that trips false positives can be tuned. I pulled the card-parsing into a parse_cards helper that captures each card's body, and added focused tests for every new helper plus CLI coverage of the leaky, duplicate, and notes-only cases. drill-deck-review.org gains a Card Authoring Principles section (the why behind the canonical shapes, with sources), a person-card splitting path bounded by the :ID:-preservation rule, a Phase B cost-benefit-removal and leech-reformulation disposition, and a scheduling-is-Anki-side note so a future editor doesn't try to encode FSRS retention in the org source. I left out cloze cards (would need a second note type), per-card tractability targeting and retention encoding (Anki-side telemetry that never reaches the source), and on-face source-stamping (the converter strips those drawers by design). Each is noted with its reason.
Diffstat (limited to '.ai/scripts')
-rwxr-xr-x  .ai/scripts/drill-deck-stats.py  276
-rw-r--r--  .ai/scripts/tests/test_drill_deck_stats.py  171
2 files changed, 377 insertions, 70 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py
index 72d1cde..d0707e2 100755
--- a/.ai/scripts/drill-deck-stats.py
+++ b/.ai/scripts/drill-deck-stats.py
@@ -1,15 +1,28 @@
#!/usr/bin/env python3
-"""Inventory + workflow-violation warnings for an org-drill deck source file.
+"""Inventory + authoring-quality checks for an org-drill deck source file.
-Reports counts and flags violations:
-- Total cards (depth-2 `:drill:` headings)
-- PROPERTIES drawer count (should match card count)
-- `*** Answer` sub-header count (should be 0 per drill-deck-review.org)
-- Cards missing :ID: (loses identity across versions, risks SRS-state loss)
-- Cards whose heading lacks `?` (likely a topic-as-heading not yet rewritten)
+Reports counts and flags two tiers of issue.
-Exits 0 when clean, 1 when any warnings are present. Use as a gate before
-regenerating the Anki deck or running drill-deck-sync.
+Blocking WARNs (exit 1):
+- PROPERTIES drawer count not matching card count
+- Cards missing :ID: (risks SRS-state loss across rewrites)
+- `*** Answer` sub-headers (should be 0 per drill-deck-review.org)
+- Non-prompt headings (topic-as-heading not yet rewritten)
+- #+TITLE missing, or carrying source-tool jargon ("org-drill")
+- Answer leakage: a card whose question's content words mostly reappear in its answer
+- Duplicate / near-duplicate fronts (interference between confusable cards)
+
+Non-blocking NOTEs (exit unaffected):
+- Overloaded backs (long answer — candidate to split into atomic cards)
+- List-shaped backs (enumeration — candidate to split or use overlapping cloze)
+- Binary yes/no prompts (low retrieval effort — candidate to reformulate)
+
+Exits 0 when no blocking warnings are present, 1 otherwise, 2 on bad usage.
+Use as a gate before regenerating the Anki deck or running drill-deck-sync.
+
+The fuzzy checks (leakage, overloaded) are tuned by the LEAKAGE_* and
+BACK_WORD_LIMIT constants below; loosen them if a real deck trips false
+positives. Duplicate detection compares normalized headings exactly.
Usage:
drill-deck-stats.py <file.org>
@@ -27,16 +40,35 @@ PROP_END_RE = re.compile(r"^\s*:END:\s*$")
ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
+PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
+BINARY_LEAD_RE = re.compile(
+ r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
+ re.IGNORECASE,
+)
-# A heading qualifies as "prompt form" if it contains `?` or starts with
-# one of these imperative verbs (directive prompts like "Spell these out"
-# and "Introduce yourself" are valid even without `?`).
+# A heading qualifies as "prompt form" if it contains `?` or starts with one of
+# these imperative verbs (directive prompts like "Spell these out" and
+# "Introduce yourself" are valid even without `?`).
IMPERATIVE_VERBS = frozenset({
"spell", "describe", "explain", "name", "list", "give",
"show", "tell", "define", "compare", "identify", "outline",
"introduce", "walk", "state", "recite", "recall", "summarize",
})
+# Function words ignored when comparing a question against its answer.
+STOPWORDS = frozenset({
+ "the", "a", "an", "is", "are", "was", "were", "of", "to", "in", "on",
+ "for", "and", "or", "with", "what", "who", "whom", "when", "where", "why",
+ "how", "which", "does", "do", "did", "tell", "me", "about", "their", "this",
+ "that", "it", "as", "at", "by", "be", "your", "you", "they", "them",
+})
+
+# Tuning knobs for the fuzzy checks.
+LEAKAGE_RATIO = 0.8 # share of a question's content words echoed in its answer
+LEAKAGE_MIN_WORDS = 3 # ignore very short questions, where overlap is noise
+BACK_WORD_LIMIT = 60 # words on a card back before it's flagged as overloaded
+
def is_prompt_form(heading: str) -> bool:
"""True if the heading reads as a question or imperative prompt."""
@@ -46,6 +78,116 @@ def is_prompt_form(heading: str) -> bool:
return first_word in IMPERATIVE_VERBS
+def content_words(text: str) -> set[str]:
+ """Lowercased alphanumeric tokens of length >= 3, minus stopwords."""
+ return {w for w in re.findall(r"[a-z0-9]+", text.lower())
+ if len(w) >= 3 and w not in STOPWORDS}
+
+
+def leakage_ratio(heading: str, body: str) -> float:
+ """Fraction of the question's content words that reappear in the answer.
+
+ A high ratio means the answer is largely restated in the question, so the
+ card can be answered by recognition rather than recall. Returns 0.0 for a
+ question with fewer than LEAKAGE_MIN_WORDS content words, where overlap is
+ just noise.
+ """
+ hw = content_words(heading)
+ if len(hw) < LEAKAGE_MIN_WORDS:
+ return 0.0
+ return len(hw & content_words(body)) / len(hw)
+
+
+def normalize_heading(heading: str) -> str:
+ """Collapse a heading to a comparison key (lowercase, alnum + single spaces)."""
+ return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip()
+
+
+def is_binary_prompt(heading: str) -> bool:
+ """True for yes/no or 'A or B' prompts, which need little retrieval effort."""
+ if BINARY_LEAD_RE.match(heading):
+ return True
+ return bool(re.search(r"\bor\b", heading, re.IGNORECASE)) and heading.rstrip().endswith("?")
+
+
+def back_word_count(body: str) -> int:
+ return len(body.split())
+
+
+def is_list_back(body: str) -> bool:
+ """True if the answer body is mostly an org list (an enumeration card)."""
+ lines = [ln for ln in body.splitlines() if ln.strip()]
+ if len(lines) < 2:
+ return False
+ bullets = sum(1 for ln in lines if BULLET_RE.match(ln))
+ return bullets >= 2 and bullets * 2 >= len(lines)
+
+
+def parse_cards(lines: list[str]) -> tuple[list[dict], int]:
+ """Parse :drill: cards from org lines.
+
+ Returns (cards, prop_count). Each card is a dict with heading, has_id,
+ has_answer, and body (the answer text with PROPERTIES drawers, planning
+ lines, and `*** Answer` headers removed, approximating the rendered back).
+ """
+ cards: list[dict] = []
+ prop_count = 0
+ i = 0
+ n = len(lines)
+ while i < n:
+ m = CARD_RE.match(lines[i])
+ if not m:
+ i += 1
+ continue
+ heading = m.group(1).strip()
+ i += 1
+ has_id = False
+ has_answer = False
+ in_drawer = False
+ body_lines: list[str] = []
+ while i < n:
+ line = lines[i]
+ if line.startswith("* ") or CARD_RE.match(line):
+ break
+ if PROP_START_RE.match(line):
+ prop_count += 1
+ in_drawer = True
+ elif in_drawer and PROP_END_RE.match(line):
+ in_drawer = False
+ elif in_drawer:
+ if ID_RE.match(line):
+ has_id = True
+ elif ANSWER_RE.match(line):
+ has_answer = True
+ elif PLANNING_RE.match(line):
+ pass
+ else:
+ body_lines.append(line)
+ i += 1
+ cards.append({
+ "heading": heading,
+ "has_id": has_id,
+ "has_answer": has_answer,
+ "body": "\n".join(body_lines).strip(),
+ })
+ return cards, prop_count
+
+
+def find_duplicate_fronts(cards: list[dict]) -> list[tuple[str, str]]:
+ """Return (first, dup) heading pairs that normalize to the same key."""
+ seen: dict[str, str] = {}
+ dups: list[tuple[str, str]] = []
+ for c in cards:
+ key = normalize_heading(c["heading"])
+ if not key:
+ continue
+ if key in seen:
+ dups.append((seen[key], c["heading"]))
+ else:
+ seen[key] = c["heading"]
+ return dups
+
+
def main() -> int:
if len(sys.argv) != 2:
print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
@@ -65,84 +207,78 @@ def main() -> int:
title = m.group(1).strip()
break
- cards: list[tuple[str, bool, bool]] = [] # (heading, has_id, has_answer_subheader)
- answer_count = 0
- prop_count = 0
+ cards, prop_count = parse_cards(lines)
- i = 0
- while i < len(lines):
- m = CARD_RE.match(lines[i])
- if m:
- heading = m.group(1).strip()
- i += 1
- has_id = False
- has_answer = False
- in_drawer = False
- while i < len(lines):
- line = lines[i]
- if line.startswith("* ") or CARD_RE.match(line):
- break
- if PROP_START_RE.match(line):
- prop_count += 1
- in_drawer = True
- elif in_drawer and PROP_END_RE.match(line):
- in_drawer = False
- elif in_drawer and ID_RE.match(line):
- has_id = True
- elif ANSWER_RE.match(line):
- answer_count += 1
- has_answer = True
- i += 1
- cards.append((heading, has_id, has_answer))
- continue
- i += 1
-
- not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)]
- no_id = [h for h, has_id, _ in cards if not has_id]
+ no_id = [c["heading"] for c in cards if not c["has_id"]]
+ not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
+ answer_count = sum(1 for c in cards if c["has_answer"])
+ leaky = [c["heading"] for c in cards
+ if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO]
+ dups = find_duplicate_fronts(cards)
+ overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
+ listy = [c["heading"] for c in cards if is_list_back(c["body"])]
+ binary = [c["heading"] for c in cards if is_binary_prompt(c["heading"])]
print(f"{path.name} — drill deck stats")
print()
- title_display = title if title else "(no #+TITLE)"
- print(f"Deck title: {title_display}")
+ print(f"Deck title: {title if title else '(no #+TITLE)'}")
print(f"Cards: {len(cards)}")
drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
- answer_status = "clean" if answer_count == 0 else "workflow violation"
- print(f"*** Answer sub-headers: {answer_count} ({answer_status})")
+ print(f"*** Answer sub-headers: {answer_count} ({'clean' if answer_count == 0 else 'workflow violation'})")
print(f"Cards missing :ID:: {len(no_id)}")
print(f"Cards with non-prompt heading: {len(not_prompt)}")
+ print(f"Cards with possible answer leakage: {len(leaky)}")
+ print(f"Duplicate / near-duplicate fronts: {len(dups)}")
print()
warnings = 0
- if title is None:
+
+ def emit_list(items: list[str]) -> None:
+ for h in items[:5]:
+ print(f" - {h}")
+ if len(items) > 5:
+ print(f" - ... and {len(items) - 5} more")
+
+ def warn(msg: str, items: list[str] | None = None) -> None:
+ nonlocal warnings
warnings += 1
- print("WARN: no #+TITLE: line found; deck name will fall back to the file basename")
+ print(f"WARN: {msg}")
+ if items:
+ emit_list(items)
+
+ def note(msg: str, items: list[str] | None = None) -> None:
+ print(f"NOTE: {msg}")
+ if items:
+ emit_list(items)
+
+ if title is None:
+ warn("no #+TITLE: line found; deck name will fall back to the file basename")
elif SOURCE_TOOL_RE.search(title):
- warnings += 1
- print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
+ warn(f"#+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
if answer_count:
- warnings += 1
- print(f"WARN: {answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
+ warn(f"{answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
if prop_count != len(cards):
- warnings += 1
- print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}")
+ warn(f"PROPERTIES count {prop_count} does not match card count {len(cards)}")
if no_id:
- warnings += 1
- print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites")
- for h in no_id[:5]:
- print(f" - {h}")
- if len(no_id) > 5:
- print(f" - ... and {len(no_id) - 5} more")
+ warn(f"{len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites", no_id)
if not_prompt:
- warnings += 1
- print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten")
- for h in not_prompt[:5]:
- print(f" - {h}")
- if len(not_prompt) > 5:
- print(f" - ... and {len(not_prompt) - 5} more")
+ warn(f"{len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten", not_prompt)
+ if leaky:
+ warn(f"{len(leaky)} cards may leak their answer (question echoes >= {int(LEAKAGE_RATIO * 100)}% of its own answer's key words); reformulate so the answer is recalled, not recognized", leaky)
+ if dups:
+ warn(f"{len(dups)} duplicate / near-duplicate fronts (interference between confusable cards); disambiguate or merge",
+ [f"{a} == {b}" for a, b in dups])
+
+ if overloaded:
+ note(f"{len(overloaded)} cards have a long answer (> {BACK_WORD_LIMIT} words); candidates to split into atomic cards", overloaded)
+ if listy:
+ note(f"{len(listy)} cards have a list-shaped answer; enumeration cards recall poorly — candidates to split or use overlapping cloze", listy)
+ if binary:
+ note(f"{len(binary)} cards are binary (yes/no or 'A or B'); low retrieval effort — candidates to reformulate open-ended", binary)
if warnings == 0:
- print("clean")
+ print("clean (with non-blocking notes above)" if (overloaded or listy or binary) else "clean")
return 0
return 1
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
index 3154d42..80b9913 100644
--- a/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -132,3 +132,174 @@ def test_cli_properties_count_mismatch_warns_and_exits_one(tmp_path):
r = _run(f)
assert r.returncode == 1
assert "does not match card count" in r.stdout
+
+
+# --- content_words / leakage_ratio (pure) ---
+
+def test_content_words_drops_stopwords_and_short_tokens(stats):
+ assert stats.content_words("What is the LEO regime?") == {"leo", "regime"}
+
+
+def test_leakage_ratio_high_when_answer_restates_question(stats):
+ ratio = stats.leakage_ratio(
+ "primary orbital regimes satellites",
+ "the primary orbital regimes for satellites are listed",
+ )
+ assert ratio == 1.0
+
+
+def test_leakage_ratio_zero_for_short_question(stats):
+ # "LEO" is the only content word, below LEAKAGE_MIN_WORDS, so overlap is noise.
+ assert stats.leakage_ratio("What is LEO?", "LEO means low earth orbit") == 0.0
+
+
+# --- normalize_heading (pure) ---
+
+def test_normalize_heading_lowercases_and_strips_punctuation(stats):
+ assert stats.normalize_heading(" What is L.E.O.? ") == "what is l e o"
+
+
+def test_normalize_heading_collisions_match(stats):
+ assert stats.normalize_heading("What is LEO?") == stats.normalize_heading("what is leo")
+
+
+# --- is_binary_prompt (pure) ---
+
+def test_is_binary_prompt_true_for_yes_no_lead(stats):
+ assert stats.is_binary_prompt("Is LEO below GEO?") is True
+
+
+def test_is_binary_prompt_true_for_a_or_b(stats):
+ assert stats.is_binary_prompt("Is it LEO or GEO?") is True
+
+
+def test_is_binary_prompt_false_for_open_question(stats):
+ assert stats.is_binary_prompt("What distinguishes LEO from GEO?") is False
+
+
+# --- back_word_count / is_list_back (pure) ---
+
+def test_back_word_count(stats):
+ assert stats.back_word_count("one two three") == 3
+ assert stats.back_word_count("") == 0
+
+
+def test_is_list_back_true_for_bulleted_body(stats):
+ assert stats.is_list_back("- LEO\n- MEO\n- GEO") is True
+
+
+def test_is_list_back_false_for_prose(stats):
+ assert stats.is_list_back("Low Earth Orbit.\nThe closest regime.") is False
+
+
+def test_is_list_back_false_for_single_bullet(stats):
+ assert stats.is_list_back("- only one bullet\nplain prose line") is False
+
+
+# --- parse_cards (pure) ---
+
+def test_parse_cards_captures_body_without_drawer_planning_or_answer_header(stats):
+ text = (
+ "* Sec\n"
+ "** Q one? :drill:\n"
+ ":PROPERTIES:\n:ID: id-1\n:END:\n"
+ "SCHEDULED: <2026-05-20 Wed>\n"
+ "*** Answer\n"
+ "the real answer\n"
+ )
+ cards, prop_count = stats.parse_cards(text.splitlines())
+ assert prop_count == 1
+ assert len(cards) == 1
+ c = cards[0]
+ assert c["heading"] == "Q one?"
+ assert c["has_id"] is True
+ assert c["has_answer"] is True
+ assert c["body"] == "the real answer"
+
+
+def test_find_duplicate_fronts_matches_normalized_headings(stats):
+ cards = [
+ {"heading": "What is LEO?"},
+ {"heading": "what is leo?"},
+ {"heading": "What is GEO?"},
+ ]
+ dups = stats.find_duplicate_fronts(cards)
+ assert len(dups) == 1
+ assert dups[0] == ("What is LEO?", "what is leo?")
+
+
+# --- CLI: new blocking checks ---
+
+LEAKY_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the primary orbital regimes for satellites? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+The primary orbital regimes for satellites are listed here.
+"""
+
+DUP_FRONT_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Low Earth Orbit.
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+Low Earth Orbit, restated.
+"""
+
+
+def test_cli_answer_leakage_warns_and_exits_one(tmp_path):
+ f = tmp_path / "leaky.org"
+ f.write_text(LEAKY_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "leak" in r.stdout.lower()
+
+
+def test_cli_duplicate_front_warns_and_exits_one(tmp_path):
+ f = tmp_path / "dup.org"
+ f.write_text(DUP_FRONT_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "duplicate" in r.stdout.lower()
+
+
+# --- CLI: non-blocking NOTEs keep exit 0 ---
+
+NOTES_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** Is LEO closer than GEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Yes, much closer.
+** What orbital regimes exist? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+- LEO
+- MEO
+- GEO
+** Describe the platform elements in full :drill:
+:PROPERTIES:
+:ID: c3
+:END:
+The platform carries power generation, propulsion, attitude control, thermal regulation, and radio hardware arranged around a central frame. Each element draws from shared resources and must survive launch loads, vacuum, and radiation. Engineers trade mass against capability when every kilogram raises cost, so redundancy is added only where a single failure would end the mission entirely and cheaper options cannot cover the same risk.
+"""
+
+
+def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
+ f = tmp_path / "notes.org"
+ f.write_text(NOTES_DECK)
+ r = _run(f)
+ assert r.returncode == 0
+ assert "NOTE" in r.stdout