about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x  .ai/scripts/drill-deck-stats.py  276
-rw-r--r--  .ai/scripts/tests/test_drill_deck_stats.py  171
-rw-r--r--  .ai/workflows/drill-deck-review.org  38
-rwxr-xr-x  claude-templates/.ai/scripts/drill-deck-stats.py  276
-rw-r--r--  claude-templates/.ai/scripts/tests/test_drill_deck_stats.py  171
-rw-r--r--  claude-templates/.ai/workflows/drill-deck-review.org  38
6 files changed, 826 insertions, 144 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py
index 72d1cde..d0707e2 100755
--- a/.ai/scripts/drill-deck-stats.py
+++ b/.ai/scripts/drill-deck-stats.py
@@ -1,15 +1,28 @@
#!/usr/bin/env python3
-"""Inventory + workflow-violation warnings for an org-drill deck source file.
+"""Inventory + authoring-quality checks for an org-drill deck source file.
-Reports counts and flags violations:
-- Total cards (depth-2 `:drill:` headings)
-- PROPERTIES drawer count (should match card count)
-- `*** Answer` sub-header count (should be 0 per drill-deck-review.org)
-- Cards missing :ID: (loses identity across versions, risks SRS-state loss)
-- Cards whose heading lacks `?` (likely a topic-as-heading not yet rewritten)
+Reports counts and flags two tiers of issue.
-Exits 0 when clean, 1 when any warnings are present. Use as a gate before
-regenerating the Anki deck or running drill-deck-sync.
+Blocking WARNs (exit 1):
+- PROPERTIES drawer count not matching card count
+- Cards missing :ID: (risks SRS-state loss across rewrites)
+- `*** Answer` sub-headers (should be 0 per drill-deck-review.org)
+- Non-prompt headings (topic-as-heading not yet rewritten)
+- #+TITLE missing, or carrying source-tool jargon ("org-drill")
+- Answer leakage: a card whose question's key words mostly recur in its answer
+- Duplicate / near-duplicate fronts (interference between confusable cards)
+
+Non-blocking NOTEs (exit unaffected):
+- Overloaded backs (long answer — candidate to split into atomic cards)
+- List-shaped backs (enumeration — candidate to split or use overlapping cloze)
+- Binary yes/no or 'A or B' prompts (low retrieval effort — candidate to reformulate)
+
+Exits 0 when no blocking warnings are present, 1 otherwise, 2 on bad usage.
+Use as a gate before regenerating the Anki deck or running drill-deck-sync.
+
+The fuzzy checks (leakage, overloaded) are tuned by the LEAKAGE_* and
+BACK_WORD_LIMIT constants below; loosen them if a real deck trips false
+positives. Duplicate detection has no knob: it matches normalized headings.
Usage:
drill-deck-stats.py <file.org>
@@ -27,16 +40,35 @@ PROP_END_RE = re.compile(r"^\s*:END:\s*$")
ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
+PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
+BINARY_LEAD_RE = re.compile(
+ r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
+ re.IGNORECASE,
+)
-# A heading qualifies as "prompt form" if it contains `?` or starts with
-# one of these imperative verbs (directive prompts like "Spell these out"
-# and "Introduce yourself" are valid even without `?`).
+# A heading qualifies as "prompt form" if it contains `?` or starts with one of
+# these imperative verbs (directive prompts like "Spell these out" and
+# "Introduce yourself" are valid even without `?`).
IMPERATIVE_VERBS = frozenset({
"spell", "describe", "explain", "name", "list", "give",
"show", "tell", "define", "compare", "identify", "outline",
"introduce", "walk", "state", "recite", "recall", "summarize",
})
+# Function words ignored when comparing a question against its answer.
+STOPWORDS = frozenset({
+ "the", "a", "an", "is", "are", "was", "were", "of", "to", "in", "on",
+ "for", "and", "or", "with", "what", "who", "whom", "when", "where", "why",
+ "how", "which", "does", "do", "did", "tell", "me", "about", "their", "this",
+ "that", "it", "as", "at", "by", "be", "your", "you", "they", "them",
+})
+
+# Tuning knobs for the fuzzy checks.
+LEAKAGE_RATIO = 0.8 # share of a question's content words echoed in its answer
+LEAKAGE_MIN_WORDS = 3 # ignore very short questions, where overlap is noise
+BACK_WORD_LIMIT = 60 # words on a card back before it's flagged as overloaded
+
def is_prompt_form(heading: str) -> bool:
"""True if the heading reads as a question or imperative prompt."""
@@ -46,6 +78,116 @@ def is_prompt_form(heading: str) -> bool:
return first_word in IMPERATIVE_VERBS
+def content_words(text: str) -> set[str]:
+ """Lowercased alphanumeric tokens of length >= 3, minus stopwords."""
+ return {w for w in re.findall(r"[a-z0-9]+", text.lower())
+ if len(w) >= 3 and w not in STOPWORDS}
+
+
+def leakage_ratio(heading: str, body: str) -> float:
+ """Fraction of the question's content words that reappear in the answer.
+
+ A high ratio means the answer is largely restated in the question, so the
+ card can be answered by recognition rather than recall. Returns 0.0 for a
+ question with fewer than LEAKAGE_MIN_WORDS content words, where overlap is
+ just noise.
+ """
+ hw = content_words(heading)
+ if len(hw) < LEAKAGE_MIN_WORDS:
+ return 0.0
+ return len(hw & content_words(body)) / len(hw)
+
+
+def normalize_heading(heading: str) -> str:
+ """Collapse a heading to a comparison key (lowercase, alnum + single spaces)."""
+ return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip()
+
+
+def is_binary_prompt(heading: str) -> bool:
+ """True for yes/no or 'A or B' prompts, which need little retrieval effort."""
+ if BINARY_LEAD_RE.match(heading):
+ return True
+ return bool(re.search(r"\bor\b", heading, re.IGNORECASE)) and heading.rstrip().endswith("?")
+
+
+def back_word_count(body: str) -> int:
+ return len(body.split())
+
+
+def is_list_back(body: str) -> bool:
+ """True if the answer body is mostly an org list (an enumeration card)."""
+ lines = [ln for ln in body.splitlines() if ln.strip()]
+ if len(lines) < 2:
+ return False
+ bullets = sum(1 for ln in lines if BULLET_RE.match(ln))
+ return bullets >= 2 and bullets * 2 >= len(lines)
+
+
+def parse_cards(lines: list[str]) -> tuple[list[dict], int]:
+ """Parse :drill: cards from org lines.
+
+ Returns (cards, prop_count). Each card is a dict with heading, has_id,
+ has_answer, and body (the answer text with PROPERTIES drawers, planning
+ lines, and `*** Answer` headers removed, approximating the rendered back).
+ """
+ cards: list[dict] = []
+ prop_count = 0
+ i = 0
+ n = len(lines)
+ while i < n:
+ m = CARD_RE.match(lines[i])
+ if not m:
+ i += 1
+ continue
+ heading = m.group(1).strip()
+ i += 1
+ has_id = False
+ has_answer = False
+ in_drawer = False
+ body_lines: list[str] = []
+ while i < n:
+ line = lines[i]
+ if line.startswith("* ") or CARD_RE.match(line):
+ break
+ if PROP_START_RE.match(line):
+ prop_count += 1
+ in_drawer = True
+ elif in_drawer and PROP_END_RE.match(line):
+ in_drawer = False
+ elif in_drawer:
+ if ID_RE.match(line):
+ has_id = True
+ elif ANSWER_RE.match(line):
+ has_answer = True
+ elif PLANNING_RE.match(line):
+ pass
+ else:
+ body_lines.append(line)
+ i += 1
+ cards.append({
+ "heading": heading,
+ "has_id": has_id,
+ "has_answer": has_answer,
+ "body": "\n".join(body_lines).strip(),
+ })
+ return cards, prop_count
+
+
+def find_duplicate_fronts(cards: list[dict]) -> list[tuple[str, str]]:
+ """Return (first, dup) heading pairs that normalize to the same key."""
+ seen: dict[str, str] = {}
+ dups: list[tuple[str, str]] = []
+ for c in cards:
+ key = normalize_heading(c["heading"])
+ if not key:
+ continue
+ if key in seen:
+ dups.append((seen[key], c["heading"]))
+ else:
+ seen[key] = c["heading"]
+ return dups
+
+
def main() -> int:
if len(sys.argv) != 2:
print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
@@ -65,84 +207,78 @@ def main() -> int:
title = m.group(1).strip()
break
- cards: list[tuple[str, bool, bool]] = [] # (heading, has_id, has_answer_subheader)
- answer_count = 0
- prop_count = 0
+ cards, prop_count = parse_cards(lines)
- i = 0
- while i < len(lines):
- m = CARD_RE.match(lines[i])
- if m:
- heading = m.group(1).strip()
- i += 1
- has_id = False
- has_answer = False
- in_drawer = False
- while i < len(lines):
- line = lines[i]
- if line.startswith("* ") or CARD_RE.match(line):
- break
- if PROP_START_RE.match(line):
- prop_count += 1
- in_drawer = True
- elif in_drawer and PROP_END_RE.match(line):
- in_drawer = False
- elif in_drawer and ID_RE.match(line):
- has_id = True
- elif ANSWER_RE.match(line):
- answer_count += 1
- has_answer = True
- i += 1
- cards.append((heading, has_id, has_answer))
- continue
- i += 1
-
- not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)]
- no_id = [h for h, has_id, _ in cards if not has_id]
+ no_id = [c["heading"] for c in cards if not c["has_id"]]
+ not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
+ answer_count = sum(1 for c in cards if c["has_answer"])
+ leaky = [c["heading"] for c in cards
+ if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO]
+ dups = find_duplicate_fronts(cards)
+ overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
+ listy = [c["heading"] for c in cards if is_list_back(c["body"])]
+ binary = [c["heading"] for c in cards if is_binary_prompt(c["heading"])]
print(f"{path.name} — drill deck stats")
print()
- title_display = title if title else "(no #+TITLE)"
- print(f"Deck title: {title_display}")
+ print(f"Deck title: {title if title else '(no #+TITLE)'}")
print(f"Cards: {len(cards)}")
drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
- answer_status = "clean" if answer_count == 0 else "workflow violation"
- print(f"*** Answer sub-headers: {answer_count} ({answer_status})")
+ print(f"*** Answer sub-headers: {answer_count} ({'clean' if answer_count == 0 else 'workflow violation'})")
print(f"Cards missing :ID:: {len(no_id)}")
print(f"Cards with non-prompt heading: {len(not_prompt)}")
+ print(f"Cards with possible answer leakage: {len(leaky)}")
+ print(f"Duplicate / near-duplicate fronts: {len(dups)}")
print()
warnings = 0
- if title is None:
+
+ def emit_list(items: list[str]) -> None:
+ for h in items[:5]:
+ print(f" - {h}")
+ if len(items) > 5:
+ print(f" - ... and {len(items) - 5} more")
+
+ def warn(msg: str, items: list[str] | None = None) -> None:
+ nonlocal warnings
warnings += 1
- print("WARN: no #+TITLE: line found; deck name will fall back to the file basename")
+ print(f"WARN: {msg}")
+ if items:
+ emit_list(items)
+
+ def note(msg: str, items: list[str] | None = None) -> None:
+ print(f"NOTE: {msg}")
+ if items:
+ emit_list(items)
+
+ if title is None:
+ warn("no #+TITLE: line found; deck name will fall back to the file basename")
elif SOURCE_TOOL_RE.search(title):
- warnings += 1
- print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
+ warn(f"#+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
if answer_count:
- warnings += 1
- print(f"WARN: {answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
+ warn(f"{answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
if prop_count != len(cards):
- warnings += 1
- print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}")
+ warn(f"PROPERTIES count {prop_count} does not match card count {len(cards)}")
if no_id:
- warnings += 1
- print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites")
- for h in no_id[:5]:
- print(f" - {h}")
- if len(no_id) > 5:
- print(f" - ... and {len(no_id) - 5} more")
+ warn(f"{len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites", no_id)
if not_prompt:
- warnings += 1
- print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten")
- for h in not_prompt[:5]:
- print(f" - {h}")
- if len(not_prompt) > 5:
- print(f" - ... and {len(not_prompt) - 5} more")
+ warn(f"{len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten", not_prompt)
+ if leaky:
+ warn(f"{len(leaky)} cards may leak their answer (question echoes >= {int(LEAKAGE_RATIO * 100)}% of its own answer's key words); reformulate so the answer is recalled, not recognized", leaky)
+ if dups:
+ warn(f"{len(dups)} duplicate / near-duplicate fronts (interference between confusable cards); disambiguate or merge",
+ [f"{a} == {b}" for a, b in dups])
+
+ if overloaded:
+ note(f"{len(overloaded)} cards have a long answer (> {BACK_WORD_LIMIT} words); candidates to split into atomic cards", overloaded)
+ if listy:
+ note(f"{len(listy)} cards have a list-shaped answer; enumeration cards recall poorly — candidates to split or use overlapping cloze", listy)
+ if binary:
+ note(f"{len(binary)} cards are binary (yes/no or 'A or B'); low retrieval effort — candidates to reformulate open-ended", binary)
if warnings == 0:
- print("clean")
+ print("clean (with non-blocking notes above)" if (overloaded or listy or binary) else "clean")
return 0
return 1
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
index 3154d42..80b9913 100644
--- a/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -132,3 +132,174 @@ def test_cli_properties_count_mismatch_warns_and_exits_one(tmp_path):
r = _run(f)
assert r.returncode == 1
assert "does not match card count" in r.stdout
+
+
+# --- content_words / leakage_ratio (pure) ---
+
+def test_content_words_drops_stopwords_and_short_tokens(stats):
+ assert stats.content_words("What is the LEO regime?") == {"leo", "regime"}
+
+
+def test_leakage_ratio_high_when_answer_restates_question(stats):
+ ratio = stats.leakage_ratio(
+ "primary orbital regimes satellites",
+ "the primary orbital regimes for satellites are listed",
+ )
+ assert ratio == 1.0
+
+
+def test_leakage_ratio_zero_for_short_question(stats):
+ # "LEO" is the only content word, below LEAKAGE_MIN_WORDS, so overlap is noise.
+ assert stats.leakage_ratio("What is LEO?", "LEO means low earth orbit") == 0.0
+
+
+# --- normalize_heading (pure) ---
+
+def test_normalize_heading_lowercases_and_strips_punctuation(stats):
+ assert stats.normalize_heading(" What is L.E.O.? ") == "what is l e o"
+
+
+def test_normalize_heading_collisions_match(stats):
+ assert stats.normalize_heading("What is LEO?") == stats.normalize_heading("what is leo")
+
+
+# --- is_binary_prompt (pure) ---
+
+def test_is_binary_prompt_true_for_yes_no_lead(stats):
+ assert stats.is_binary_prompt("Is LEO below GEO?") is True
+
+
+def test_is_binary_prompt_true_for_a_or_b(stats):
+ assert stats.is_binary_prompt("Is it LEO or GEO?") is True
+
+
+def test_is_binary_prompt_false_for_open_question(stats):
+ assert stats.is_binary_prompt("What distinguishes LEO from GEO?") is False
+
+
+# --- back_word_count / is_list_back (pure) ---
+
+def test_back_word_count(stats):
+ assert stats.back_word_count("one two three") == 3
+ assert stats.back_word_count("") == 0
+
+
+def test_is_list_back_true_for_bulleted_body(stats):
+ assert stats.is_list_back("- LEO\n- MEO\n- GEO") is True
+
+
+def test_is_list_back_false_for_prose(stats):
+ assert stats.is_list_back("Low Earth Orbit.\nThe closest regime.") is False
+
+
+def test_is_list_back_false_for_single_bullet(stats):
+ assert stats.is_list_back("- only one bullet\nplain prose line") is False
+
+
+# --- parse_cards (pure) ---
+
+def test_parse_cards_captures_body_without_drawer_planning_or_answer_header(stats):
+ text = (
+ "* Sec\n"
+ "** Q one? :drill:\n"
+ ":PROPERTIES:\n:ID: id-1\n:END:\n"
+ "SCHEDULED: <2026-05-20 Wed>\n"
+ "*** Answer\n"
+ "the real answer\n"
+ )
+ cards, prop_count = stats.parse_cards(text.splitlines())
+ assert prop_count == 1
+ assert len(cards) == 1
+ c = cards[0]
+ assert c["heading"] == "Q one?"
+ assert c["has_id"] is True
+ assert c["has_answer"] is True
+ assert c["body"] == "the real answer"
+
+
+def test_find_duplicate_fronts_matches_normalized_headings(stats):
+ cards = [
+ {"heading": "What is LEO?"},
+ {"heading": "what is leo?"},
+ {"heading": "What is GEO?"},
+ ]
+ dups = stats.find_duplicate_fronts(cards)
+ assert len(dups) == 1
+ assert dups[0] == ("What is LEO?", "what is leo?")
+
+
+# --- CLI: new blocking checks ---
+
+LEAKY_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the primary orbital regimes for satellites? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+The primary orbital regimes for satellites are listed here.
+"""
+
+DUP_FRONT_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Low Earth Orbit.
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+Low Earth Orbit, restated.
+"""
+
+
+def test_cli_answer_leakage_warns_and_exits_one(tmp_path):
+ f = tmp_path / "leaky.org"
+ f.write_text(LEAKY_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "leak" in r.stdout.lower()
+
+
+def test_cli_duplicate_front_warns_and_exits_one(tmp_path):
+ f = tmp_path / "dup.org"
+ f.write_text(DUP_FRONT_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "duplicate" in r.stdout.lower()
+
+
+# --- CLI: non-blocking NOTEs keep exit 0 ---
+
+NOTES_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** Is LEO closer than GEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Yes, much closer.
+** What orbital regimes exist? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+- LEO
+- MEO
+- GEO
+** Describe the platform elements in full :drill:
+:PROPERTIES:
+:ID: c3
+:END:
+The platform carries power generation, propulsion, attitude control, thermal regulation, and radio hardware arranged around a central frame. Each element draws from shared resources and must survive launch loads, vacuum, and radiation. Engineers trade mass against capability when every kilogram raises cost, so redundancy is added only where a single failure would end the mission entirely and cheaper options cannot cover the same risk.
+"""
+
+
+def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
+ f = tmp_path / "notes.org"
+ f.write_text(NOTES_DECK)
+ r = _run(f)
+ assert r.returncode == 0
+ assert "NOTE" in r.stdout
diff --git a/.ai/workflows/drill-deck-review.org b/.ai/workflows/drill-deck-review.org
index 7e9eed5..fe12f3c 100644
--- a/.ai/workflows/drill-deck-review.org
+++ b/.ai/workflows/drill-deck-review.org
@@ -8,6 +8,8 @@ Take an org-drill flashcard file and bring it into the canonical shape — every
The workflow has three substantive passes (question-form audit, content-accuracy audit, source rewrite) followed by a mechanical regenerate-and-place step. Content review is dispatched to a subagent because it's bounded research across project source-of-truth files; the structural rewrite stays in the main thread because it touches the SRS state we don't want to lose. Three helper scripts (=drill-deck-stats.py=, =drill-deck-diff-ids.py=, =drill-deck-sync=) automate the inventory, the safety check, and the regenerate-and-place.
+*Scheduling lives on the Anki side.* Desired retention and the FSRS scheduling model are per-deck Anki options set on the phone, never controlled by the org source or =drill-to-anki.py=. The pipeline's only scheduling job is keeping each card's identity (the =:ID:=-derived GUID) stable so Anki's review history survives a rewrite. Don't try to encode retention, intervals, or org-drill's SM-2 state into the Anki output — the two schedulers are separate, and the import carries only card content plus identity. (Anki's desired-retention default is 90%; see [[https://docs.ankiweb.net/deck-options.html][the deck-options manual]].)
+
* When to Use This Workflow
Trigger phrases:
@@ -88,6 +90,8 @@ Format: "Who is X? Tell me about their Y." where X is a role descriptor that doe
Note: pick a role descriptor that genuinely identifies one person. If multiple people share the role description, add a single distinguishing detail (e.g., "the one who works evenings", "the Vineti alum"). Don't pile on parentheticals.
+ Splitting: the person card deliberately trades atomicity for narrative recall — one card carries identity plus several attributes. When a body bundles genuinely unrelated attributes (role, employment history, limitations, scope) rather than one coherent topic, split it into multiple cards. One inherits the existing =:ID:= (and its SRS history); each new sibling starts fresh and will correctly show in =drill-deck-diff-ids.py= as an appeared ID. The criterion: split when the body reads as a list of separate facts, keep it whole when it reads as one story. (Minimum-information principle — Wozniak rule 4, Matuschak "Focused".)
+
*** Talking-points and directive cards
Already in prompt form ("Introduce Yourself", "Spell out these orbital regime acronyms", "What is DeepSat?"). Leave the heading alone. Still strip the =*** Answer= sub-header and audit the body content for staleness.
@@ -100,6 +104,26 @@ The =drill-deck-stats.py= helper recognizes both =?=-form and imperative-verb fo
- *PROPERTIES drawer stays.* Org-drill needs the =:ID:=, =:DRILL_LAST_INTERVAL:=, =:DRILL_EASE:= etc. for SRS state. The Anki output strips it (see the script change).
- *=SCHEDULED:= / =DEADLINE:= planning lines stay.* Same reason. The Anki output strips them.
+* Card Authoring Principles
+
+The canonical shapes above are the house style; these are the reasons behind them, drawn from the spaced-repetition literature. =drill-deck-stats.py= checks the mechanical ones; the rest guide the rewrite and the content pass.
+
+- *One fact per card (minimum information principle).* A card should test a single retrievable connection. A back that bundles several independent facts gets partially recalled and burns repetitions on the parts you already know. When a body covers unrelated attributes, split it into separate cards. =drill-deck-stats.py= flags long backs as a non-blocking NOTE.
+
+- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage.
+
+- *Avoid binary prompts.* "Is X true?" and "A or B?" allow a coin-flip guess and produce shallow understanding. Reformulate open-ended — "How does X affect Y?" beats "Does X affect Y?" Flagged as a non-blocking NOTE.
+
+- *Avoid lists and enumerations.* Unordered sets past about five members, and long lists, recall poorly as a single card. Split the list across cards (overlapping cloze is the textbook alternative, but this pipeline has no cloze shape, so split instead). List-shaped backs are flagged as a non-blocking NOTE.
+
+- *Make cues precise.* A vague question admits several reasonable answers, so you can't tell whether you knew the intended one. Include enough context that only the intended answer fits, without narrowing into provincial trivia.
+
+- *Combat interference.* Confusable cards inhibit each other; two near-identical fronts are the worst case. Disambiguate them with distinguishing context, or merge them. =drill-deck-stats.py= flags duplicate / near-duplicate fronts.
+
+- *Understand before you memorize.* Cards are the last step, after the material is understood and structured. A card you can't explain is a leech waiting to happen.
+
+Sources: Wozniak's [[https://www.supermemo.com/en/blog/twenty-rules-of-formulating-knowledge][Twenty rules of formulating knowledge]], Andy Matuschak's [[https://andymatuschak.org/prompts/][How to write good prompts]], Michael Nielsen's [[https://augmentingcognition.com/ltm.html][Augmenting Long-term Memory]], and the [[https://docs.ankiweb.net/][Anki manual]].
+
* Approach: Phases
** Phase A: Question-form + title audit (per card and per file)
@@ -110,7 +134,7 @@ Run =drill-deck-stats.py= on the source first to get the structural inventory:
.ai/scripts/drill-deck-stats.py <source.org>
#+end_src
-The script reports the deck title from =#+TITLE:= (and flags it if it contains source-tool jargon like "Org-Drill"), card count, PROPERTIES-drawer count, =*** Answer= sub-header count, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. Each surfaced card is a candidate for the rewrite, plus the title itself if flagged.
+The script reports the deck title from =#+TITLE:= (and flags it if it contains source-tool jargon like "Org-Drill"), card count, PROPERTIES-drawer count, =*** Answer= sub-header count, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also flags possible answer leakage and duplicate / near-duplicate fronts (both blocking), and surfaces non-blocking NOTEs for overloaded, list-shaped, or binary cards. Each surfaced card is a candidate for the rewrite, plus the title itself if flagged.
For each candidate, propose the new heading in advance so Phase C is mechanical. For person cards, the proposal is the role descriptor + topical anchor pair. For acronym/concept cards, the proposal is the existing body question promoted to the heading.
@@ -136,6 +160,7 @@ Categories to look for:
- Project facts: milestone shifts, submission states, exercise / demo dates
- External contacts: title or affiliation changes
- Company facts: head count, funding, customer status
+- Removable cards: trivia not worth memorizing, or a fact whose underlying source no longer appears in any source-of-truth doc (flag as a deletion candidate, not a rewrite)
Skip cards where you find no staleness. Cap at 2,000 words.
#+end_example
@@ -144,6 +169,8 @@ Include any user-supplied seed fixes in the dispatch (e.g., "Vrezh is now full-t
Output of Phase B: a structured per-card list of content updates with confidence levels. High-confidence findings get baked in during Phase C. Medium-confidence findings are reviewed inline before baking. Low-confidence findings are surfaced but skipped unless the user calls them in.
+*Removal and leeches.* Two dispositions beyond rewrite. (1) Cost-benefit removal: a card flagged as removable is a deletion candidate — weigh whether the fact clears a "worth memorizing" bar before keeping it. (2) Leech feedback: when Anki suspends a card as a leech (8 lapses by default), the card's formulation is the problem, not the review effort; route it back through Phase B/C as a reformulation target, preserving its =:ID:= so Anki keeps the lapse history. The org → Anki flow is one-directional: leech tags, lapse counts, and per-card success rates live in Anki and never flow back to the source, so these signals are carried in by hand. (Anki [[https://docs.ankiweb.net/leeches.html][leech]] guidance is "reformulate, don't grind".)
+
** Phase C: Source rewrite
Take Phase A's question-rewrite plan and Phase B's content-update list, apply them to the source file. Preserve every card's =:PROPERTIES:= drawer (especially =:ID:=) and =SCHEDULED:= line verbatim — those carry SRS state that must survive the rewrite.
@@ -217,10 +244,12 @@ The core converter. Reads an org-drill source file, emits a stable-ID Anki =.apk
** =drill-deck-stats.py=
-Inventory + workflow-violation warnings for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. Exits 0 when clean, 1 when warnings present, so it gates =drill-deck-sync=.
+Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=.
Imperative-verb allowlist: Spell, Describe, Explain, Name, List, Give, Show, Tell, Define, Compare, Identify, Outline, Introduce, Walk, State, Recite, Recall, Summarize.
+The fuzzy checks (leakage ratio, overloaded word count) are tuned by the =LEAKAGE_*= and =BACK_WORD_LIMIT= constants at the top of the script. Loosen them if a real deck trips false positives.
+
** =drill-deck-diff-ids.py=
SRS-state preservation check between two versions of a deck. Extracts every =:ID:= from each, reports IDs that disappeared (lost SRS state — worst-case bug) or appeared (new cards). Exits 0 when clean, 1 when any disappeared/appeared.
@@ -264,6 +293,8 @@ If you find the script doing something else, update the script before regenerati
5. *Skipping the content-accuracy pass.* The structural rewrite alone leaves stale facts in place. The drill cards become a memorization tool for the wrong information.
6. *Treating subagent output as gospel.* Medium- and low-confidence findings need human review before baking. The subagent surfaces; the main thread decides.
7. *Running =drill-deck-sync= without =--diff-against=.* The stats check still runs, but the SRS-state preservation check doesn't. On a rewrite of any size, pass =--diff-against /tmp/<name>-prerewrite.org= (grab from git first).
+8. *Answer leakage.* A question that restates its own answer tests recognition, not recall — the card looks learned when it isn't. =drill-deck-stats.py= flags high front/back word overlap.
+9. *Encoding scheduling in the source.* Retention, intervals, and FSRS state are Anki-side options; the org files and =drill-to-anki.py= carry only card content plus identity. See the scheduling note in the Overview.
* Living Document
@@ -284,3 +315,6 @@ After the first run, scripted the safety-net checks into three helpers: =drill-d
*** 2026-05-30: Title-audit added (same day)
Craig noticed the Anki deck name still showed as "DeepSat Org-Drill Flashcards" because the source =#+TITLE:= leaks tool-name jargon into Anki. Added a "Deck title" subsection under Canonical Card Shape, expanded Phase A to audit the title, and extended =drill-deck-stats.py= to flag any title matching =org[-\s]?drill= (case-insensitive). Stable-ID caveat documented: renaming the deck changes the Anki deck ID, so the next import lands as a new deck and the old one needs deleting from Anki.
+
+*** 2026-05-30: Authoring-quality checks + Card Authoring section (same day)
+Researched flashcard / spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki manual, the FSRS docs) and folded the findings in. =drill-deck-stats.py= gained answer-leakage and duplicate-front checks (blocking), plus non-blocking NOTEs for overloaded backs, list-shaped backs, and binary prompts. Added a "Card Authoring Principles" section (the why behind the canonical shapes), a person-card splitting path, a Phase B cost-benefit-removal + leech-feedback disposition, and a scheduling-is-Anki-side note in the Overview. Deliberately not adopted, with reasons: cloze cards (would need a second note type and an authoring convention), per-card tractability targeting and FSRS-retention encoding (Anki-side telemetry that never flows back to the source), on-face source-stamping (the converter strips those drawers by design; provenance stays in the org layer).
diff --git a/claude-templates/.ai/scripts/drill-deck-stats.py b/claude-templates/.ai/scripts/drill-deck-stats.py
index 72d1cde..d0707e2 100755
--- a/claude-templates/.ai/scripts/drill-deck-stats.py
+++ b/claude-templates/.ai/scripts/drill-deck-stats.py
@@ -1,15 +1,28 @@
#!/usr/bin/env python3
-"""Inventory + workflow-violation warnings for an org-drill deck source file.
+"""Inventory + authoring-quality checks for an org-drill deck source file.
-Reports counts and flags violations:
-- Total cards (depth-2 `:drill:` headings)
-- PROPERTIES drawer count (should match card count)
-- `*** Answer` sub-header count (should be 0 per drill-deck-review.org)
-- Cards missing :ID: (loses identity across versions, risks SRS-state loss)
-- Cards whose heading lacks `?` (likely a topic-as-heading not yet rewritten)
+Reports counts and flags two tiers of issue.
-Exits 0 when clean, 1 when any warnings are present. Use as a gate before
-regenerating the Anki deck or running drill-deck-sync.
+Blocking WARNs (exit 1):
+- PROPERTIES drawer count not matching card count
+- Cards missing :ID: (risks SRS-state loss across rewrites)
+- `*** Answer` sub-headers (should be 0 per drill-deck-review.org)
+- Non-prompt headings (topic-as-heading not yet rewritten)
+- #+TITLE missing, or carrying source-tool jargon ("org-drill")
+- Answer leakage: a card whose question echoes most of its own answer
+- Duplicate / near-duplicate fronts (interference between confusable cards)
+
+Non-blocking NOTEs (exit unaffected):
+- Overloaded backs (long answer — candidate to split into atomic cards)
+- List-shaped backs (enumeration — candidate to split or use overlapping cloze)
+- Binary yes/no prompts (low retrieval effort — candidate to reformulate)
+
+Exits 0 when no blocking warnings are present, 1 otherwise, 2 on bad usage.
+Use as a gate before regenerating the Anki deck or running drill-deck-sync.
+
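+The fuzzy checks (leakage, overloaded) are tuned by the LEAKAGE_* and
+BACK_WORD_LIMIT constants below; loosen them if a real deck trips false
+positives. The duplicate-front check has no tuning knob: it compares headings
+after lowercasing and stripping punctuation.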
Usage:
drill-deck-stats.py <file.org>
@@ -27,16 +40,35 @@ PROP_END_RE = re.compile(r"^\s*:END:\s*$")
ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
+PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
+BINARY_LEAD_RE = re.compile(
+ r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
+ re.IGNORECASE,
+)
-# A heading qualifies as "prompt form" if it contains `?` or starts with
-# one of these imperative verbs (directive prompts like "Spell these out"
-# and "Introduce yourself" are valid even without `?`).
+# A heading qualifies as "prompt form" if it contains `?` or starts with one of
+# these imperative verbs (directive prompts like "Spell these out" and
+# "Introduce yourself" are valid even without `?`).
IMPERATIVE_VERBS = frozenset({
"spell", "describe", "explain", "name", "list", "give",
"show", "tell", "define", "compare", "identify", "outline",
"introduce", "walk", "state", "recite", "recall", "summarize",
})
+# Function words ignored when comparing a question against its answer.
+STOPWORDS = frozenset({
+ "the", "a", "an", "is", "are", "was", "were", "of", "to", "in", "on",
+ "for", "and", "or", "with", "what", "who", "whom", "when", "where", "why",
+ "how", "which", "does", "do", "did", "tell", "me", "about", "their", "this",
+ "that", "it", "as", "at", "by", "be", "your", "you", "they", "them",
+})
+
+# Tuning knobs for the fuzzy checks.
+LEAKAGE_RATIO = 0.8 # share of a question's content words echoed in its answer
+LEAKAGE_MIN_WORDS = 3 # ignore very short questions, where overlap is noise
+BACK_WORD_LIMIT = 60 # words on a card back before it's flagged as overloaded
+
def is_prompt_form(heading: str) -> bool:
"""True if the heading reads as a question or imperative prompt."""
@@ -46,6 +78,116 @@ def is_prompt_form(heading: str) -> bool:
return first_word in IMPERATIVE_VERBS
+def content_words(text: str) -> set[str]:
+ """Lowercased alphanumeric tokens of length >= 3, minus stopwords.
+
+ Tokens are runs of ASCII letters/digits taken from the lowercased text, so
+ punctuation splits words and non-ASCII letters never become part of a
+ token. Anything listed in STOPWORDS is discarded. The result is a set, so
+ repeated words count once.
+ """
+ return {w for w in re.findall(r"[a-z0-9]+", text.lower())
+ if len(w) >= 3 and w not in STOPWORDS}
+
+
+def leakage_ratio(heading: str, body: str) -> float:
+ """Fraction of the question's content words that reappear in the answer.
+
+ A high ratio means the answer is largely restated in the question, so the
+ card can be answered by recognition rather than recall. Returns 0.0 for a
+ question with fewer than LEAKAGE_MIN_WORDS content words, where overlap is
+ just noise.
+
+ The denominator is the heading's content-word count, so the result lies
+ between 0.0 and 1.0 and extra words in the body never lower it.
+ """
+ hw = content_words(heading)
+ # Too few content words to judge. With LEAKAGE_MIN_WORDS >= 1 this guard
+ # also keeps the division below from seeing an empty set.
+ if len(hw) < LEAKAGE_MIN_WORDS:
+ return 0.0
+ return len(hw & content_words(body)) / len(hw)
+
+
+def normalize_heading(heading: str) -> str:
+ """Collapse a heading to a comparison key (lowercase, alnum + single spaces)."""
+ # Inner sub: every character other than a-z, 0-9, or space becomes a space.
+ # Outer sub: whitespace runs shrink to one space; strip() trims both ends.
+ return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip()
+
+
+def is_binary_prompt(heading: str) -> bool:
+ """True for yes/no or 'A or B' prompts, which need little retrieval effort.
+
+ Two independent triggers: the heading starts with one of the auxiliary
+ verbs in BINARY_LEAD_RE, or it contains the standalone word "or" and ends
+ with "?" (trailing whitespace ignored).
+ """
+ if BINARY_LEAD_RE.match(heading):
+ return True
+ # NOTE(review): this branch fires on any question containing "or", not only
+ # on two-alternative prompts; acceptable while the result is a non-blocking NOTE.
+ return bool(re.search(r"\bor\b", heading, re.IGNORECASE)) and heading.rstrip().endswith("?")
+
+
+def back_word_count(body: str) -> int:
+ """Number of whitespace-separated tokens in the answer body (0 when empty)."""
+ return len(body.split())
+
+
+def is_list_back(body: str) -> bool:
+ """True if the answer body is mostly an org list (an enumeration card).
+
+ Blank lines are ignored. The body qualifies when it has at least two
+ bullet lines (per BULLET_RE) and bullets make up at least half of the
+ non-blank lines.
+ """
+ lines = [ln for ln in body.splitlines() if ln.strip()]
+ # A single non-blank line can never be an enumeration.
+ if len(lines) < 2:
+ return False
+ bullets = sum(1 for ln in lines if BULLET_RE.match(ln))
+ # `bullets * 2 >= len(lines)` is the integer form of "bullets are >= 50%".
+ return bullets >= 2 and bullets * 2 >= len(lines)
+
+
+def parse_cards(lines: list[str]) -> tuple[list[dict], int]:
+ """Parse :drill: cards from org lines.
+
+ Returns (cards, prop_count). Each card is a dict with heading, has_id,
+ has_answer, and body (the answer text with PROPERTIES drawers, planning
+ lines, and `*** Answer` headers removed, approximating the rendered back).
+
+ prop_count is one total for the whole input: the number of drawer-opening
+ lines seen inside cards. Lines outside any card are skipped.
+ """
+ cards: list[dict] = []
+ prop_count = 0
+ i = 0
+ n = len(lines)
+ # Outer loop: advance to the next card heading.
+ # NOTE(review): CARD_RE is defined outside this hunk; group(1) is presumably
+ # the heading text without the :drill: tag — confirm against its definition.
+ while i < n:
+ m = CARD_RE.match(lines[i])
+ if not m:
+ i += 1
+ continue
+ heading = m.group(1).strip()
+ i += 1
+ has_id = False
+ has_answer = False
+ in_drawer = False
+ body_lines: list[str] = []
+ # Inner loop: consume this card's lines until the next card or a
+ # top-level ("* ") heading; that line is left for the outer loop.
+ while i < n:
+ line = lines[i]
+ if line.startswith("* ") or CARD_RE.match(line):
+ break
+ if PROP_START_RE.match(line):
+ prop_count += 1
+ in_drawer = True
+ elif in_drawer and PROP_END_RE.match(line):
+ in_drawer = False
+ elif in_drawer:
+ # Drawer contents never reach the body; only :ID: is recorded.
+ if ID_RE.match(line):
+ has_id = True
+ elif ANSWER_RE.match(line):
+ # The sub-header itself is dropped; the lines after it are kept.
+ has_answer = True
+ elif PLANNING_RE.match(line):
+ # SCHEDULED / DEADLINE / CLOSED lines are not answer text.
+ pass
+ else:
+ body_lines.append(line)
+ i += 1
+ cards.append({
+ "heading": heading,
+ "has_id": has_id,
+ "has_answer": has_answer,
+ "body": "\n".join(body_lines).strip(),
+ })
+ return cards, prop_count
+
+
+def find_duplicate_fronts(cards: list[dict]) -> list[tuple[str, str]]:
+ """Return (first, dup) heading pairs that normalize to the same key.
+
+ The first card seen for a key is the reference; every later card with the
+ same key yields one (reference heading, later heading) pair. Only the
+ "heading" entry of each card is read. Matching is exact on the normalized
+ key, so paraphrased fronts are not detected.
+ """
+ seen: dict[str, str] = {}
+ dups: list[tuple[str, str]] = []
+ for c in cards:
+ key = normalize_heading(c["heading"])
+ # Headings that normalize to nothing would all collide on ""; skip them.
+ if not key:
+ continue
+ if key in seen:
+ dups.append((seen[key], c["heading"]))
+ else:
+ seen[key] = c["heading"]
+ return dups
+
+
def main() -> int:
if len(sys.argv) != 2:
print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
@@ -65,84 +207,78 @@ def main() -> int:
title = m.group(1).strip()
break
- cards: list[tuple[str, bool, bool]] = [] # (heading, has_id, has_answer_subheader)
- answer_count = 0
- prop_count = 0
+ cards, prop_count = parse_cards(lines)
- i = 0
- while i < len(lines):
- m = CARD_RE.match(lines[i])
- if m:
- heading = m.group(1).strip()
- i += 1
- has_id = False
- has_answer = False
- in_drawer = False
- while i < len(lines):
- line = lines[i]
- if line.startswith("* ") or CARD_RE.match(line):
- break
- if PROP_START_RE.match(line):
- prop_count += 1
- in_drawer = True
- elif in_drawer and PROP_END_RE.match(line):
- in_drawer = False
- elif in_drawer and ID_RE.match(line):
- has_id = True
- elif ANSWER_RE.match(line):
- answer_count += 1
- has_answer = True
- i += 1
- cards.append((heading, has_id, has_answer))
- continue
- i += 1
-
- not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)]
- no_id = [h for h, has_id, _ in cards if not has_id]
+ no_id = [c["heading"] for c in cards if not c["has_id"]]
+ not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
+ answer_count = sum(1 for c in cards if c["has_answer"])
+ leaky = [c["heading"] for c in cards
+ if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO]
+ dups = find_duplicate_fronts(cards)
+ overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
+ listy = [c["heading"] for c in cards if is_list_back(c["body"])]
+ binary = [c["heading"] for c in cards if is_binary_prompt(c["heading"])]
print(f"{path.name} — drill deck stats")
print()
- title_display = title if title else "(no #+TITLE)"
- print(f"Deck title: {title_display}")
+ print(f"Deck title: {title if title else '(no #+TITLE)'}")
print(f"Cards: {len(cards)}")
drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
- answer_status = "clean" if answer_count == 0 else "workflow violation"
- print(f"*** Answer sub-headers: {answer_count} ({answer_status})")
+ print(f"*** Answer sub-headers: {answer_count} ({'clean' if answer_count == 0 else 'workflow violation'})")
print(f"Cards missing :ID:: {len(no_id)}")
print(f"Cards with non-prompt heading: {len(not_prompt)}")
+ print(f"Cards with possible answer leakage: {len(leaky)}")
+ print(f"Duplicate / near-duplicate fronts: {len(dups)}")
print()
warnings = 0
- if title is None:
+
+ def emit_list(items: list[str]) -> None:
+ for h in items[:5]:
+ print(f" - {h}")
+ if len(items) > 5:
+ print(f" - ... and {len(items) - 5} more")
+
+ def warn(msg: str, items: list[str] | None = None) -> None:
+ nonlocal warnings
warnings += 1
- print("WARN: no #+TITLE: line found; deck name will fall back to the file basename")
+ print(f"WARN: {msg}")
+ if items:
+ emit_list(items)
+
+ def note(msg: str, items: list[str] | None = None) -> None:
+ print(f"NOTE: {msg}")
+ if items:
+ emit_list(items)
+
+ if title is None:
+ warn("no #+TITLE: line found; deck name will fall back to the file basename")
elif SOURCE_TOOL_RE.search(title):
- warnings += 1
- print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
+ warn(f"#+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
if answer_count:
- warnings += 1
- print(f"WARN: {answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
+ warn(f"{answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)")
if prop_count != len(cards):
- warnings += 1
- print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}")
+ warn(f"PROPERTIES count {prop_count} does not match card count {len(cards)}")
if no_id:
- warnings += 1
- print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites")
- for h in no_id[:5]:
- print(f" - {h}")
- if len(no_id) > 5:
- print(f" - ... and {len(no_id) - 5} more")
+ warn(f"{len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites", no_id)
if not_prompt:
- warnings += 1
- print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten")
- for h in not_prompt[:5]:
- print(f" - {h}")
- if len(not_prompt) > 5:
- print(f" - ... and {len(not_prompt) - 5} more")
+ warn(f"{len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten", not_prompt)
+ if leaky:
+ warn(f"{len(leaky)} cards may leak their answer (question echoes >= {int(LEAKAGE_RATIO * 100)}% of its own answer's key words); reformulate so the answer is recalled, not recognized", leaky)
+ if dups:
+ warn(f"{len(dups)} duplicate / near-duplicate fronts (interference between confusable cards); disambiguate or merge",
+ [f"{a} == {b}" for a, b in dups])
+
+ if overloaded:
+ note(f"{len(overloaded)} cards have a long answer (> {BACK_WORD_LIMIT} words); candidates to split into atomic cards", overloaded)
+ if listy:
+ note(f"{len(listy)} cards have a list-shaped answer; enumeration cards recall poorly — candidates to split or use overlapping cloze", listy)
+ if binary:
+ note(f"{len(binary)} cards are binary (yes/no or 'A or B'); low retrieval effort — candidates to reformulate open-ended", binary)
if warnings == 0:
- print("clean")
+ print("clean (with non-blocking notes above)" if (overloaded or listy or binary) else "clean")
return 0
return 1
diff --git a/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py b/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py
index 3154d42..80b9913 100644
--- a/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py
@@ -132,3 +132,174 @@ def test_cli_properties_count_mismatch_warns_and_exits_one(tmp_path):
r = _run(f)
assert r.returncode == 1
assert "does not match card count" in r.stdout
+
+
+# --- content_words / leakage_ratio (pure) ---
+
+def test_content_words_drops_stopwords_and_short_tokens(stats):
+ """'What', 'is', and 'the' are stopwords, leaving only 'leo' and 'regime'."""
+ assert stats.content_words("What is the LEO regime?") == {"leo", "regime"}
+
+
+def test_leakage_ratio_high_when_answer_restates_question(stats):
+ """All four heading content words occur in the body, so the ratio is exactly 1.0."""
+ ratio = stats.leakage_ratio(
+ "primary orbital regimes satellites",
+ "the primary orbital regimes for satellites are listed",
+ )
+ assert ratio == 1.0
+
+
+def test_leakage_ratio_zero_for_short_question(stats):
+ """A heading under the minimum content-word count reports 0.0 despite full overlap."""
+ # "LEO" is the only content word, below LEAKAGE_MIN_WORDS, so overlap is noise.
+ assert stats.leakage_ratio("What is LEO?", "LEO means low earth orbit") == 0.0
+
+
+# --- normalize_heading (pure) ---
+
+def test_normalize_heading_lowercases_and_strips_punctuation(stats):
+ """Dots and '?' turn into spaces, space runs collapse, and the ends are trimmed."""
+ assert stats.normalize_heading(" What is L.E.O.? ") == "what is l e o"
+
+
+def test_normalize_heading_collisions_match(stats):
+ """Headings differing only in case and trailing punctuation share one key."""
+ assert stats.normalize_heading("What is LEO?") == stats.normalize_heading("what is leo")
+
+
+# --- is_binary_prompt (pure) ---
+
+def test_is_binary_prompt_true_for_yes_no_lead(stats):
+ """A heading opening with the auxiliary verb 'Is' counts as binary."""
+ assert stats.is_binary_prompt("Is LEO below GEO?") is True
+
+
+def test_is_binary_prompt_true_for_a_or_b(stats):
+ """An 'A or B' question counts as binary."""
+ # NOTE(review): this heading also starts with "Is", so the lead-verb check
+ # returns True before the "or" branch runs. A heading such as
+ # "Which is closer, LEO or GEO?" would exercise the "or" branch on its own.
+ assert stats.is_binary_prompt("Is it LEO or GEO?") is True
+
+
+def test_is_binary_prompt_false_for_open_question(stats):
+ """An open 'What ...?' question with no 'or' is not binary."""
+ assert stats.is_binary_prompt("What distinguishes LEO from GEO?") is False
+
+
+# --- back_word_count / is_list_back (pure) ---
+
+def test_back_word_count(stats):
+ """Counts whitespace-separated words; an empty body counts as zero."""
+ assert stats.back_word_count("one two three") == 3
+ assert stats.back_word_count("") == 0
+
+
+def test_is_list_back_true_for_bulleted_body(stats):
+ """Three dash bullets and nothing else is a list-shaped back."""
+ assert stats.is_list_back("- LEO\n- MEO\n- GEO") is True
+
+
+def test_is_list_back_false_for_prose(stats):
+ """Two prose lines with no bullet markers are not a list."""
+ assert stats.is_list_back("Low Earth Orbit.\nThe closest regime.") is False
+
+
+def test_is_list_back_false_for_single_bullet(stats):
+ """One bullet is below the two-bullet minimum even though it is half the lines."""
+ assert stats.is_list_back("- only one bullet\nplain prose line") is False
+
+
+# --- parse_cards (pure) ---
+
+def test_parse_cards_captures_body_without_drawer_planning_or_answer_header(stats):
+ """One card with a drawer, a SCHEDULED line, and an Answer header parses cleanly.
+
+ The drawer, planning line, and `*** Answer` header must be recorded in the
+ flags / count but kept out of the body, which is just the answer text.
+ """
+ text = (
+ "* Sec\n"
+ "** Q one? :drill:\n"
+ ":PROPERTIES:\n:ID: id-1\n:END:\n"
+ "SCHEDULED: <2026-05-20 Wed>\n"
+ "*** Answer\n"
+ "the real answer\n"
+ )
+ cards, prop_count = stats.parse_cards(text.splitlines())
+ assert prop_count == 1
+ assert len(cards) == 1
+ c = cards[0]
+ assert c["heading"] == "Q one?"
+ assert c["has_id"] is True
+ assert c["has_answer"] is True
+ assert c["body"] == "the real answer"
+
+
+def test_find_duplicate_fronts_matches_normalized_headings(stats):
+ """Two headings differing only in case pair up as (first seen, later); GEO stays unique."""
+ cards = [
+ {"heading": "What is LEO?"},
+ {"heading": "what is leo?"},
+ {"heading": "What is GEO?"},
+ ]
+ dups = stats.find_duplicate_fronts(cards)
+ assert len(dups) == 1
+ assert dups[0] == ("What is LEO?", "what is leo?")
+
+
+# --- CLI: new blocking checks ---
+
+# Single-card deck whose body restates every content word of its heading
+# (primary, orbital, regimes, satellites), so only the leakage check should fire.
+LEAKY_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the primary orbital regimes for satellites? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+The primary orbital regimes for satellites are listed here.
+"""
+
+# Two cards with distinct :ID: values but the identical heading "What is LEO?",
+# used to trigger the duplicate-front check.
+DUP_FRONT_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Low Earth Orbit.
+** What is LEO? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+Low Earth Orbit, restated.
+"""
+
+
+def test_cli_answer_leakage_warns_and_exits_one(tmp_path):
+ """Running the CLI on LEAKY_DECK exits 1 and mentions a leak in stdout."""
+ # _run is defined earlier in this test module (outside this hunk); the result
+ # exposes .returncode and .stdout as used below.
+ f = tmp_path / "leaky.org"
+ f.write_text(LEAKY_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "leak" in r.stdout.lower()
+
+
+def test_cli_duplicate_front_warns_and_exits_one(tmp_path):
+ """Running the CLI on DUP_FRONT_DECK exits 1 and mentions a duplicate in stdout."""
+ f = tmp_path / "dup.org"
+ f.write_text(DUP_FRONT_DECK)
+ r = _run(f)
+ assert r.returncode == 1
+ assert "duplicate" in r.stdout.lower()
+
+
+# --- CLI: non-blocking NOTEs keep exit 0 ---
+
+# Three well-formed cards, one per non-blocking NOTE: a yes/no heading (c1),
+# a three-bullet list answer (c2), and a prose answer longer than
+# BACK_WORD_LIMIT words (c3). None should raise a blocking WARN.
+NOTES_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** Is LEO closer than GEO? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+Yes, much closer.
+** What orbital regimes exist? :drill:
+:PROPERTIES:
+:ID: c2
+:END:
+- LEO
+- MEO
+- GEO
+** Describe the platform elements in full :drill:
+:PROPERTIES:
+:ID: c3
+:END:
+The platform carries power generation, propulsion, attitude control, thermal regulation, and radio hardware arranged around a central frame. Each element draws from shared resources and must survive launch loads, vacuum, and radiation. Engineers trade mass against capability when every kilogram raises cost, so redundancy is added only where a single failure would end the mission entirely and cheaper options cannot cover the same risk.
+"""
+
+
+def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
+ """A deck that only triggers NOTEs still exits 0 and prints at least one NOTE."""
+ f = tmp_path / "notes.org"
+ f.write_text(NOTES_DECK)
+ r = _run(f)
+ assert r.returncode == 0
+ # NOTE(review): this passes if any single NOTE is printed; it does not pin
+ # that the binary, list-shaped, and overloaded notes each fire.
+ assert "NOTE" in r.stdout
+ assert "NOTE" in r.stdout
diff --git a/claude-templates/.ai/workflows/drill-deck-review.org b/claude-templates/.ai/workflows/drill-deck-review.org
index 7e9eed5..fe12f3c 100644
--- a/claude-templates/.ai/workflows/drill-deck-review.org
+++ b/claude-templates/.ai/workflows/drill-deck-review.org
@@ -8,6 +8,8 @@ Take an org-drill flashcard file and bring it into the canonical shape — every
The workflow has three substantive passes (question-form audit, content-accuracy audit, source rewrite) followed by a mechanical regenerate-and-place step. Content review is dispatched to a subagent because it's bounded research across project source-of-truth files; the structural rewrite stays in the main thread because it touches the SRS state we don't want to lose. Three helper scripts (=drill-deck-stats.py=, =drill-deck-diff-ids.py=, =drill-deck-sync=) automate the inventory, the safety check, and the regenerate-and-place.
+*Scheduling lives on the Anki side.* Desired retention and the FSRS scheduling model are per-deck Anki options set on the phone, never controlled by the org source or =drill-to-anki.py=. The pipeline's only scheduling job is keeping each card's identity (the =:ID:=-derived GUID) stable so Anki's review history survives a rewrite. Don't try to encode retention, intervals, or org-drill's SM-2 state into the Anki output — the two schedulers are separate, and the import carries only card content plus identity. (Anki's desired-retention default is 90%; see [[https://docs.ankiweb.net/deck-options.html][the deck-options manual]].)
+
* When to Use This Workflow
Trigger phrases:
@@ -88,6 +90,8 @@ Format: "Who is X? Tell me about their Y." where X is a role descriptor that doe
Note: pick a role descriptor that genuinely identifies one person. If multiple people share the role description, add a single distinguishing detail (e.g., "the one who works evenings", "the Vineti alum"). Don't pile on parentheticals.
+ Splitting: the person card deliberately trades atomicity for narrative recall — one card carries identity plus several attributes. When a body bundles genuinely unrelated attributes (role, employment history, limitations, scope) rather than one coherent topic, split it into multiple cards. One of the resulting cards inherits the existing =:ID:= (and its SRS history); each new sibling starts fresh and will correctly show in =drill-deck-diff-ids.py= as an appeared ID. The criterion: split when the body reads as a list of separate facts, keep it whole when it reads as one story. (Minimum-information principle — Wozniak rule 4, Matuschak "Focused".)
+
*** Talking-points and directive cards
Already in prompt form ("Introduce Yourself", "Spell out these orbital regime acronyms", "What is DeepSat?"). Leave the heading alone. Still strip the =*** Answer= sub-header and audit the body content for staleness.
@@ -100,6 +104,26 @@ The =drill-deck-stats.py= helper recognizes both =?=-form and imperative-verb fo
- *PROPERTIES drawer stays.* Org-drill needs the =:ID:=, =:DRILL_LAST_INTERVAL:=, =:DRILL_EASE:= etc. for SRS state. The Anki output strips it (see the script change).
- *=SCHEDULED:= / =DEADLINE:= planning lines stay.* Same reason. The Anki output strips them.
+* Card Authoring Principles
+
+The canonical shapes above are the house style; these are the reasons behind them, drawn from the spaced-repetition literature. =drill-deck-stats.py= checks the mechanical ones; the rest guide the rewrite and the content pass.
+
+- *One fact per card (minimum information principle).* A card should test a single retrievable connection. A back that bundles several independent facts gets partially recalled and burns repetitions on the parts you already know. When a body covers unrelated attributes, split it into separate cards. =drill-deck-stats.py= flags long backs as a non-blocking NOTE.
+
+- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage.
+
+- *Avoid binary prompts.* "Is X true?" and "A or B?" allow a coin-flip guess and produce shallow understanding. Reformulate open-ended — "How does X affect Y?" beats "Does X affect Y?" Flagged as a non-blocking NOTE.
+
+- *Avoid lists and enumerations.* Unordered sets past about five members, and long lists, recall poorly as a single card. Split the list across cards (overlapping cloze is the textbook alternative, but this pipeline has no cloze shape, so split instead). List-shaped backs are flagged as a non-blocking NOTE.
+
+- *Make cues precise.* A vague question admits several reasonable answers, so you can't tell whether you knew the intended one. Include enough context that only the intended answer fits, without narrowing into provincial trivia.
+
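+- *Combat interference.* Confusable cards inhibit each other; two near-identical fronts are the worst case. Disambiguate them with distinguishing context, or merge them. =drill-deck-stats.py= flags duplicate / near-duplicate fronts (headings that are identical once case and punctuation are ignored; it does not detect looser paraphrases, so those still need a manual pass).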
+
+- *Understand before you memorize.* Cards are the last step, after the material is understood and structured. A card you can't explain is a leech waiting to happen.
+
+Sources: Wozniak's [[https://www.supermemo.com/en/blog/twenty-rules-of-formulating-knowledge][Twenty rules of formulating knowledge]], Andy Matuschak's [[https://andymatuschak.org/prompts/][How to write good prompts]], Michael Nielsen's [[https://augmentingcognition.com/ltm.html][Augmenting Long-term Memory]], and the [[https://docs.ankiweb.net/][Anki manual]].
+
* Approach: Phases
** Phase A: Question-form + title audit (per card and per file)
@@ -110,7 +134,7 @@ Run =drill-deck-stats.py= on the source first to get the structural inventory:
.ai/scripts/drill-deck-stats.py <source.org>
#+end_src
-The script reports the deck title from =#+TITLE:= (and flags it if it contains source-tool jargon like "Org-Drill"), card count, PROPERTIES-drawer count, =*** Answer= sub-header count, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. Each surfaced card is a candidate for the rewrite, plus the title itself if flagged.
+The script reports the deck title from =#+TITLE:= (and flags it if it contains source-tool jargon like "Org-Drill"), card count, PROPERTIES-drawer count, =*** Answer= sub-header count, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also flags possible answer leakage and duplicate / near-duplicate fronts (both blocking), and surfaces non-blocking NOTEs for overloaded, list-shaped, or binary cards. Each surfaced card is a candidate for the rewrite, plus the title itself if flagged.
For each candidate, propose the new heading in advance so Phase C is mechanical. For person cards, the proposal is the role descriptor + topical anchor pair. For acronym/concept cards, the proposal is the existing body question promoted to the heading.
@@ -136,6 +160,7 @@ Categories to look for:
- Project facts: milestone shifts, submission states, exercise / demo dates
- External contacts: title or affiliation changes
- Company facts: head count, funding, customer status
+- Removable cards: trivia not worth memorizing, or a fact whose underlying source no longer appears in any source-of-truth doc (flag as a deletion candidate, not a rewrite)
Skip cards where you find no staleness. Cap at 2,000 words.
#+end_example
@@ -144,6 +169,8 @@ Include any user-supplied seed fixes in the dispatch (e.g., "Vrezh is now full-t
Output of Phase B: a structured per-card list of content updates with confidence levels. High-confidence findings get baked in during Phase C. Medium-confidence findings are reviewed inline before baking. Low-confidence findings are surfaced but skipped unless the user calls them in.
+*Removal and leeches.* Two dispositions beyond rewrite. (1) Cost-benefit removal: a card flagged as removable is a deletion candidate — weigh whether the fact clears a "worth memorizing" bar before keeping it. (2) Leech feedback: when Anki suspends a card as a leech (8 lapses by default), the card's formulation is the problem, not the review effort; route it back through Phase B/C as a reformulation target, preserving its =:ID:= so Anki keeps the lapse history. The org → Anki flow is one-directional: leech tags, lapse counts, and per-card success rates live in Anki and never flow back to the source, so these signals are carried in by hand. (Anki [[https://docs.ankiweb.net/leeches.html][leech]] guidance is "reformulate, don't grind".)
+
** Phase C: Source rewrite
Take Phase A's question-rewrite plan and Phase B's content-update list, apply them to the source file. Preserve every card's =:PROPERTIES:= drawer (especially =:ID:=) and =SCHEDULED:= line verbatim — those carry SRS state that must survive the rewrite.
@@ -217,10 +244,12 @@ The core converter. Reads an org-drill source file, emits a stable-ID Anki =.apk
** =drill-deck-stats.py=
-Inventory + workflow-violation warnings for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. Exits 0 when clean, 1 when warnings present, so it gates =drill-deck-sync=.
+Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=.
Imperative-verb allowlist: Spell, Describe, Explain, Name, List, Give, Show, Tell, Define, Compare, Identify, Outline, Introduce, Walk, State, Recite, Recall, Summarize.
+The fuzzy checks (leakage ratio, overloaded word count) are tuned by the =LEAKAGE_*= and =BACK_WORD_LIMIT= constants at the top of the script. Loosen them if a real deck trips false positives.
+
** =drill-deck-diff-ids.py=
SRS-state preservation check between two versions of a deck. Extracts every =:ID:= from each, reports IDs that disappeared (lost SRS state — worst-case bug) or appeared (new cards). Exits 0 when clean, 1 when any disappeared/appeared.
@@ -264,6 +293,8 @@ If you find the script doing something else, update the script before regenerati
5. *Skipping the content-accuracy pass.* The structural rewrite alone leaves stale facts in place. The drill cards become a memorization tool for the wrong information.
6. *Treating subagent output as gospel.* Medium- and low-confidence findings need human review before baking. The subagent surfaces; the main thread decides.
7. *Running =drill-deck-sync= without =--diff-against=.* The stats check still runs, but the SRS-state preservation check doesn't. On a rewrite of any size, pass =--diff-against /tmp/<name>-prerewrite.org= (grab from git first).
+8. *Answer leakage.* A question that restates its own answer tests recognition, not recall — the card looks learned when it isn't. =drill-deck-stats.py= flags high front/back word overlap.
+9. *Encoding scheduling in the source.* Retention, intervals, and FSRS state are Anki-side options; the org files and =drill-to-anki.py= carry only card content plus identity. See the scheduling note in the Overview.
* Living Document
@@ -284,3 +315,6 @@ After the first run, scripted the safety-net checks into three helpers: =drill-d
*** 2026-05-30: Title-audit added (same day)
Craig noticed the Anki deck name still showed as "DeepSat Org-Drill Flashcards" because the source =#+TITLE:= leaks tool-name jargon into Anki. Added a "Deck title" subsection under Canonical Card Shape, expanded Phase A to audit the title, and extended =drill-deck-stats.py= to flag any title matching =org[-\s]?drill= (case-insensitive). Stable-ID caveat documented: renaming the deck changes the Anki deck ID, so the next import lands as a new deck and the old one needs deleting from Anki.
+
+*** 2026-05-30: Authoring-quality checks + Card Authoring section (same day)
+Researched flashcard / spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki manual, the FSRS docs) and folded the findings in. =drill-deck-stats.py= gained answer-leakage and duplicate-front checks (blocking), plus non-blocking NOTEs for overloaded backs, list-shaped backs, and binary prompts. Added a "Card Authoring Principles" section (the why behind the canonical shapes), a person-card splitting path, a Phase B cost-benefit-removal + leech-feedback disposition, and a scheduling-is-Anki-side note in the Overview. Deliberately not adopted, with reasons: cloze cards (would need a second note type and an authoring convention), per-card tractability targeting and FSRS-retention encoding (Anki-side telemetry that never flows back to the source), on-face source-stamping (the converter strips those drawers by design; provenance stays in the org layer).