diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-30 13:55:05 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-30 13:55:05 -0500 |
| commit | 5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1 (patch) | |
| tree | d25e5300cfa05d272efb124af2dfd6470fed8e55 /.ai/scripts/drill-deck-stats.py | |
| parent | 82e99ff8a4eb6d5aaba6ee02da3b5318a73b2125 (diff) | |
| download | rulesets-5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1.tar.gz rulesets-5bd759151d3ccf2d0a90f4b7de71e8c0e6e4a0a1.zip | |
feat(drill-deck): add authoring-quality checks and a card-authoring section
I researched spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki and FSRS docs) and folded the findings into the drill-deck pipeline.
drill-deck-stats.py now checks authoring quality on top of structure. Two checks block: answer leakage (a card where >= 80% of the question's content words reappear in its answer tests recognition, not recall) and duplicate / near-duplicate fronts (fronts that are identical after case, punctuation, and whitespace normalization; confusable cards interfere). Three checks emit non-blocking NOTEs, surfacing rewrite candidates without failing the gate: overloaded backs, list-shaped backs, and binary yes/no prompts. The fuzzy thresholds live in constants at the top of the script, so they can be loosened if a real deck trips false positives. I pulled the card-parsing into a parse_cards helper that captures each card's body, and added focused tests for every new helper plus CLI coverage of the leaky, duplicate, and notes-only cases.
drill-deck-review.org gains a Card Authoring Principles section (the why behind the canonical shapes, with sources), a person-card splitting path bounded by the :ID:-preservation rule, a Phase B cost-benefit-removal and leech-reformulation disposition, and a scheduling-is-Anki-side note so a future editor doesn't try to encode FSRS retention in the org source. I left out cloze cards (would need a second note type), per-card tractability targeting and retention encoding (Anki-side telemetry that never reaches the source), and on-face source-stamping (the converter strips those drawers by design). Each is noted with its reason.
Diffstat (limited to '.ai/scripts/drill-deck-stats.py')
| -rwxr-xr-x | .ai/scripts/drill-deck-stats.py | 276 |
1 files changed, 206 insertions, 70 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py index 72d1cde..d0707e2 100755 --- a/.ai/scripts/drill-deck-stats.py +++ b/.ai/scripts/drill-deck-stats.py @@ -1,15 +1,28 @@ #!/usr/bin/env python3 -"""Inventory + workflow-violation warnings for an org-drill deck source file. +"""Inventory + authoring-quality checks for an org-drill deck source file. -Reports counts and flags violations: -- Total cards (depth-2 `:drill:` headings) -- PROPERTIES drawer count (should match card count) -- `*** Answer` sub-header count (should be 0 per drill-deck-review.org) -- Cards missing :ID: (loses identity across versions, risks SRS-state loss) -- Cards whose heading lacks `?` (likely a topic-as-heading not yet rewritten) +Reports counts and flags two tiers of issue. -Exits 0 when clean, 1 when any warnings are present. Use as a gate before -regenerating the Anki deck or running drill-deck-sync. +Blocking WARNs (exit 1): +- PROPERTIES drawer count not matching card count +- Cards missing :ID: (risks SRS-state loss across rewrites) +- `*** Answer` sub-headers (should be 0 per drill-deck-review.org) +- Non-prompt headings (topic-as-heading not yet rewritten) +- #+TITLE missing, or carrying source-tool jargon ("org-drill") +- Answer leakage: a card whose question echoes most of its own answer +- Duplicate / near-duplicate fronts (interference between confusable cards) + +Non-blocking NOTEs (exit unaffected): +- Overloaded backs (long answer — candidate to split into atomic cards) +- List-shaped backs (enumeration — candidate to split or use overlapping cloze) +- Binary yes/no prompts (low retrieval effort — candidate to reformulate) + +Exits 0 when no blocking warnings are present, 1 otherwise, 2 on bad usage. +Use as a gate before regenerating the Anki deck or running drill-deck-sync. + +The fuzzy checks (leakage, duplicate, overloaded) are tuned by the LEAKAGE_* +and BACK_WORD_LIMIT constants below; loosen them if a real deck trips false +positives. 
Usage: drill-deck-stats.py <file.org> @@ -27,16 +40,35 @@ PROP_END_RE = re.compile(r"^\s*:END:\s*$") ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$") TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE) SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE) +PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s") +BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+") +BINARY_LEAD_RE = re.compile( + r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b", + re.IGNORECASE, +) -# A heading qualifies as "prompt form" if it contains `?` or starts with -# one of these imperative verbs (directive prompts like "Spell these out" -# and "Introduce yourself" are valid even without `?`). +# A heading qualifies as "prompt form" if it contains `?` or starts with one of +# these imperative verbs (directive prompts like "Spell these out" and +# "Introduce yourself" are valid even without `?`). IMPERATIVE_VERBS = frozenset({ "spell", "describe", "explain", "name", "list", "give", "show", "tell", "define", "compare", "identify", "outline", "introduce", "walk", "state", "recite", "recall", "summarize", }) +# Function words ignored when comparing a question against its answer. +STOPWORDS = frozenset({ + "the", "a", "an", "is", "are", "was", "were", "of", "to", "in", "on", + "for", "and", "or", "with", "what", "who", "whom", "when", "where", "why", + "how", "which", "does", "do", "did", "tell", "me", "about", "their", "this", + "that", "it", "as", "at", "by", "be", "your", "you", "they", "them", +}) + +# Tuning knobs for the fuzzy checks. 
+LEAKAGE_RATIO = 0.8 # share of a question's content words echoed in its answer +LEAKAGE_MIN_WORDS = 3 # ignore very short questions, where overlap is noise +BACK_WORD_LIMIT = 60 # words on a card back before it's flagged as overloaded + def is_prompt_form(heading: str) -> bool: """True if the heading reads as a question or imperative prompt.""" @@ -46,6 +78,116 @@ def is_prompt_form(heading: str) -> bool: return first_word in IMPERATIVE_VERBS +def content_words(text: str) -> set[str]: + """Lowercased alphanumeric tokens of length >= 3, minus stopwords.""" + return {w for w in re.findall(r"[a-z0-9]+", text.lower()) + if len(w) >= 3 and w not in STOPWORDS} + + +def leakage_ratio(heading: str, body: str) -> float: + """Fraction of the question's content words that reappear in the answer. + + A high ratio means the answer is largely restated in the question, so the + card can be answered by recognition rather than recall. Returns 0.0 for a + question with fewer than LEAKAGE_MIN_WORDS content words, where overlap is + just noise. 
+ """ + hw = content_words(heading) + if len(hw) < LEAKAGE_MIN_WORDS: + return 0.0 + return len(hw & content_words(body)) / len(hw) + + +def normalize_heading(heading: str) -> str: + """Collapse a heading to a comparison key (lowercase, alnum + single spaces).""" + return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip() + + +def is_binary_prompt(heading: str) -> bool: + """True for yes/no or 'A or B' prompts, which need little retrieval effort.""" + if BINARY_LEAD_RE.match(heading): + return True + return bool(re.search(r"\bor\b", heading, re.IGNORECASE)) and heading.rstrip().endswith("?") + + +def back_word_count(body: str) -> int: + return len(body.split()) + + +def is_list_back(body: str) -> bool: + """True if the answer body is mostly an org list (an enumeration card).""" + lines = [ln for ln in body.splitlines() if ln.strip()] + if len(lines) < 2: + return False + bullets = sum(1 for ln in lines if BULLET_RE.match(ln)) + return bullets >= 2 and bullets * 2 >= len(lines) + + +def parse_cards(lines: list[str]) -> tuple[list[dict], int]: + """Parse :drill: cards from org lines. + + Returns (cards, prop_count). Each card is a dict with heading, has_id, + has_answer, and body (the answer text with PROPERTIES drawers, planning + lines, and `*** Answer` headers removed, approximating the rendered back). 
+ """ + cards: list[dict] = [] + prop_count = 0 + i = 0 + n = len(lines) + while i < n: + m = CARD_RE.match(lines[i]) + if not m: + i += 1 + continue + heading = m.group(1).strip() + i += 1 + has_id = False + has_answer = False + in_drawer = False + body_lines: list[str] = [] + while i < n: + line = lines[i] + if line.startswith("* ") or CARD_RE.match(line): + break + if PROP_START_RE.match(line): + prop_count += 1 + in_drawer = True + elif in_drawer and PROP_END_RE.match(line): + in_drawer = False + elif in_drawer: + if ID_RE.match(line): + has_id = True + elif ANSWER_RE.match(line): + has_answer = True + elif PLANNING_RE.match(line): + pass + else: + body_lines.append(line) + i += 1 + cards.append({ + "heading": heading, + "has_id": has_id, + "has_answer": has_answer, + "body": "\n".join(body_lines).strip(), + }) + return cards, prop_count + + +def find_duplicate_fronts(cards: list[dict]) -> list[tuple[str, str]]: + """Return (first, dup) heading pairs that normalize to the same key.""" + seen: dict[str, str] = {} + dups: list[tuple[str, str]] = [] + for c in cards: + key = normalize_heading(c["heading"]) + if not key: + continue + if key in seen: + dups.append((seen[key], c["heading"])) + else: + seen[key] = c["heading"] + return dups + + def main() -> int: if len(sys.argv) != 2: print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr) @@ -65,84 +207,78 @@ def main() -> int: title = m.group(1).strip() break - cards: list[tuple[str, bool, bool]] = [] # (heading, has_id, has_answer_subheader) - answer_count = 0 - prop_count = 0 + cards, prop_count = parse_cards(lines) - i = 0 - while i < len(lines): - m = CARD_RE.match(lines[i]) - if m: - heading = m.group(1).strip() - i += 1 - has_id = False - has_answer = False - in_drawer = False - while i < len(lines): - line = lines[i] - if line.startswith("* ") or CARD_RE.match(line): - break - if PROP_START_RE.match(line): - prop_count += 1 - in_drawer = True - elif in_drawer and PROP_END_RE.match(line): - in_drawer = 
False - elif in_drawer and ID_RE.match(line): - has_id = True - elif ANSWER_RE.match(line): - answer_count += 1 - has_answer = True - i += 1 - cards.append((heading, has_id, has_answer)) - continue - i += 1 - - not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)] - no_id = [h for h, has_id, _ in cards if not has_id] + no_id = [c["heading"] for c in cards if not c["has_id"]] + not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])] + answer_count = sum(1 for c in cards if c["has_answer"]) + leaky = [c["heading"] for c in cards + if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO] + dups = find_duplicate_fronts(cards) + overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT] + listy = [c["heading"] for c in cards if is_list_back(c["body"])] + binary = [c["heading"] for c in cards if is_binary_prompt(c["heading"])] print(f"{path.name} — drill deck stats") print() - title_display = title if title else "(no #+TITLE)" - print(f"Deck title: {title_display}") + print(f"Deck title: {title if title else '(no #+TITLE)'}") print(f"Cards: {len(cards)}") drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})" print(f"PROPERTIES drawers: {prop_count} ({drawer_status})") - answer_status = "clean" if answer_count == 0 else "workflow violation" - print(f"*** Answer sub-headers: {answer_count} ({answer_status})") + print(f"*** Answer sub-headers: {answer_count} ({'clean' if answer_count == 0 else 'workflow violation'})") print(f"Cards missing :ID:: {len(no_id)}") print(f"Cards with non-prompt heading: {len(not_prompt)}") + print(f"Cards with possible answer leakage: {len(leaky)}") + print(f"Duplicate / near-duplicate fronts: {len(dups)}") print() warnings = 0 - if title is None: + + def emit_list(items: list[str]) -> None: + for h in items[:5]: + print(f" - {h}") + if len(items) > 5: + print(f" - ... 
and {len(items) - 5} more") + + def warn(msg: str, items: list[str] | None = None) -> None: + nonlocal warnings warnings += 1 - print("WARN: no #+TITLE: line found; deck name will fall back to the file basename") + print(f"WARN: {msg}") + if items: + emit_list(items) + + def note(msg: str, items: list[str] | None = None) -> None: + print(f"NOTE: {msg}") + if items: + emit_list(items) + + if title is None: + warn("no #+TITLE: line found; deck name will fall back to the file basename") elif SOURCE_TOOL_RE.search(title): - warnings += 1 - print(f"WARN: #+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side") + warn(f"#+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side") if answer_count: - warnings += 1 - print(f"WARN: {answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)") + warn(f"{answer_count} cards have *** Answer sub-headers (drop per drill-deck-review.org)") if prop_count != len(cards): - warnings += 1 - print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}") + warn(f"PROPERTIES count {prop_count} does not match card count {len(cards)}") if no_id: - warnings += 1 - print(f"WARN: {len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites") - for h in no_id[:5]: - print(f" - {h}") - if len(no_id) > 5: - print(f" - ... and {len(no_id) - 5} more") + warn(f"{len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites", no_id) if not_prompt: - warnings += 1 - print(f"WARN: {len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten") - for h in not_prompt[:5]: - print(f" - {h}") - if len(not_prompt) > 5: - print(f" - ... 
and {len(not_prompt) - 5} more") + warn(f"{len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten", not_prompt) + if leaky: + warn(f"{len(leaky)} cards may leak their answer (question echoes >= {int(LEAKAGE_RATIO * 100)}% of its own answer's key words); reformulate so the answer is recalled, not recognized", leaky) + if dups: + warn(f"{len(dups)} duplicate / near-duplicate fronts (interference between confusable cards); disambiguate or merge", + [f"{a} == {b}" for a, b in dups]) + + if overloaded: + note(f"{len(overloaded)} cards have a long answer (> {BACK_WORD_LIMIT} words); candidates to split into atomic cards", overloaded) + if listy: + note(f"{len(listy)} cards have a list-shaped answer; enumeration cards recall poorly — candidates to split or use overlapping cloze", listy) + if binary: + note(f"{len(binary)} cards are binary (yes/no or 'A or B'); low retrieval effort — candidates to reformulate open-ended", binary) if warnings == 0: - print("clean") + print("clean (with non-blocking notes above)" if (overloaded or listy or binary) else "clean") return 0 return 1 |
