claude-templates/.ai/scripts/flashcard-stats.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

#!/usr/bin/env python3
"""Inventory + authoring-quality checks for an org-drill deck source file.

Reports counts and flags two tiers of issue.

Blocking WARNs (exit 1):
- PROPERTIES drawer count not matching card count
- Cards missing :ID: (risks SRS-state loss across rewrites)
- `*** Answer` sub-headers (should be 0 per flashcard-review.org)
- Non-prompt headings (topic-as-heading not yet rewritten)
- #+TITLE missing, or carrying source-tool jargon ("org-drill")
- Answer leakage: a card whose question echoes most of its own answer
  (Source: citation lines and created-date lines are excluded from the
  overlap, and range/category cards that recall numbers are exempted)
- Duplicate / near-duplicate fronts (interference between confusable cards)

Non-blocking NOTEs (exit unaffected):
- Overloaded backs (long answer — candidate to split into atomic cards)
- List-shaped backs (enumeration — candidate to split or use overlapping cloze)
- Binary yes/no prompts (low retrieval effort — candidate to reformulate)

Exits 0 when no blocking warnings are present, 1 otherwise, 2 on bad usage.
Use as a gate before regenerating the Anki deck or running flashcard-sync.

The fuzzy checks (leakage, duplicate, overloaded) are tuned by the LEAKAGE_*
and BACK_WORD_LIMIT constants below; loosen them if a real deck trips false
positives.

Usage:
  flashcard-stats.py <file.org>
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

# Level-2 heading carrying exactly the `:drill:` tag at end of line; group 1 is
# the heading text between the stars and the tag.
CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:drill:\s*$")
# Level-3 heading whose text starts with the word "Answer".
ANSWER_RE = re.compile(r"^\*\*\*\s+Answer\b")
# Opening and closing lines of a PROPERTIES drawer (indentation allowed).
PROP_START_RE = re.compile(r"^\s*:PROPERTIES:\s*$")
PROP_END_RE = re.compile(r"^\s*:END:\s*$")
# `:ID:` property line with a non-empty, whitespace-free value (group 1).
ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
# `#+TITLE:` keyword line in any letter case; group 1 is the trimmed title.
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
# "org-drill", "org drill" or "orgdrill" as a whole word, any letter case.
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
# Planning lines (SCHEDULED / DEADLINE / CLOSED), which are not answer text.
PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
# Citation line: starts with "source:" in any letter case.
SOURCE_LINE_RE = re.compile(r"^\s*source:\s", re.IGNORECASE)
# Created-date line: starts with "created", optionally wrapped in colons.
# NOTE(review): the colons are optional, so a prose line that merely begins
# with the word "Created " also matches.
CREATED_LINE_RE = re.compile(r"^\s*:?created:?\s", re.IGNORECASE)
# A digit followed, later on the same line, by a hyphen / en dash / em dash
# and another digit (e.g. "5.7-6.4").
RANGE_RE = re.compile(r"\d[^\n]*[-–—]\s*\d")
# A comparison sign directly followed (spaces allowed) by a digit, e.g. "< 5.7".
THRESHOLD_RE = re.compile(r"[<>≤≥]\s*\d")
# List item marker: "-", "+", "*", or a number followed by "." or ")".
BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
# Heading that opens with an auxiliary verb, i.e. reads as a yes/no question.
BINARY_LEAD_RE = re.compile(
    r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
    re.IGNORECASE,
)

# A heading qualifies as "prompt form" if it contains `?` or starts with one of
# these imperative verbs (directive prompts like "Spell these out" and
# "Introduce yourself" are valid even without `?`).
IMPERATIVE_VERBS = frozenset({
    "spell", "describe", "explain", "name", "list", "give",
    "show", "tell", "define", "compare", "identify", "outline",
    "introduce", "walk", "state", "recite", "recall", "summarize",
})

# Function words ignored when comparing a question against its answer.
# Entries shorter than three letters never reach this filter in practice,
# because content_words() drops tokens under three characters first.
STOPWORDS = frozenset({
    "the", "a", "an", "is", "are", "was", "were", "of", "to", "in", "on",
    "for", "and", "or", "with", "what", "who", "whom", "when", "where", "why",
    "how", "which", "does", "do", "did", "tell", "me", "about", "their", "this",
    "that", "it", "as", "at", "by", "be", "your", "you", "they", "them",
})

# Tuning knobs for the fuzzy checks.
LEAKAGE_RATIO = 0.8     # share of a question's content words echoed in its answer
LEAKAGE_MIN_WORDS = 3   # ignore very short questions, where overlap is noise
BACK_WORD_LIMIT = 60    # words on a card back before it's flagged as overloaded


def is_prompt_form(heading: str) -> bool:
    """Return True if the heading reads as a question or imperative prompt.

    A heading qualifies when it contains a ``?`` anywhere, or when its first
    word (lowercased, with trailing ``:``, ``,`` or ``;`` removed) is one of
    IMPERATIVE_VERBS.

    An empty or whitespace-only heading has no first word; it is reported as
    not prompt-form rather than raising IndexError. CARD_RE can produce such a
    heading for a line like ``**   :drill:``.
    """
    if "?" in heading:
        return True
    words = heading.split(None, 1)
    if not words:
        return False
    return words[0].lower().rstrip(":,;") in IMPERATIVE_VERBS


def content_words(text: str) -> set[str]:
    """Return the distinct meaningful words of ``text``.

    The text is lowercased and cut into runs of ASCII letters and digits.
    Runs shorter than three characters and members of STOPWORDS are dropped.
    """
    kept = set()
    for token in re.findall(r"[a-z0-9]+", text.lower()):
        if len(token) < 3:
            continue
        if token in STOPWORDS:
            continue
        kept.add(token)
    return kept


def leakage_ratio(heading: str, body: str) -> float:
    """Return the share of the question's content words found in the answer.

    The closer the value is to 1.0, the more of the question is repeated in
    the answer, which lets the card be answered by recognition instead of
    recall. Questions with fewer than LEAKAGE_MIN_WORDS content words score
    0.0, since overlap on so few words is not meaningful.
    """
    question_words = content_words(heading)
    total = len(question_words)
    if total < LEAKAGE_MIN_WORDS:
        return 0.0
    echoed = question_words.intersection(content_words(body))
    return len(echoed) / total


def prose_body(body: str) -> str:
    """Return ``body`` without its Source: citation and created-date lines.

    Both kinds of line are bookkeeping rather than answer text. A citation's
    URL slug frequently repeats words from the question, so leaving it in
    would inflate the answer-leakage overlap.
    """
    kept = []
    for line in body.splitlines():
        if SOURCE_LINE_RE.match(line) or CREATED_LINE_RE.match(line):
            continue
        kept.append(line)
    return "\n".join(kept)


def has_distinct_numeric_recall(heading: str, body: str) -> bool:
    """Return True if the answer has numeric ranges/thresholds the question lacks.

    Range/category cards (for example one asking for the HbA1c ranges across
    normal, prediabetes, and diabetes) repeat their category names in the
    answer, yet what is actually recalled is the numbers. When the question
    itself shows no range or threshold, heavy word overlap is not leakage.
    """
    def mentions_numbers(text: str) -> bool:
        # A range ("5.7-6.4") or a threshold ("< 5.7") counts as numeric content.
        if RANGE_RE.search(text) is not None:
            return True
        return THRESHOLD_RE.search(text) is not None

    if mentions_numbers(heading):
        return False
    return mentions_numbers(body)


def is_leaky(heading: str, body: str) -> bool:
    """Return True if the card gives its answer away in the question.

    Citation and created-date lines are ignored when measuring overlap, and
    cards whose answer adds numeric ranges/thresholds absent from the question
    are exempt.
    """
    answer = prose_body(body)
    echoes_answer = leakage_ratio(heading, answer) >= LEAKAGE_RATIO
    # The numeric-recall exemption is only evaluated for cards over the threshold.
    return echoes_answer and not has_distinct_numeric_recall(heading, answer)


def normalize_heading(heading: str) -> str:
    """Collapse a heading to a comparison key.

    The key is the lowercased heading with every run of punctuation,
    underscores, and whitespace replaced by a single space, and with leading
    and trailing spaces removed.

    Letters and digits of any script are kept. An ASCII-only filter would
    erase non-ASCII text entirely, so two headings that differ only in, say,
    a CJK or accented word would collapse to the same key and be reported as
    duplicate fronts. Keys for pure-ASCII headings are unaffected.
    """
    # For str patterns the "non-word" class is Unicode-aware; the underscore is
    # added explicitly because it counts as a word character.
    return re.sub(r"[\W_]+", " ", heading.lower()).strip()


def is_binary_prompt(heading: str) -> bool:
    """Return True for yes/no or 'A or B' prompts, which take little retrieval effort.

    A heading counts as binary when it opens with an auxiliary verb
    (BINARY_LEAD_RE), or when it ends with ``?`` and contains the standalone
    word "or".
    """
    if BINARY_LEAD_RE.match(heading) is not None:
        return True
    if not heading.rstrip().endswith("?"):
        return False
    return re.search(r"\bor\b", heading, re.IGNORECASE) is not None


def back_word_count(body: str) -> int:
    """Return the number of whitespace-separated words in a card back."""
    return sum(1 for _ in body.split())


def is_list_back(body: str) -> bool:
    """Return True if the answer body is mostly an org list (an enumeration card).

    Blank lines are ignored. The body qualifies when it has at least two list
    items and list items make up at least half of its non-blank lines.
    """
    filled = [line for line in body.splitlines() if line.strip()]
    if len(filled) < 2:
        return False
    item_count = 0
    for line in filled:
        if BULLET_RE.match(line):
            item_count += 1
    return item_count >= 2 and 2 * item_count >= len(filled)


def parse_cards(lines: list[str]) -> tuple[list[dict], int]:
    """Extract the :drill: cards from the lines of an org file.

    Returns ``(cards, prop_count)``. Every card is a dict with the keys
    ``heading``, ``has_id``, ``has_answer`` and ``body``. ``body`` is the
    answer text with PROPERTIES drawers, planning lines and `*** Answer`
    headers left out, approximating the rendered back. ``prop_count`` is the
    number of PROPERTIES drawer openings seen inside cards.

    A card starts at a line matching CARD_RE and runs until the next such
    line or the next line starting with "* ". Lines outside any card are
    ignored.
    """
    cards: list[dict] = []
    backs: list[list[str]] = []  # backs[k] collects the body lines of cards[k]
    drawer_total = 0
    collecting = False       # True while the current line belongs to a card
    inside_drawer = False    # True between :PROPERTIES: and its :END:

    for line in lines:
        opened = CARD_RE.match(line)
        if opened:
            cards.append({
                "heading": opened.group(1).strip(),
                "has_id": False,
                "has_answer": False,
                "body": "",
            })
            backs.append([])
            collecting = True
            inside_drawer = False
            continue
        if not collecting:
            continue
        if line.startswith("* "):
            # A top-level heading closes the current card.
            collecting = False
            continue

        card = cards[-1]
        if PROP_START_RE.match(line):
            drawer_total += 1
            inside_drawer = True
        elif inside_drawer:
            # Drawer content never reaches the body; only :ID: is recorded.
            if PROP_END_RE.match(line):
                inside_drawer = False
            elif ID_RE.match(line):
                card["has_id"] = True
        elif ANSWER_RE.match(line):
            card["has_answer"] = True
        elif not PLANNING_RE.match(line):
            backs[-1].append(line)

    for card, back in zip(cards, backs):
        card["body"] = "\n".join(back).strip()
    return cards, drawer_total


def find_duplicate_fronts(cards: list[dict]) -> list[tuple[str, str]]:
    """Return ``(first, dup)`` heading pairs whose normalized keys coincide.

    ``first`` is always the earliest heading seen for a key. Headings whose
    key is empty are skipped.
    """
    first_by_key: dict[str, str] = {}
    pairs: list[tuple[str, str]] = []
    for card in cards:
        heading = card["heading"]
        key = normalize_heading(heading)
        if not key:
            continue
        earlier = first_by_key.get(key)
        if earlier is None:
            first_by_key[key] = heading
            continue
        pairs.append((earlier, heading))
    return pairs


def main() -> int:
    """Check the deck file named on the command line and print a report.

    Reads the single path argument from ``sys.argv``, prints the inventory
    counts, then the blocking WARNs and the non-blocking NOTEs.

    Returns the process exit status: 0 when no blocking WARN was emitted,
    1 when at least one was, and 2 on bad usage or when the file is missing
    or cannot be read as UTF-8 text.
    """
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
        return 2

    path = Path(sys.argv[1]).expanduser().resolve()
    if not path.is_file():
        print(f"error: {path} not found", file=sys.stderr)
        return 2

    try:
        # utf-8-sig drops a leading byte-order mark, which would otherwise sit
        # in front of a first-line #+TITLE: and hide it from TITLE_RE.
        lines = path.read_text(encoding="utf-8-sig").splitlines()
    except (OSError, UnicodeDecodeError) as exc:
        # An unreadable or non-UTF-8 file is an input problem, not a deck
        # warning: report it like a missing file instead of a traceback.
        print(f"error: cannot read {path}: {exc}", file=sys.stderr)
        return 2

    # Only the first 20 lines are searched for the deck title.
    title: str | None = None
    for line in lines[:20]:
        m = TITLE_RE.match(line)
        if m:
            title = m.group(1).strip()
            break

    cards, prop_count = parse_cards(lines)

    no_id = [c["heading"] for c in cards if not c["has_id"]]
    not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
    answer_count = sum(1 for c in cards if c["has_answer"])
    leaky = [c["heading"] for c in cards if is_leaky(c["heading"], c["body"])]
    dups = find_duplicate_fronts(cards)
    overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
    listy = [c["heading"] for c in cards if is_list_back(c["body"])]
    binary = [c["heading"] for c in cards if is_binary_prompt(c["heading"])]

    print(f"{path.name} — drill deck stats")
    print()
    print(f"Deck title: {title if title else '(no #+TITLE)'}")
    print(f"Cards: {len(cards)}")
    drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
    print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
    print(f"*** Answer sub-headers: {answer_count} ({'clean' if answer_count == 0 else 'workflow violation'})")
    print(f"Cards missing :ID:: {len(no_id)}")
    print(f"Cards with non-prompt heading: {len(not_prompt)}")
    print(f"Cards with possible answer leakage: {len(leaky)}")
    print(f"Duplicate / near-duplicate fronts: {len(dups)}")
    print()

    # Number of blocking WARNs emitted; any non-zero value makes the exit status 1.
    warnings = 0

    def emit_list(items: list[str]) -> None:
        # Show at most five offenders, then a count of the rest.
        for h in items[:5]:
            print(f"      - {h}")
        if len(items) > 5:
            print(f"      - ... and {len(items) - 5} more")

    def warn(msg: str, items: list[str] | None = None) -> None:
        # Blocking issue: counted toward the exit status.
        nonlocal warnings
        warnings += 1
        print(f"WARN: {msg}")
        if items:
            emit_list(items)

    def note(msg: str, items: list[str] | None = None) -> None:
        # Advisory only: printed but never counted.
        print(f"NOTE: {msg}")
        if items:
            emit_list(items)

    if title is None:
        warn("no #+TITLE: line found; deck name will fall back to the file basename")
    elif SOURCE_TOOL_RE.search(title):
        warn(f"#+TITLE contains source-tool jargon ('{title}'); the deck name shows in Anki — drop 'Org-Drill' for a name that reads well on the consumption side")
    if answer_count:
        warn(f"{answer_count} cards have *** Answer sub-headers (drop per flashcard-review.org)")
    if prop_count != len(cards):
        warn(f"PROPERTIES count {prop_count} does not match card count {len(cards)}")
    if no_id:
        warn(f"{len(no_id)} cards missing :ID:; losing identity risks SRS-state loss across rewrites", no_id)
    if not_prompt:
        warn(f"{len(not_prompt)} cards have non-prompt headings (no '?' and no imperative-verb start); likely topic-as-heading not yet rewritten", not_prompt)
    if leaky:
        warn(f"{len(leaky)} cards may leak their answer (question echoes >= {int(LEAKAGE_RATIO * 100)}% of its own answer's key words); reformulate so the answer is recalled, not recognized", leaky)
    if dups:
        warn(f"{len(dups)} duplicate / near-duplicate fronts (interference between confusable cards); disambiguate or merge",
             [f"{a}  ==  {b}" for a, b in dups])

    if overloaded:
        note(f"{len(overloaded)} cards have a long answer (> {BACK_WORD_LIMIT} words); candidates to split into atomic cards", overloaded)
    if listy:
        note(f"{len(listy)} cards have a list-shaped answer; enumeration cards recall poorly — candidates to split or use overlapping cloze", listy)
    if binary:
        note(f"{len(binary)} cards are binary (yes/no or 'A or B'); low retrieval effort — candidates to reformulate open-ended", binary)

    if warnings == 0:
        print("clean (with non-blocking notes above)" if (overloaded or listy or binary) else "clean")
        return 0
    return 1


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())