diff options
| -rwxr-xr-x | .ai/scripts/drill-deck-stats.py | 44 | ||||
| -rwxr-xr-x | .ai/scripts/drill-to-anki.py | 9 | ||||
| -rw-r--r-- | .ai/scripts/tests/test_drill_deck_stats.py | 74 | ||||
| -rw-r--r-- | .ai/scripts/tests/test_drill_to_anki.py | 5 | ||||
| -rw-r--r-- | .ai/workflows/drill-deck-review.org | 13 | ||||
| -rwxr-xr-x | claude-templates/.ai/scripts/drill-deck-stats.py | 44 | ||||
| -rwxr-xr-x | claude-templates/.ai/scripts/drill-to-anki.py | 9 | ||||
| -rw-r--r-- | claude-templates/.ai/scripts/tests/test_drill_deck_stats.py | 74 | ||||
| -rw-r--r-- | claude-templates/.ai/scripts/tests/test_drill_to_anki.py | 5 | ||||
| -rw-r--r-- | claude-templates/.ai/workflows/drill-deck-review.org | 13 |
10 files changed, 274 insertions, 16 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py index d0707e2..04c3468 100755 --- a/.ai/scripts/drill-deck-stats.py +++ b/.ai/scripts/drill-deck-stats.py @@ -10,6 +10,8 @@ Blocking WARNs (exit 1): - Non-prompt headings (topic-as-heading not yet rewritten) - #+TITLE missing, or carrying source-tool jargon ("org-drill") - Answer leakage: a card whose question echoes most of its own answer + (Source: citation lines and created-date lines are excluded from the + overlap, and range/category cards that recall numbers are exempted) - Duplicate / near-duplicate fronts (interference between confusable cards) Non-blocking NOTEs (exit unaffected): @@ -41,6 +43,10 @@ ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$") TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE) SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE) PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s") +SOURCE_LINE_RE = re.compile(r"^\s*source:\s", re.IGNORECASE) +CREATED_LINE_RE = re.compile(r"^\s*:?created:?\s", re.IGNORECASE) +RANGE_RE = re.compile(r"\d[^\n]*[-–—]\s*\d") +THRESHOLD_RE = re.compile(r"[<>≤≥]\s*\d") BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+") BINARY_LEAD_RE = re.compile( r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b", @@ -98,6 +104,41 @@ def leakage_ratio(heading: str, body: str) -> float: return len(hw & content_words(body)) / len(hw) +def prose_body(body: str) -> str: + """Body with Source: citation and created-date lines removed. + + Those lines are metadata, not the answer. A Source line's URL slug often + repeats the question's words, and a created date is bookkeeping — neither + should count toward answer-leakage overlap. + """ + return "\n".join( + ln for ln in body.splitlines() + if not SOURCE_LINE_RE.match(ln) and not CREATED_LINE_RE.match(ln) + ) + + +def has_distinct_numeric_recall(heading: str, body: str) -> bool: + """True if the answer carries numeric ranges/thresholds the question lacks. + + A range/category card ("What are the HbA1c ranges across normal, + prediabetes, and diabetes?") echoes its categories in the answer, but the + recalled content is the numbers, which the question doesn't give away — so + high word overlap isn't leakage. + """ + body_nums = bool(RANGE_RE.search(body) or THRESHOLD_RE.search(body)) + head_nums = bool(RANGE_RE.search(heading) or THRESHOLD_RE.search(heading)) + return body_nums and not head_nums + + +def is_leaky(heading: str, body: str) -> bool: + """True if a card leaks its answer, after excluding citation lines and + numeric-recall (range/category) cards.""" + prose = prose_body(body) + if leakage_ratio(heading, prose) < LEAKAGE_RATIO: + return False + return not has_distinct_numeric_recall(heading, prose) + + def normalize_heading(heading: str) -> str: """Collapse a heading to a comparison key (lowercase, alnum + single spaces).""" return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip() @@ -212,8 +253,7 @@ def main() -> int: no_id = [c["heading"] for c in cards if not c["has_id"]] not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])] answer_count = sum(1 for c in cards if c["has_answer"]) - leaky = [c["heading"] for c in cards - if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO] + leaky = [c["heading"] for c in cards if is_leaky(c["heading"], c["body"])] dups = find_duplicate_fronts(cards) overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT] listy = [c["heading"] for c in cards if is_list_back(c["body"])] diff --git a/.ai/scripts/drill-to-anki.py b/.ai/scripts/drill-to-anki.py index 1050021..9fe954e 100755 --- a/.ai/scripts/drill-to-anki.py +++ b/.ai/scripts/drill-to-anki.py @@ -90,15 +90,18 @@ def escape_html(s: str) -> str: def strip_org_metadata(body_lines: list[str]) -> list[str]: - """Drop :PROPERTIES: drawers and SCHEDULED/DEADLINE/CLOSED planning lines. + """Drop :PROPERTIES: drawers, planning lines, and created-date lines. Org-drill needs these in the source file (SRS state lives in the PROPERTIES drawer; SCHEDULED carries the next-review date), but they - are noise on the back of an Anki card. + are noise on the back of an Anki card. A created/added date never + belongs on a card, so a stray "Created:" or ":CREATED:" body line is + dropped too. """ cleaned: list[str] = [] in_drawer = False planning_re = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s") + created_re = re.compile(r"^\s*:?created:?\s", re.IGNORECASE) drawer_start_re = re.compile(r"^\s*:PROPERTIES:\s*$") drawer_end_re = re.compile(r"^\s*:END:\s*$") for line in body_lines: @@ -109,7 +112,7 @@ def strip_org_metadata(body_lines: list[str]) -> list[str]: if drawer_start_re.match(line): in_drawer = True continue - if planning_re.match(line): + if planning_re.match(line) or created_re.match(line): continue cleaned.append(line) return cleaned diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py index 80b9913..d60084d 100644 --- a/.ai/scripts/tests/test_drill_deck_stats.py +++ b/.ai/scripts/tests/test_drill_deck_stats.py @@ -303,3 +303,77 @@ def test_cli_non_blocking_notes_keep_exit_zero(tmp_path): r = _run(f) assert r.returncode == 0 assert "NOTE" in r.stdout + + +# --- leakage refinements: source-line strip + numeric carve-out --- + +def test_prose_body_strips_source_and_created_lines(stats): + body = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x" + assert stats.prose_body(body) == "The real answer here." + + +def test_has_distinct_numeric_recall_true_for_range_card(stats): + assert stats.has_distinct_numeric_recall( + "What are the HbA1c ranges across normal, prediabetes, and diabetes?", + "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.", + ) is True + + +def test_has_distinct_numeric_recall_false_without_numbers(stats): + assert stats.has_distinct_numeric_recall("What is LEO?", "Low Earth Orbit.") is False + + +def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats): + heading = "What blood pressure constitutes a hypertensive crisis?" + body = ("A reading at or above 180/120.\n" + "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis") + assert stats.is_leaky(heading, body) is False + + +def test_is_leaky_false_for_numeric_range_card(stats): + heading = "What are the HbA1c ranges across normal, prediabetes, and diabetes?" + body = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%." + assert stats.is_leaky(heading, body) is False + + +def test_is_leaky_true_for_genuine_restatement(stats): + heading = "primary orbital regimes satellites classification" + body = "The primary orbital regimes satellites classification scheme." + assert stats.is_leaky(heading, body) is True + + +SOURCE_LINE_DECK = """#+TITLE: Test Flashcards + +* Section +** What blood pressure constitutes a hypertensive crisis? :drill: +:PROPERTIES: +:ID: c1 +:END: +A reading at or above 180/120. + +Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure +""" + +RANGE_CARD_DECK = """#+TITLE: Test Flashcards + +* Section +** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill: +:PROPERTIES: +:ID: c1 +:END: +HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%. +""" + + +def test_cli_source_line_overlap_is_not_flagged(tmp_path): + f = tmp_path / "source.org" + f.write_text(SOURCE_LINE_DECK) + r = _run(f) + assert r.returncode == 0 + + +def test_cli_numeric_range_card_is_not_flagged(tmp_path): + f = tmp_path / "range.org" + f.write_text(RANGE_CARD_DECK) + r = _run(f) + assert r.returncode == 0 diff --git a/.ai/scripts/tests/test_drill_to_anki.py b/.ai/scripts/tests/test_drill_to_anki.py index 6c5ef9b..fc17817 100644 --- a/.ai/scripts/tests/test_drill_to_anki.py +++ b/.ai/scripts/tests/test_drill_to_anki.py @@ -121,6 +121,11 @@ def test_strip_org_metadata_unclosed_drawer_swallows_the_rest(drill): assert drill.strip_org_metadata(body) == [] +def test_strip_org_metadata_drops_created_date_line(drill): + # A created/added date never belongs on a card back. + assert drill.strip_org_metadata(["Created: 2026-05-30", "real answer"]) == ["real answer"] + + # --- parse (pure, core parser) --- SECTIONED = """* Orbital Regimes diff --git a/.ai/workflows/drill-deck-review.org b/.ai/workflows/drill-deck-review.org index fe12f3c..390f296 100644 --- a/.ai/workflows/drill-deck-review.org +++ b/.ai/workflows/drill-deck-review.org @@ -103,6 +103,8 @@ The =drill-deck-stats.py= helper recognizes both =?=-form and imperative-verb fo - *Body opens by naming the topic.* "Air Force Research Laboratory. Air Force's R&D arm." or "Vrezh Mikayelyan. Armenia-based, full-time as of ..." The Anki back shows this directly under the front question; restating the topic makes the back read as a complete answer. - *PROPERTIES drawer stays.* Org-drill needs the =:ID:=, =:DRILL_LAST_INTERVAL:=, =:DRILL_EASE:= etc. for SRS state. The Anki output strips it (see the script change). - *=SCHEDULED:= / =DEADLINE:= planning lines stay.* Same reason. The Anki output strips them. +- *Source citation goes at the very end, after two blank lines.* When a card cites a source, put a =Source: <label> — <url>= line at the end of the body, separated from the answer by two blank lines (two empty paragraphs) so it reads as a footer, not part of the answer. =drill-deck-stats.py= ignores =Source:= lines when checking for answer leakage, since a URL slug often repeats the question's words. +- *No created/added date on the card.* Don't stamp a card with the date it was written. If a card body carries a =Created:= line (or a =:CREATED:= line outside the drawer), remove it during the rewrite. The Anki output strips =Created:= lines as a backstop, but they shouldn't be in the source either. Volatile facts get dated in the answer prose itself ("full-time as of April 2026"), never via a card-level timestamp. * Card Authoring Principles @@ -110,7 +112,7 @@ The canonical shapes above are the house style; these are the reasons behind the - *One fact per card (minimum information principle).* A card should test a single retrievable connection. A back that bundles several independent facts gets partially recalled and burns repetitions on the parts you already know. When a body covers unrelated attributes, split it into separate cards. =drill-deck-stats.py= flags long backs as a non-blocking NOTE. -- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage. +- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage — excluding =Source:= citation lines, and exempting range/category cards whose answer recalls numbers the question doesn't give away. - *Avoid binary prompts.* "Is X true?" and "A or B?" allow a coin-flip guess and produce shallow understanding. Reformulate open-ended — "How does X affect Y?" beats "Does X affect Y?" Flagged as a non-blocking NOTE. @@ -188,6 +190,8 @@ Rewrite shape per card: Drop the =*** Answer= sub-header entirely. The body that was under =*** Answer= becomes the body of the card. If the original body had a question above =*** Answer= (the pre-rewrite norm), drop that question — the new heading carries it. +Two body conventions to apply during the rewrite: remove any =Created:= / created-date line (no card-level timestamps), and if the card cites a source, put the =Source:= line at the end of the body after two blank lines. + For the file as a whole, use a single =Write= rather than per-card =Edit= calls. One pass through the source, one write back. Per-card edits multiply tool calls by N and risk drift. ** Phase D: Regenerate the Anki deck @@ -244,7 +248,7 @@ The core converter. Reads an org-drill source file, emits a stable-ID Anki =.apk ** =drill-deck-stats.py= -Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=. +Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=. The leakage check ignores =Source:= and created-date lines and exempts range/category cards whose answer recalls numbers the question doesn't give away. Imperative-verb allowlist: Spell, Describe, Explain, Name, List, Give, Show, Tell, Define, Compare, Identify, Outline, Introduce, Walk, State, Recite, Recall, Summarize. @@ -269,7 +273,7 @@ drill-deck-sync <source.org> --diff-against <previous-version.org> The =drill-to-anki.py= script has these contracts that this workflow depends on: 1. *Strips =:PROPERTIES:= drawers* from the card body before rendering. Org-drill needs them in source; Anki cards shouldn't show them. -2. *Strips =SCHEDULED:= / =DEADLINE:= / =CLOSED:= planning lines* from the card body. Same reason. +2. *Strips =SCHEDULED:= / =DEADLINE:= / =CLOSED:= planning lines and =Created:= / =:CREATED:= date lines* from the card body. Same reason — and a created date never belongs on a card. 3. *Does NOT strip =*** Answer= sub-headers.* If the source still has them, the Anki cards will show them. This workflow's Phase C removes them at the source. =drill-deck-stats.py= flags any remaining as a workflow violation. 4. *Front of each Anki card* = the heading text without the =:drill:= tag. 5. *Back of each Anki card* = the cleaned body (after #1 and #2), joined with =<br>= and HTML-escaped. @@ -318,3 +322,6 @@ Craig noticed the Anki deck name still showed as "DeepSat Org-Drill Flashcards" *** 2026-05-30: Authoring-quality checks + Card Authoring section (same day) Researched flashcard / spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki manual, the FSRS docs) and folded the findings in. =drill-deck-stats.py= gained answer-leakage and duplicate-front checks (blocking), plus non-blocking NOTEs for overloaded backs, list-shaped backs, and binary prompts. Added a "Card Authoring Principles" section (the why behind the canonical shapes), a person-card splitting path, a Phase B cost-benefit-removal + leech-feedback disposition, and a scheduling-is-Anki-side note in the Overview. Deliberately not adopted, with reasons: cloze cards (would need a second note type and an authoring convention), per-card tractability targeting and FSRS-retention encoding (Anki-side telemetry that never flows back to the source), on-face source-stamping (the converter strips those drawers by design; provenance stays in the org layer). + +*** 2026-05-30: Leakage false-positive fixes + source/created-date conventions (same day) +Health ran the leakage check on a 43-card deck and hit two false-positive classes. Fixed both in =drill-deck-stats.py=: =Source:= citation lines are stripped before the overlap is computed (a URL slug repeats the question's words), and range/category cards whose answer carries numeric ranges or thresholds the question lacks are exempted (the recalled content is the numbers, which aren't given away). Codified two body conventions: a =Source:= citation sits at the end of the card after two blank lines, and no created/added date goes on a card. =drill-to-anki.py= now strips =Created:= / =:CREATED:= lines from the back as a backstop, and Phase C removes them from the source during the rewrite. diff --git a/claude-templates/.ai/scripts/drill-deck-stats.py b/claude-templates/.ai/scripts/drill-deck-stats.py index d0707e2..04c3468 100755 --- a/claude-templates/.ai/scripts/drill-deck-stats.py +++ b/claude-templates/.ai/scripts/drill-deck-stats.py @@ -10,6 +10,8 @@ Blocking WARNs (exit 1): - Non-prompt headings (topic-as-heading not yet rewritten) - #+TITLE missing, or carrying source-tool jargon ("org-drill") - Answer leakage: a card whose question echoes most of its own answer + (Source: citation lines and created-date lines are excluded from the + overlap, and range/category cards that recall numbers are exempted) - Duplicate / near-duplicate fronts (interference between confusable cards) Non-blocking NOTEs (exit unaffected): @@ -41,6 +43,10 @@ ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$") TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE) SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE) PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s") +SOURCE_LINE_RE = re.compile(r"^\s*source:\s", re.IGNORECASE) +CREATED_LINE_RE = re.compile(r"^\s*:?created:?\s", re.IGNORECASE) +RANGE_RE = re.compile(r"\d[^\n]*[-–—]\s*\d") +THRESHOLD_RE = re.compile(r"[<>≤≥]\s*\d") BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+") BINARY_LEAD_RE = re.compile( r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b", @@ -98,6 +104,41 @@ def leakage_ratio(heading: str, body: str) -> float: return len(hw & content_words(body)) / len(hw) +def prose_body(body: str) -> str: + """Body with Source: citation and created-date lines removed. + + Those lines are metadata, not the answer. A Source line's URL slug often + repeats the question's words, and a created date is bookkeeping — neither + should count toward answer-leakage overlap. + """ + return "\n".join( + ln for ln in body.splitlines() + if not SOURCE_LINE_RE.match(ln) and not CREATED_LINE_RE.match(ln) + ) + + +def has_distinct_numeric_recall(heading: str, body: str) -> bool: + """True if the answer carries numeric ranges/thresholds the question lacks. + + A range/category card ("What are the HbA1c ranges across normal, + prediabetes, and diabetes?") echoes its categories in the answer, but the + recalled content is the numbers, which the question doesn't give away — so + high word overlap isn't leakage. + """ + body_nums = bool(RANGE_RE.search(body) or THRESHOLD_RE.search(body)) + head_nums = bool(RANGE_RE.search(heading) or THRESHOLD_RE.search(heading)) + return body_nums and not head_nums + + +def is_leaky(heading: str, body: str) -> bool: + """True if a card leaks its answer, after excluding citation lines and + numeric-recall (range/category) cards.""" + prose = prose_body(body) + if leakage_ratio(heading, prose) < LEAKAGE_RATIO: + return False + return not has_distinct_numeric_recall(heading, prose) + + def normalize_heading(heading: str) -> str: """Collapse a heading to a comparison key (lowercase, alnum + single spaces).""" return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip() @@ -212,8 +253,7 @@ def main() -> int: no_id = [c["heading"] for c in cards if not c["has_id"]] not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])] answer_count = sum(1 for c in cards if c["has_answer"]) - leaky = [c["heading"] for c in cards - if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO] + leaky = [c["heading"] for c in cards if is_leaky(c["heading"], c["body"])] dups = find_duplicate_fronts(cards) overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT] listy = [c["heading"] for c in cards if is_list_back(c["body"])] diff --git a/claude-templates/.ai/scripts/drill-to-anki.py b/claude-templates/.ai/scripts/drill-to-anki.py index 1050021..9fe954e 100755 --- a/claude-templates/.ai/scripts/drill-to-anki.py +++ b/claude-templates/.ai/scripts/drill-to-anki.py @@ -90,15 +90,18 @@ def escape_html(s: str) -> str: def strip_org_metadata(body_lines: list[str]) -> list[str]: - """Drop :PROPERTIES: drawers and SCHEDULED/DEADLINE/CLOSED planning lines. + """Drop :PROPERTIES: drawers, planning lines, and created-date lines. Org-drill needs these in the source file (SRS state lives in the PROPERTIES drawer; SCHEDULED carries the next-review date), but they - are noise on the back of an Anki card. + are noise on the back of an Anki card. A created/added date never + belongs on a card, so a stray "Created:" or ":CREATED:" body line is + dropped too. """ cleaned: list[str] = [] in_drawer = False planning_re = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s") + created_re = re.compile(r"^\s*:?created:?\s", re.IGNORECASE) drawer_start_re = re.compile(r"^\s*:PROPERTIES:\s*$") drawer_end_re = re.compile(r"^\s*:END:\s*$") for line in body_lines: @@ -109,7 +112,7 @@ def strip_org_metadata(body_lines: list[str]) -> list[str]: if drawer_start_re.match(line): in_drawer = True continue - if planning_re.match(line): + if planning_re.match(line) or created_re.match(line): continue cleaned.append(line) return cleaned diff --git a/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py b/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py index 80b9913..d60084d 100644 --- a/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py +++ b/claude-templates/.ai/scripts/tests/test_drill_deck_stats.py @@ -303,3 +303,77 @@ def test_cli_non_blocking_notes_keep_exit_zero(tmp_path): r = _run(f) assert r.returncode == 0 assert "NOTE" in r.stdout + + +# --- leakage refinements: source-line strip + numeric carve-out --- + +def test_prose_body_strips_source_and_created_lines(stats): + body = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x" + assert stats.prose_body(body) == "The real answer here." + + +def test_has_distinct_numeric_recall_true_for_range_card(stats): + assert stats.has_distinct_numeric_recall( + "What are the HbA1c ranges across normal, prediabetes, and diabetes?", + "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.", + ) is True + + +def test_has_distinct_numeric_recall_false_without_numbers(stats): + assert stats.has_distinct_numeric_recall("What is LEO?", "Low Earth Orbit.") is False + + +def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats): + heading = "What blood pressure constitutes a hypertensive crisis?" + body = ("A reading at or above 180/120.\n" + "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis") + assert stats.is_leaky(heading, body) is False + + +def test_is_leaky_false_for_numeric_range_card(stats): + heading = "What are the HbA1c ranges across normal, prediabetes, and diabetes?" + body = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%." + assert stats.is_leaky(heading, body) is False + + +def test_is_leaky_true_for_genuine_restatement(stats): + heading = "primary orbital regimes satellites classification" + body = "The primary orbital regimes satellites classification scheme." + assert stats.is_leaky(heading, body) is True + + +SOURCE_LINE_DECK = """#+TITLE: Test Flashcards + +* Section +** What blood pressure constitutes a hypertensive crisis? :drill: +:PROPERTIES: +:ID: c1 +:END: +A reading at or above 180/120. + +Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure +""" + +RANGE_CARD_DECK = """#+TITLE: Test Flashcards + +* Section +** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill: +:PROPERTIES: +:ID: c1 +:END: +HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%. +""" + + +def test_cli_source_line_overlap_is_not_flagged(tmp_path): + f = tmp_path / "source.org" + f.write_text(SOURCE_LINE_DECK) + r = _run(f) + assert r.returncode == 0 + + +def test_cli_numeric_range_card_is_not_flagged(tmp_path): + f = tmp_path / "range.org" + f.write_text(RANGE_CARD_DECK) + r = _run(f) + assert r.returncode == 0 diff --git a/claude-templates/.ai/scripts/tests/test_drill_to_anki.py b/claude-templates/.ai/scripts/tests/test_drill_to_anki.py index 6c5ef9b..fc17817 100644 --- a/claude-templates/.ai/scripts/tests/test_drill_to_anki.py +++ b/claude-templates/.ai/scripts/tests/test_drill_to_anki.py @@ -121,6 +121,11 @@ def test_strip_org_metadata_unclosed_drawer_swallows_the_rest(drill): assert drill.strip_org_metadata(body) == [] +def test_strip_org_metadata_drops_created_date_line(drill): + # A created/added date never belongs on a card back. + assert drill.strip_org_metadata(["Created: 2026-05-30", "real answer"]) == ["real answer"] + + # --- parse (pure, core parser) --- SECTIONED = """* Orbital Regimes diff --git a/claude-templates/.ai/workflows/drill-deck-review.org b/claude-templates/.ai/workflows/drill-deck-review.org index fe12f3c..390f296 100644 --- a/claude-templates/.ai/workflows/drill-deck-review.org +++ b/claude-templates/.ai/workflows/drill-deck-review.org @@ -103,6 +103,8 @@ The =drill-deck-stats.py= helper recognizes both =?=-form and imperative-verb fo - *Body opens by naming the topic.* "Air Force Research Laboratory. Air Force's R&D arm." or "Vrezh Mikayelyan. Armenia-based, full-time as of ..." The Anki back shows this directly under the front question; restating the topic makes the back read as a complete answer. - *PROPERTIES drawer stays.* Org-drill needs the =:ID:=, =:DRILL_LAST_INTERVAL:=, =:DRILL_EASE:= etc. for SRS state. The Anki output strips it (see the script change). - *=SCHEDULED:= / =DEADLINE:= planning lines stay.* Same reason. The Anki output strips them. +- *Source citation goes at the very end, after two blank lines.* When a card cites a source, put a =Source: <label> — <url>= line at the end of the body, separated from the answer by two blank lines (two empty paragraphs) so it reads as a footer, not part of the answer. =drill-deck-stats.py= ignores =Source:= lines when checking for answer leakage, since a URL slug often repeats the question's words. +- *No created/added date on the card.* Don't stamp a card with the date it was written. If a card body carries a =Created:= line (or a =:CREATED:= line outside the drawer), remove it during the rewrite. The Anki output strips =Created:= lines as a backstop, but they shouldn't be in the source either. Volatile facts get dated in the answer prose itself ("full-time as of April 2026"), never via a card-level timestamp. * Card Authoring Principles @@ -110,7 +112,7 @@ The canonical shapes above are the house style; these are the reasons behind the - *One fact per card (minimum information principle).* A card should test a single retrievable connection. A back that bundles several independent facts gets partially recalled and burns repetitions on the parts you already know. When a body covers unrelated attributes, split it into separate cards. =drill-deck-stats.py= flags long backs as a non-blocking NOTE. -- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage. +- *Demand recall, not recognition (effortful retrieval).* Pulling the answer from memory is what strengthens it, so the question must not let you infer the answer from its own wording. This is why person headings never name the person, and why a question that restates its own answer is a defect. =drill-deck-stats.py= flags high front/back word overlap as answer leakage — excluding =Source:= citation lines, and exempting range/category cards whose answer recalls numbers the question doesn't give away. - *Avoid binary prompts.* "Is X true?" and "A or B?" allow a coin-flip guess and produce shallow understanding. Reformulate open-ended — "How does X affect Y?" beats "Does X affect Y?" Flagged as a non-blocking NOTE. @@ -188,6 +190,8 @@ Rewrite shape per card: Drop the =*** Answer= sub-header entirely. The body that was under =*** Answer= becomes the body of the card. If the original body had a question above =*** Answer= (the pre-rewrite norm), drop that question — the new heading carries it. +Two body conventions to apply during the rewrite: remove any =Created:= / created-date line (no card-level timestamps), and if the card cites a source, put the =Source:= line at the end of the body after two blank lines. + For the file as a whole, use a single =Write= rather than per-card =Edit= calls. One pass through the source, one write back. Per-card edits multiply tool calls by N and risk drift. ** Phase D: Regenerate the Anki deck @@ -244,7 +248,7 @@ The core converter. Reads an org-drill source file, emits a stable-ID Anki =.apk ** =drill-deck-stats.py= -Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=. +Inventory + authoring-quality checks for a single deck source. Counts cards, PROPERTIES drawers, =*** Answer= sub-headers, cards missing =:ID:=, and cards whose heading is neither =?=-form nor an imperative-verb prompt. It also checks authoring quality: answer leakage (front/back content-word overlap) and duplicate / near-duplicate fronts are blocking WARNs; overloaded backs, list-shaped backs, and binary prompts are non-blocking NOTEs. Exits 0 when no blocking warning is present, 1 otherwise, so it gates =drill-deck-sync=. The leakage check ignores =Source:= and created-date lines and exempts range/category cards whose answer recalls numbers the question doesn't give away. Imperative-verb allowlist: Spell, Describe, Explain, Name, List, Give, Show, Tell, Define, Compare, Identify, Outline, Introduce, Walk, State, Recite, Recall, Summarize. @@ -269,7 +273,7 @@ drill-deck-sync <source.org> --diff-against <previous-version.org> The =drill-to-anki.py= script has these contracts that this workflow depends on: 1. *Strips =:PROPERTIES:= drawers* from the card body before rendering. Org-drill needs them in source; Anki cards shouldn't show them. -2. *Strips =SCHEDULED:= / =DEADLINE:= / =CLOSED:= planning lines* from the card body. Same reason. +2. *Strips =SCHEDULED:= / =DEADLINE:= / =CLOSED:= planning lines and =Created:= / =:CREATED:= date lines* from the card body. Same reason — and a created date never belongs on a card. 3. *Does NOT strip =*** Answer= sub-headers.* If the source still has them, the Anki cards will show them. This workflow's Phase C removes them at the source. =drill-deck-stats.py= flags any remaining as a workflow violation. 4. *Front of each Anki card* = the heading text without the =:drill:= tag. 5. *Back of each Anki card* = the cleaned body (after #1 and #2), joined with =<br>= and HTML-escaped. @@ -318,3 +322,6 @@ Craig noticed the Anki deck name still showed as "DeepSat Org-Drill Flashcards" *** 2026-05-30: Authoring-quality checks + Card Authoring section (same day) Researched flashcard / spaced-repetition best practices (Wozniak's twenty rules, Matuschak's prompt-writing guide, Nielsen, the Anki manual, the FSRS docs) and folded the findings in. =drill-deck-stats.py= gained answer-leakage and duplicate-front checks (blocking), plus non-blocking NOTEs for overloaded backs, list-shaped backs, and binary prompts. Added a "Card Authoring Principles" section (the why behind the canonical shapes), a person-card splitting path, a Phase B cost-benefit-removal + leech-feedback disposition, and a scheduling-is-Anki-side note in the Overview. Deliberately not adopted, with reasons: cloze cards (would need a second note type and an authoring convention), per-card tractability targeting and FSRS-retention encoding (Anki-side telemetry that never flows back to the source), on-face source-stamping (the converter strips those drawers by design; provenance stays in the org layer). + +*** 2026-05-30: Leakage false-positive fixes + source/created-date conventions (same day) +Health ran the leakage check on a 43-card deck and hit two false-positive classes. Fixed both in =drill-deck-stats.py=: =Source:= citation lines are stripped before the overlap is computed (a URL slug repeats the question's words), and range/category cards whose answer carries numeric ranges or thresholds the question lacks are exempted (the recalled content is the numbers, which aren't given away). Codified two body conventions: a =Source:= citation sits at the end of the card after two blank lines, and no created/added date goes on a card. =drill-to-anki.py= now strips =Created:= / =:CREATED:= lines from the back as a backstop, and Phase C removes them from the source during the rewrite. |
