4 files changed, 127 insertions, 5 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py
index d0707e2..04c3468 100755
--- a/.ai/scripts/drill-deck-stats.py
+++ b/.ai/scripts/drill-deck-stats.py
@@ -10,6 +10,8 @@ Blocking WARNs (exit 1):
 - Non-prompt headings (topic-as-heading not yet rewritten)
 - #+TITLE missing, or carrying source-tool jargon ("org-drill")
 - Answer leakage: a card whose question echoes most of its own answer
+  (Source: citation lines and created-date lines are excluded from the
+  overlap, and range/category cards that recall numbers are exempted)
 - Duplicate / near-duplicate fronts (interference between confusable cards)
 
 Non-blocking NOTEs (exit unaffected):
@@ -41,6 +43,10 @@ ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
 TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
 SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
 PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+SOURCE_LINE_RE = re.compile(r"^\s*source:\s", re.IGNORECASE)  # "Source: ..." citation line
+CREATED_LINE_RE = re.compile(r"^\s*:?created:\s", re.IGNORECASE)  # colon required: not prose "Created by ..."
+RANGE_RE = re.compile(r"\d[^\n]*[-–—]\s*\d")  # digit, then later on the same line a dash + digit ("5.7-6.4")
+THRESHOLD_RE = re.compile(r"[<>≤≥]=?\s*\d")  # "<5.7", "≥ 6.5", and ASCII ">=6.5" / "<=7"
 BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
 BINARY_LEAD_RE = re.compile(
     r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
@@ -98,6 +104,41 @@ def leakage_ratio(heading: str, body: str) -> float:
     return len(hw & content_words(body)) / len(hw)
 
 
+def prose_body(body: str) -> str:
+    """Return *body* without its Source: citation and created-date lines.
+
+    Both kinds of line are metadata rather than answer text: a Source URL
+    slug tends to repeat the question's words and a created date is only
+    bookkeeping, so neither should feed the answer-leakage overlap.
+    """
+    def is_metadata(line: str) -> bool:
+        return bool(SOURCE_LINE_RE.match(line) or CREATED_LINE_RE.match(line))
+
+    return "\n".join(ln for ln in body.splitlines() if not is_metadata(ln))
+
+
+def has_distinct_numeric_recall(heading: str, body: str) -> bool:
+    """True when the answer has numeric ranges/thresholds the question lacks.
+
+    Range/category cards (e.g. "What are the HbA1c ranges across normal,
+    prediabetes, and diabetes?") repeat their category words in the answer,
+    yet what is actually recalled is the numbers; when the question shows no
+    range or threshold of its own, high word overlap is not leakage.
+    """
+    def has_numbers(text: str) -> bool:
+        return bool(RANGE_RE.search(text) or THRESHOLD_RE.search(text))
+    return has_numbers(body) and not has_numbers(heading)
+
+
+def is_leaky(heading: str, body: str) -> bool:
+    """Decide whether a card leaks its answer, ignoring citation/created
+    lines and exempting numeric-recall (range/category) cards."""
+    answer_text = prose_body(body)
+    overlaps = leakage_ratio(heading, answer_text) >= LEAKAGE_RATIO
+    # High overlap is only leakage when the answer adds no numbers of its own.
+    return overlaps and not has_distinct_numeric_recall(heading, answer_text)
+
+
 def normalize_heading(heading: str) -> str:
     """Collapse a heading to a comparison key (lowercase, alnum + single spaces)."""
     return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip()
@@ -212,8 +253,7 @@ def main() -> int:
     no_id = [c["heading"] for c in cards if not c["has_id"]]
     not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
     answer_count = sum(1 for c in cards if c["has_answer"])
-    leaky = [c["heading"] for c in cards
-             if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO]
+    leaky = [c["heading"] for c in cards if is_leaky(c["heading"], c["body"])]
     dups = find_duplicate_fronts(cards)
     overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
     listy = [c["heading"] for c in cards if is_list_back(c["body"])]
diff --git a/.ai/scripts/drill-to-anki.py b/.ai/scripts/drill-to-anki.py
index 1050021..9fe954e 100755
--- a/.ai/scripts/drill-to-anki.py
+++ b/.ai/scripts/drill-to-anki.py
@@ -90,15 +90,18 @@ def escape_html(s: str) -> str:
 
 
 def strip_org_metadata(body_lines: list[str]) -> list[str]:
-    """Drop :PROPERTIES: drawers and SCHEDULED/DEADLINE/CLOSED planning lines.
+    """Drop :PROPERTIES: drawers, planning lines, and created-date lines.
 
     Org-drill needs these in the source file (SRS state lives in the
     PROPERTIES drawer; SCHEDULED carries the next-review date), but they
-    are noise on the back of an Anki card.
+    are noise on the back of an Anki card. A created/added date never
+    belongs on a card, so a stray "Created:" or ":CREATED:" body line is
+    dropped too.
     """
     cleaned: list[str] = []
     in_drawer = False
     planning_re = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+    created_re = re.compile(r"^\s*:?created:?\s", re.IGNORECASE)
     drawer_start_re = re.compile(r"^\s*:PROPERTIES:\s*$")
     drawer_end_re = re.compile(r"^\s*:END:\s*$")
     for line in body_lines:
@@ -109,7 +112,7 @@ def strip_org_metadata(body_lines: list[str]) -> list[str]:
         if drawer_start_re.match(line):
             in_drawer = True
             continue
-        if planning_re.match(line):
+        if planning_re.match(line) or created_re.match(line):
             continue
         cleaned.append(line)
     return cleaned
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
index 80b9913..d60084d 100644
--- a/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -303,3 +303,77 @@ def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
     r = _run(f)
     assert r.returncode == 0
     assert "NOTE" in r.stdout
+
+
+# --- leakage refinements: source-line strip + numeric carve-out ---
+
+def test_prose_body_strips_source_and_created_lines(stats):
+    card_back = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x"
+    assert stats.prose_body(card_back) == "The real answer here."  # only the prose line survives
+
+
+def test_has_distinct_numeric_recall_true_for_range_card(stats):
+    question = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
+    answer = "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
+    # Ranges and thresholds appear only in the answer, never in the question.
+    assert stats.has_distinct_numeric_recall(question, answer) is True
+
+
+def test_has_distinct_numeric_recall_false_without_numbers(stats):
+    assert stats.has_distinct_numeric_recall(heading="What is LEO?", body="Low Earth Orbit.") is False
+
+
+def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats):
+    question = "What blood pressure constitutes a hypertensive crisis?"
+    citation = "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis"
+    answer = "A reading at or above 180/120.\n" + citation
+    assert stats.is_leaky(question, answer) is False
+
+
+def test_is_leaky_false_for_numeric_range_card(stats):
+    question = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
+    answer = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
+    assert stats.is_leaky(question, answer) is False  # exempt: the recalled content is the numbers
+
+
+def test_is_leaky_true_for_genuine_restatement(stats):
+    front = "primary orbital regimes satellites classification"
+    back = "The primary orbital regimes satellites classification scheme."
+    assert stats.is_leaky(front, back) is True  # the answer restates the heading and adds no numbers
+
+
+SOURCE_LINE_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What blood pressure constitutes a hypertensive crisis? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+A reading at or above 180/120.
+
+Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure
+"""
+
+RANGE_CARD_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.
+"""
+
+
+def test_cli_source_line_overlap_is_not_flagged(tmp_path):
+    deck_path = tmp_path / "source.org"
+    deck_path.write_text(SOURCE_LINE_DECK)
+    result = _run(deck_path)
+    assert result.returncode == 0
+
+
+def test_cli_numeric_range_card_is_not_flagged(tmp_path):
+    deck_path = tmp_path / "range.org"
+    deck_path.write_text(RANGE_CARD_DECK)
+    result = _run(deck_path)
+    assert result.returncode == 0
diff --git a/.ai/scripts/tests/test_drill_to_anki.py b/.ai/scripts/tests/test_drill_to_anki.py
index 6c5ef9b..fc17817 100644
--- a/.ai/scripts/tests/test_drill_to_anki.py
+++ b/.ai/scripts/tests/test_drill_to_anki.py
@@ -121,6 +121,11 @@ def test_strip_org_metadata_unclosed_drawer_swallows_the_rest(drill):
     assert drill.strip_org_metadata(body) == []
 
 
+def test_strip_org_metadata_drops_created_date_line(drill):
+    body_lines = ["Created: 2026-05-30", "real answer"]  # a created/added date never belongs on a card back
+    assert drill.strip_org_metadata(body_lines) == ["real answer"]
+
+
 # --- parse (pure, core parser) ---
 
 SECTIONED = """* Orbital Regimes