aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-05-30 15:46:00 -0500
committer Craig Jennings <c@cjennings.net> 2026-05-30 15:46:00 -0500
commit b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57 (patch)
tree be0f328c6be0ddb8d999730cc6fa07277ebc2ec7 /.ai/scripts
parent 968a39bb3978e6ad499447ce173e2265dee772a2 (diff)
download rulesets-b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57.tar.gz
rulesets-b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57.zip
fix(drill-deck): cut leakage false positives and codify source/date conventions
Health ran the new leakage check on a 43-card deck and hit two false-positive classes. The check read the whole card body, so a =Source: <label> — <url>= citation line inflated the front/back overlap whenever the URL slug repeated the question's words. Range/category cards ("What are the HbA1c ranges across normal, prediabetes, and diabetes?") tripped it too, because the question's categories echo in the answer even though the recalled content is the numbers. drill-deck-stats.py now routes leakage through an is_leaky helper. It strips =Source:= and created-date lines before computing overlap, and exempts a card when the answer carries a numeric range or threshold the question lacks. leakage_ratio itself is unchanged, so the genuine-restatement case still flags. Two body conventions now hold: a =Source:= citation goes at the end of a card after two blank lines, and no created/added date goes on a card. drill-to-anki.py now strips =Created:= / =:CREATED:= lines from the back as a backstop, and the workflow's Phase C removes them from the source during the rewrite. I added tests for the source-strip, the numeric carve-out, and the created-line strip, and documented all of it in drill-deck-review.org.
Diffstat (limited to '.ai/scripts')
-rwxr-xr-x .ai/scripts/drill-deck-stats.py 44
-rwxr-xr-x .ai/scripts/drill-to-anki.py 9
-rw-r--r-- .ai/scripts/tests/test_drill_deck_stats.py 74
-rw-r--r-- .ai/scripts/tests/test_drill_to_anki.py 5
4 files changed, 127 insertions, 5 deletions
diff --git a/.ai/scripts/drill-deck-stats.py b/.ai/scripts/drill-deck-stats.py
index d0707e2..04c3468 100755
--- a/.ai/scripts/drill-deck-stats.py
+++ b/.ai/scripts/drill-deck-stats.py
@@ -10,6 +10,8 @@ Blocking WARNs (exit 1):
- Non-prompt headings (topic-as-heading not yet rewritten)
- #+TITLE missing, or carrying source-tool jargon ("org-drill")
- Answer leakage: a card whose question echoes most of its own answer
+ (Source: citation lines and created-date lines are excluded from the
+ overlap, and range/category cards that recall numbers are exempted)
- Duplicate / near-duplicate fronts (interference between confusable cards)
Non-blocking NOTEs (exit unaffected):
@@ -41,6 +43,10 @@ ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)
PLANNING_RE = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+SOURCE_LINE_RE = re.compile(r"^\s*source:\s", re.IGNORECASE)
+CREATED_LINE_RE = re.compile(r"^\s*:?created:?\s", re.IGNORECASE)
+RANGE_RE = re.compile(r"\d[^\n]*[-–—]\s*\d")
+THRESHOLD_RE = re.compile(r"[<>≤≥]\s*\d")
BULLET_RE = re.compile(r"^\s*([-+*]|\d+[.)])\s+")
BINARY_LEAD_RE = re.compile(
r"^\s*(is|are|was|were|does|do|did|can|could|should|would|will|has|have|had)\b",
@@ -98,6 +104,41 @@ def leakage_ratio(heading: str, body: str) -> float:
return len(hw & content_words(body)) / len(hw)
+def prose_body(body: str) -> str:
+ """Body with Source: citation and created-date lines removed.
+
+ Those lines are metadata, not the answer. A Source line's URL slug often
+ repeats the question's words, and a created date is bookkeeping — neither
+ should count toward answer-leakage overlap.
+ """
+ return "\n".join(
+ ln for ln in body.splitlines()
+ if not SOURCE_LINE_RE.match(ln) and not CREATED_LINE_RE.match(ln)
+ )
+
+
+def has_distinct_numeric_recall(heading: str, body: str) -> bool:
+ """True if the answer carries numeric ranges/thresholds the question lacks.
+
+ A range/category card ("What are the HbA1c ranges across normal,
+ prediabetes, and diabetes?") echoes its categories in the answer, but the
+ recalled content is the numbers, which the question doesn't give away — so
+ high word overlap isn't leakage.
+ """
+ body_nums = bool(RANGE_RE.search(body) or THRESHOLD_RE.search(body))
+ head_nums = bool(RANGE_RE.search(heading) or THRESHOLD_RE.search(heading))
+ return body_nums and not head_nums
+
+
+def is_leaky(heading: str, body: str) -> bool:
+ """True if a card leaks its answer, after excluding citation lines and
+ numeric-recall (range/category) cards."""
+ prose = prose_body(body)
+ if leakage_ratio(heading, prose) < LEAKAGE_RATIO:
+ return False
+ return not has_distinct_numeric_recall(heading, prose)
+
+
def normalize_heading(heading: str) -> str:
"""Collapse a heading to a comparison key (lowercase, alnum + single spaces)."""
return re.sub(r"\s+", " ", re.sub(r"[^a-z0-9 ]", " ", heading.lower())).strip()
@@ -212,8 +253,7 @@ def main() -> int:
no_id = [c["heading"] for c in cards if not c["has_id"]]
not_prompt = [c["heading"] for c in cards if not is_prompt_form(c["heading"])]
answer_count = sum(1 for c in cards if c["has_answer"])
- leaky = [c["heading"] for c in cards
- if leakage_ratio(c["heading"], c["body"]) >= LEAKAGE_RATIO]
+ leaky = [c["heading"] for c in cards if is_leaky(c["heading"], c["body"])]
dups = find_duplicate_fronts(cards)
overloaded = [c["heading"] for c in cards if back_word_count(c["body"]) > BACK_WORD_LIMIT]
listy = [c["heading"] for c in cards if is_list_back(c["body"])]
diff --git a/.ai/scripts/drill-to-anki.py b/.ai/scripts/drill-to-anki.py
index 1050021..9fe954e 100755
--- a/.ai/scripts/drill-to-anki.py
+++ b/.ai/scripts/drill-to-anki.py
@@ -90,15 +90,18 @@ def escape_html(s: str) -> str:
def strip_org_metadata(body_lines: list[str]) -> list[str]:
- """Drop :PROPERTIES: drawers and SCHEDULED/DEADLINE/CLOSED planning lines.
+ """Drop :PROPERTIES: drawers, planning lines, and created-date lines.
Org-drill needs these in the source file (SRS state lives in the
PROPERTIES drawer; SCHEDULED carries the next-review date), but they
- are noise on the back of an Anki card.
+ are noise on the back of an Anki card. A created/added date never
+ belongs on a card, so a stray "Created:" or ":CREATED:" body line is
+ dropped too.
"""
cleaned: list[str] = []
in_drawer = False
planning_re = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
+ created_re = re.compile(r"^\s*:?created:?\s", re.IGNORECASE)
drawer_start_re = re.compile(r"^\s*:PROPERTIES:\s*$")
drawer_end_re = re.compile(r"^\s*:END:\s*$")
for line in body_lines:
@@ -109,7 +112,7 @@ def strip_org_metadata(body_lines: list[str]) -> list[str]:
if drawer_start_re.match(line):
in_drawer = True
continue
- if planning_re.match(line):
+ if planning_re.match(line) or created_re.match(line):
continue
cleaned.append(line)
return cleaned
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
index 80b9913..d60084d 100644
--- a/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -303,3 +303,77 @@ def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
r = _run(f)
assert r.returncode == 0
assert "NOTE" in r.stdout
+
+
+# --- leakage refinements: source-line strip + numeric carve-out ---
+
+def test_prose_body_strips_source_and_created_lines(stats):
+ body = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x"
+ assert stats.prose_body(body) == "The real answer here."
+
+
+def test_has_distinct_numeric_recall_true_for_range_card(stats):
+ assert stats.has_distinct_numeric_recall(
+ "What are the HbA1c ranges across normal, prediabetes, and diabetes?",
+ "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.",
+ ) is True
+
+
+def test_has_distinct_numeric_recall_false_without_numbers(stats):
+ assert stats.has_distinct_numeric_recall("What is LEO?", "Low Earth Orbit.") is False
+
+
+def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats):
+ heading = "What blood pressure constitutes a hypertensive crisis?"
+ body = ("A reading at or above 180/120.\n"
+ "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis")
+ assert stats.is_leaky(heading, body) is False
+
+
+def test_is_leaky_false_for_numeric_range_card(stats):
+ heading = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
+ body = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
+ assert stats.is_leaky(heading, body) is False
+
+
+def test_is_leaky_true_for_genuine_restatement(stats):
+ heading = "primary orbital regimes satellites classification"
+ body = "The primary orbital regimes satellites classification scheme."
+ assert stats.is_leaky(heading, body) is True
+
+
+SOURCE_LINE_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What blood pressure constitutes a hypertensive crisis? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+A reading at or above 180/120.
+
+Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure
+"""
+
+RANGE_CARD_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.
+"""
+
+
+def test_cli_source_line_overlap_is_not_flagged(tmp_path):
+ f = tmp_path / "source.org"
+ f.write_text(SOURCE_LINE_DECK)
+ r = _run(f)
+ assert r.returncode == 0
+
+
+def test_cli_numeric_range_card_is_not_flagged(tmp_path):
+ f = tmp_path / "range.org"
+ f.write_text(RANGE_CARD_DECK)
+ r = _run(f)
+ assert r.returncode == 0
diff --git a/.ai/scripts/tests/test_drill_to_anki.py b/.ai/scripts/tests/test_drill_to_anki.py
index 6c5ef9b..fc17817 100644
--- a/.ai/scripts/tests/test_drill_to_anki.py
+++ b/.ai/scripts/tests/test_drill_to_anki.py
@@ -121,6 +121,11 @@ def test_strip_org_metadata_unclosed_drawer_swallows_the_rest(drill):
assert drill.strip_org_metadata(body) == []
+def test_strip_org_metadata_drops_created_date_line(drill):
+ # A created/added date never belongs on a card back.
+ assert drill.strip_org_metadata(["Created: 2026-05-30", "real answer"]) == ["real answer"]
+
+
# --- parse (pure, core parser) ---
SECTIONED = """* Orbital Regimes