about summary refs log tree commit diff
path: root/.ai/scripts/tests
diff options
context:
space:
mode:
author Craig Jennings <c@cjennings.net> 2026-05-30 15:46:00 -0500
committer Craig Jennings <c@cjennings.net> 2026-05-30 15:46:00 -0500
commit b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57 (patch)
tree be0f328c6be0ddb8d999730cc6fa07277ebc2ec7 /.ai/scripts/tests
parent 968a39bb3978e6ad499447ce173e2265dee772a2 (diff)
download rulesets-b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57.tar.gz
rulesets-b80a9ceb3fc9cdca9798b48fbc4f9ab9c1592b57.zip
fix(drill-deck): cut leakage false positives and codify source/date conventions
Health ran the new leakage check on a 43-card deck and hit two false-positive classes. The check read the whole card body, so a =Source: <label> — <url>= citation line inflated the front/back overlap whenever the URL slug repeated the question's words. Range/category cards ("What are the HbA1c ranges across normal, prediabetes, and diabetes?") tripped it too, because the question's categories echo in the answer even though the recalled content is the numbers. drill-deck-stats.py now routes leakage through an is_leaky helper. It strips =Source:= and created-date lines before computing overlap, and exempts a card when the answer carries a numeric range or threshold the question lacks. leakage_ratio itself is unchanged, so the genuine-restatement case still flags. Two body conventions now hold: a =Source:= citation goes at the end of a card after two blank lines, and no created/added date goes on a card. drill-to-anki.py now strips =Created:= / =:CREATED:= lines from the back as a backstop, and the workflow's Phase C removes them from the source during the rewrite. I added tests for the source-strip, the numeric carve-out, and the created-line strip, and documented all of it in drill-deck-review.org.
Diffstat (limited to '.ai/scripts/tests')
-rw-r--r-- .ai/scripts/tests/test_drill_deck_stats.py 74
-rw-r--r-- .ai/scripts/tests/test_drill_to_anki.py 5
2 files changed, 79 insertions, 0 deletions
diff --git a/.ai/scripts/tests/test_drill_deck_stats.py b/.ai/scripts/tests/test_drill_deck_stats.py
index 80b9913..d60084d 100644
--- a/.ai/scripts/tests/test_drill_deck_stats.py
+++ b/.ai/scripts/tests/test_drill_deck_stats.py
@@ -303,3 +303,77 @@ def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
r = _run(f)
assert r.returncode == 0
assert "NOTE" in r.stdout
+
+
+# --- leakage refinements: source-line strip + numeric carve-out ---
+
+def test_prose_body_strips_source_and_created_lines(stats):
+ body = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x"
+ assert stats.prose_body(body) == "The real answer here."
+
+
+def test_has_distinct_numeric_recall_true_for_range_card(stats):
+ assert stats.has_distinct_numeric_recall(
+ "What are the HbA1c ranges across normal, prediabetes, and diabetes?",
+ "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.",
+ ) is True
+
+
+def test_has_distinct_numeric_recall_false_without_numbers(stats):
+ assert stats.has_distinct_numeric_recall("What is LEO?", "Low Earth Orbit.") is False
+
+
+def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats):
+ heading = "What blood pressure constitutes a hypertensive crisis?"
+ body = ("A reading at or above 180/120.\n"
+ "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis")
+ assert stats.is_leaky(heading, body) is False
+
+
+def test_is_leaky_false_for_numeric_range_card(stats):
+ heading = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
+ body = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
+ assert stats.is_leaky(heading, body) is False
+
+
+def test_is_leaky_true_for_genuine_restatement(stats):
+ heading = "primary orbital regimes satellites classification"
+ body = "The primary orbital regimes satellites classification scheme."
+ assert stats.is_leaky(heading, body) is True
+
+
+SOURCE_LINE_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What blood pressure constitutes a hypertensive crisis? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+A reading at or above 180/120.
+
+Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure
+"""
+
+RANGE_CARD_DECK = """#+TITLE: Test Flashcards
+
+* Section
+** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill:
+:PROPERTIES:
+:ID: c1
+:END:
+HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.
+"""
+
+
+def test_cli_source_line_overlap_is_not_flagged(tmp_path):
+ f = tmp_path / "source.org"
+ f.write_text(SOURCE_LINE_DECK)
+ r = _run(f)
+ assert r.returncode == 0
+
+
+def test_cli_numeric_range_card_is_not_flagged(tmp_path):
+ f = tmp_path / "range.org"
+ f.write_text(RANGE_CARD_DECK)
+ r = _run(f)
+ assert r.returncode == 0
diff --git a/.ai/scripts/tests/test_drill_to_anki.py b/.ai/scripts/tests/test_drill_to_anki.py
index 6c5ef9b..fc17817 100644
--- a/.ai/scripts/tests/test_drill_to_anki.py
+++ b/.ai/scripts/tests/test_drill_to_anki.py
@@ -121,6 +121,11 @@ def test_strip_org_metadata_unclosed_drawer_swallows_the_rest(drill):
assert drill.strip_org_metadata(body) == []
+def test_strip_org_metadata_drops_created_date_line(drill):
+ # A created/added date never belongs on a card back.
+ assert drill.strip_org_metadata(["Created: 2026-05-30", "real answer"]) == ["real answer"]
+
+
# --- parse (pure, core parser) ---
SECTIONED = """* Orbital Regimes