# .ai/scripts/tests/test_flashcard_stats.py

"""Tests for flashcard-stats.py: pure heuristics, card parsing, and the CLI gate.

The script is plain python3 (no third-party deps), so its pure helpers are
loaded straight from the file and called directly; the inventory/gate behavior
(warnings and exit codes) is exercised through the CLI.
"""
from __future__ import annotations

import importlib.util
import subprocess
import sys
from pathlib import Path

import pytest

# Script under test: flashcard-stats.py, located one directory above the
# directory that contains this test file.
SCRIPT = Path(__file__).resolve().parents[1] / "flashcard-stats.py"


@pytest.fixture(scope="module")
def stats():
    """Load the script at SCRIPT as a module object, once per test module.

    The script's file name contains a hyphen, so it cannot be named in an
    ``import`` statement; it is loaded from its path through importlib under
    the module name ``flashcard_stats`` instead.
    """
    module_spec = importlib.util.spec_from_file_location("flashcard_stats", SCRIPT)
    assert module_spec and module_spec.loader
    loaded = importlib.util.module_from_spec(module_spec)
    # Executes the script's top-level code so its helpers become attributes.
    module_spec.loader.exec_module(loaded)
    return loaded


# --- is_prompt_form (pure) ---

def test_is_prompt_form_question_mark(stats):
    """Expect is_prompt_form to return True for a heading phrased as a question."""
    verdict = stats.is_prompt_form("What is DeepSat?")
    assert verdict is True


def test_is_prompt_form_imperative_verb(stats):
    """Expect is_prompt_form to return True for a heading that opens with "Spell out"."""
    heading = "Spell out the orbital regimes"
    verdict = stats.is_prompt_form(heading)
    assert verdict is True


def test_is_prompt_form_imperative_is_case_insensitive(stats):
    """Expect is_prompt_form to return True for an all-lowercase imperative heading."""
    heading = "introduce yourself"
    verdict = stats.is_prompt_form(heading)
    assert verdict is True


def test_is_prompt_form_topic_heading_is_not_a_prompt(stats):
    """Expect is_prompt_form to return False for the bare topic heading "DeepSat"."""
    verdict = stats.is_prompt_form("DeepSat")
    assert verdict is False


def test_is_prompt_form_strips_trailing_punctuation_off_first_word(stats):
    """Expect is_prompt_form to return True when the first word carries a colon.

    The heading starts with "List:"; per the test name, the trailing
    punctuation is expected to be ignored when the first word is examined.
    """
    heading = "List: the founders"
    verdict = stats.is_prompt_form(heading)
    assert verdict is True


# --- CLI inventory + gate (integration) ---

# Deck with a #+TITLE line and a single :drill: card that is phrased as a
# question and has a PROPERTIES drawer with an :ID:. The CLI test that uses it
# expects exit status 0 and the word "clean" in stdout.
CLEAN_DECK = """#+TITLE: DeepSat Flashcards

* Section
** What is DeepSat? :drill:
:PROPERTIES:
:ID: card-1
:END:
A satellite company.
"""

# Deck whose title contains "Org-Drill" and whose only :drill: card is the bare
# topic heading "DeepSat" with no PROPERTIES drawer and an "*** Answer"
# sub-heading. The CLI test that uses it expects exit status 1, "WARN" in
# stdout, and "org-drill" in the lowercased stdout.
DIRTY_DECK = """#+TITLE: DeepSat Org-Drill Flashcards

* Section
** DeepSat :drill:
*** Answer
A satellite company.
"""


def _run(path):
    """Run the script under test on *path* using the current interpreter.

    Returns the ``subprocess.CompletedProcess``. stdout and stderr are
    captured as text, and a non-zero exit status does not raise, so callers
    inspect ``returncode`` themselves.
    """
    command = [sys.executable, str(SCRIPT), str(path)]
    return subprocess.run(command, capture_output=True, text=True)


def test_cli_clean_deck_exits_zero(tmp_path):
    """Run the CLI on CLEAN_DECK; expect exit status 0 and "clean" in stdout."""
    deck_path = tmp_path / "clean.org"
    deck_path.write_text(CLEAN_DECK)
    result = _run(deck_path)
    assert result.returncode == 0
    assert "clean" in result.stdout


def test_cli_dirty_deck_warns_and_exits_one(tmp_path):
    """Run the CLI on DIRTY_DECK; expect exit status 1 and warning output."""
    deck_path = tmp_path / "dirty.org"
    deck_path.write_text(DIRTY_DECK)
    result = _run(deck_path)
    assert result.returncode == 1
    assert "WARN" in result.stdout
    # "org-drill" showing up means the title-jargon audit fired.
    lowered_output = result.stdout.lower()
    assert "org-drill" in lowered_output


def test_cli_missing_file_exits_two(tmp_path):
    """Point the CLI at a path that is never created; expect exit status 2."""
    missing_path = tmp_path / "nope.org"
    result = _run(missing_path)
    assert result.returncode == 2


# Same single card as CLEAN_DECK but without any #+TITLE line. The CLI test
# that uses it expects exit status 1 and "no #+TITLE" in stdout.
NO_TITLE_DECK = """* Section
** What is DeepSat? :drill:
:PROPERTIES:
:ID: card-1
:END:
A satellite company.
"""

# Two cards, only one PROPERTIES drawer: the first card has none, the second
# has one. The CLI test that uses it expects exit status 1 and
# "does not match card count" in stdout.
PROP_MISMATCH_DECK = """#+TITLE: DeepSat Flashcards

* Section
** What is DeepSat? :drill:
A satellite company.
** Who founded it? :drill:
:PROPERTIES:
:ID: card-2
:END:
The team.
"""


def test_cli_missing_title_warns_and_exits_one(tmp_path):
    """Run the CLI on NO_TITLE_DECK; expect exit status 1 and a missing-title message."""
    deck_path = tmp_path / "notitle.org"
    deck_path.write_text(NO_TITLE_DECK)
    result = _run(deck_path)
    assert result.returncode == 1
    assert "no #+TITLE" in result.stdout


def test_cli_properties_count_mismatch_warns_and_exits_one(tmp_path):
    """Run the CLI on PROP_MISMATCH_DECK; expect exit status 1 and a count-mismatch message."""
    deck_path = tmp_path / "mismatch.org"
    deck_path.write_text(PROP_MISMATCH_DECK)
    result = _run(deck_path)
    assert result.returncode == 1
    assert "does not match card count" in result.stdout


# --- content_words / leakage_ratio (pure) ---

def test_content_words_drops_stopwords_and_short_tokens(stats):
    """Expect content_words to keep only "leo" and "regime" from the sample question."""
    expected = {"leo", "regime"}
    found = stats.content_words("What is the LEO regime?")
    assert found == expected


def test_leakage_ratio_high_when_answer_restates_question(stats):
    """Expect leakage_ratio to be exactly 1.0 when the answer repeats every question word."""
    question = "primary orbital regimes satellites"
    answer = "the primary orbital regimes for satellites are listed"
    assert stats.leakage_ratio(question, answer) == 1.0


def test_leakage_ratio_zero_for_short_question(stats):
    """Expect leakage_ratio to be 0.0 for a question with a single content word.

    "LEO" is the only content word, which is below LEAKAGE_MIN_WORDS, so the
    overlap with the answer is treated as noise.
    """
    question = "What is LEO?"
    answer = "LEO means low earth orbit"
    ratio = stats.leakage_ratio(question, answer)
    assert ratio == 0.0


# --- normalize_heading (pure) ---

def test_normalize_heading_lowercases_and_strips_punctuation(stats):
    """Expect normalize_heading to lowercase, trim, and turn the dotted acronym into spaced letters."""
    raw_heading = "  What is L.E.O.? "
    normalized = stats.normalize_heading(raw_heading)
    assert normalized == "what is l e o"


def test_normalize_heading_collisions_match(stats):
    """Expect two headings that differ only in case and punctuation to normalize identically."""
    with_punctuation = stats.normalize_heading("What is LEO?")
    plain_lowercase = stats.normalize_heading("what is leo")
    assert with_punctuation == plain_lowercase


# --- is_binary_prompt (pure) ---

def test_is_binary_prompt_true_for_yes_no_lead(stats):
    """Expect is_binary_prompt to return True for a question that opens with "Is"."""
    verdict = stats.is_binary_prompt("Is LEO below GEO?")
    assert verdict is True


def test_is_binary_prompt_true_for_a_or_b(stats):
    """Expect is_binary_prompt to return True for an "A or B" question."""
    verdict = stats.is_binary_prompt("Is it LEO or GEO?")
    assert verdict is True


def test_is_binary_prompt_false_for_open_question(stats):
    """Expect is_binary_prompt to return False for an open-ended "What ..." question."""
    question = "What distinguishes LEO from GEO?"
    verdict = stats.is_binary_prompt(question)
    assert verdict is False


# --- back_word_count / is_list_back (pure) ---

def test_back_word_count(stats):
    """Expect back_word_count to return 3 for three words and 0 for an empty string."""
    cases = (("one two three", 3), ("", 0))
    for body, expected_count in cases:
        assert stats.back_word_count(body) == expected_count


def test_is_list_back_true_for_bulleted_body(stats):
    """Expect is_list_back to return True for a body made of three "- " bullet lines."""
    body = "- LEO\n- MEO\n- GEO"
    verdict = stats.is_list_back(body)
    assert verdict is True


def test_is_list_back_false_for_prose(stats):
    """Expect is_list_back to return False for two lines of plain prose."""
    body = "Low Earth Orbit.\nThe closest regime."
    verdict = stats.is_list_back(body)
    assert verdict is False


def test_is_list_back_false_for_single_bullet(stats):
    """Expect is_list_back to return False when only one line is a bullet."""
    body = "- only one bullet\nplain prose line"
    verdict = stats.is_list_back(body)
    assert verdict is False


# --- parse_cards (pure) ---

def test_parse_cards_captures_body_without_drawer_planning_or_answer_header(stats):
    """Parse a one-card deck and check the fields parse_cards reports for it.

    The card carries a PROPERTIES drawer, a SCHEDULED planning line and an
    "*** Answer" sub-heading; the parsed body is expected to contain only the
    answer text.
    """
    deck_text = (
        "* Sec\n"
        "** Q one? :drill:\n"
        ":PROPERTIES:\n"
        ":ID: id-1\n"
        ":END:\n"
        "SCHEDULED: <2026-05-20 Wed>\n"
        "*** Answer\n"
        "the real answer\n"
    )
    deck_lines = deck_text.splitlines()
    cards, prop_count = stats.parse_cards(deck_lines)
    assert prop_count == 1
    assert len(cards) == 1
    card = cards[0]
    assert card["heading"] == "Q one?"
    assert card["has_id"] is True
    assert card["has_answer"] is True
    assert card["body"] == "the real answer"


def test_find_duplicate_fronts_matches_normalized_headings(stats):
    """Expect find_duplicate_fronts to pair the two LEO headings that differ only in case."""
    headings = ("What is LEO?", "what is leo?", "What is GEO?")
    cards = [{"heading": heading} for heading in headings]
    duplicates = stats.find_duplicate_fronts(cards)
    assert len(duplicates) == 1
    first_pair = duplicates[0]
    assert first_pair == ("What is LEO?", "what is leo?")


# --- CLI: new blocking checks ---

# Deck with one card whose answer repeats the wording of its question. The CLI
# test that uses it expects exit status 1 and "leak" in the lowercased stdout.
LEAKY_DECK = """#+TITLE: Test Flashcards

* Section
** What are the primary orbital regimes for satellites? :drill:
:PROPERTIES:
:ID: c1
:END:
The primary orbital regimes for satellites are listed here.
"""

# Deck with two cards that share the heading "What is LEO?" but have different
# IDs and bodies. The CLI test that uses it expects exit status 1 and
# "duplicate" in the lowercased stdout.
DUP_FRONT_DECK = """#+TITLE: Test Flashcards

* Section
** What is LEO? :drill:
:PROPERTIES:
:ID: c1
:END:
Low Earth Orbit.
** What is LEO? :drill:
:PROPERTIES:
:ID: c2
:END:
Low Earth Orbit, restated.
"""


def test_cli_answer_leakage_warns_and_exits_one(tmp_path):
    """Run the CLI on LEAKY_DECK; expect exit status 1 and a leak mention in stdout."""
    deck_path = tmp_path / "leaky.org"
    deck_path.write_text(LEAKY_DECK)
    result = _run(deck_path)
    assert result.returncode == 1
    lowered_output = result.stdout.lower()
    assert "leak" in lowered_output


def test_cli_duplicate_front_warns_and_exits_one(tmp_path):
    """Run the CLI on DUP_FRONT_DECK; expect exit status 1 and a duplicate mention in stdout."""
    deck_path = tmp_path / "dup.org"
    deck_path.write_text(DUP_FRONT_DECK)
    result = _run(deck_path)
    assert result.returncode == 1
    lowered_output = result.stdout.lower()
    assert "duplicate" in lowered_output


# --- CLI: non-blocking NOTEs keep exit 0 ---

# Deck with three cards, each with its own ID: a question opening with "Is",
# a card whose body is a three-item bullet list, and a card with one long
# prose paragraph as its body. The CLI test that uses it expects exit status 0
# and "NOTE" in stdout.
# NOTE(review): presumably each card is meant to trip one non-blocking check
# (binary prompt, list back, long back) — confirm against the script.
NOTES_DECK = """#+TITLE: Test Flashcards

* Section
** Is LEO closer than GEO? :drill:
:PROPERTIES:
:ID: c1
:END:
Yes, much closer.
** What orbital regimes exist? :drill:
:PROPERTIES:
:ID: c2
:END:
- LEO
- MEO
- GEO
** Describe the platform elements in full :drill:
:PROPERTIES:
:ID: c3
:END:
The platform carries power generation, propulsion, attitude control, thermal regulation, and radio hardware arranged around a central frame. Each element draws from shared resources and must survive launch loads, vacuum, and radiation. Engineers trade mass against capability when every kilogram raises cost, so redundancy is added only where a single failure would end the mission entirely and cheaper options cannot cover the same risk.
"""


def test_cli_non_blocking_notes_keep_exit_zero(tmp_path):
    """Run the CLI on NOTES_DECK; expect "NOTE" in stdout while the exit status stays 0."""
    deck_path = tmp_path / "notes.org"
    deck_path.write_text(NOTES_DECK)
    result = _run(deck_path)
    assert result.returncode == 0
    assert "NOTE" in result.stdout


# --- leakage refinements: source-line strip + numeric carve-out ---

def test_prose_body_strips_source_and_created_lines(stats):
    """Expect prose_body to return only the answer line, without the Created/Source lines."""
    raw_body = "The real answer here.\nCreated: 2026-05-30\nSource: AHA — https://heart.org/x"
    expected = "The real answer here."
    assert stats.prose_body(raw_body) == expected


def test_has_distinct_numeric_recall_true_for_range_card(stats):
    """Expect has_distinct_numeric_recall to return True when the answer supplies numeric ranges."""
    question = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
    answer = "Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
    verdict = stats.has_distinct_numeric_recall(question, answer)
    assert verdict is True


def test_has_distinct_numeric_recall_false_without_numbers(stats):
    """Expect has_distinct_numeric_recall to return False when the answer has no numbers."""
    question = "What is LEO?"
    answer = "Low Earth Orbit."
    verdict = stats.has_distinct_numeric_recall(question, answer)
    assert verdict is False


def test_is_leaky_false_when_overlap_is_only_in_the_source_line(stats):
    """Expect is_leaky to return False when the question's words recur only in the Source URL."""
    question = "What blood pressure constitutes a hypertensive crisis?"
    answer_line = "A reading at or above 180/120.\n"
    source_line = "Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis"
    verdict = stats.is_leaky(question, answer_line + source_line)
    assert verdict is False


def test_is_leaky_false_for_numeric_range_card(stats):
    """Expect is_leaky to return False for a card whose answer echoes the question but adds numeric ranges."""
    question = "What are the HbA1c ranges across normal, prediabetes, and diabetes?"
    answer = "HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%."
    verdict = stats.is_leaky(question, answer)
    assert verdict is False


def test_is_leaky_true_for_genuine_restatement(stats):
    """Expect is_leaky to return True when the answer repeats the question's words verbatim."""
    question = "primary orbital regimes satellites classification"
    answer = "The primary orbital regimes satellites classification scheme."
    verdict = stats.is_leaky(question, answer)
    assert verdict is True


# Deck with one card whose body is a short answer, a blank line, and a
# "Source:" line whose URL repeats words from the question. The Source line
# contains a non-ASCII em dash. The CLI test that uses it expects exit status 0.
SOURCE_LINE_DECK = """#+TITLE: Test Flashcards

* Section
** What blood pressure constitutes a hypertensive crisis? :drill:
:PROPERTIES:
:ID: c1
:END:
A reading at or above 180/120.

Source: AHA — https://heart.org/high-blood-pressure/hypertensive-crisis-blood-pressure
"""

# Deck with one card whose answer repeats "HbA1c ranges" from the question and
# then lists numeric ranges. The CLI test that uses it expects exit status 0.
RANGE_CARD_DECK = """#+TITLE: Test Flashcards

* Section
** What are the HbA1c ranges across normal, prediabetes, and diabetes? :drill:
:PROPERTIES:
:ID: c1
:END:
HbA1c ranges. Normal: <5.7%. Prediabetes: 5.7-6.4%. Diabetes: >=6.5%.
"""


def test_cli_source_line_overlap_is_not_flagged(tmp_path):
    """Run the CLI on SOURCE_LINE_DECK; expect exit status 0.

    The deck's "Source:" line contains an em dash, so the file is written as
    UTF-8 explicitly. Relying on the platform's locale encoding would make
    the bytes on disk differ between machines.
    NOTE(review): assumes the script decodes its input as UTF-8 — confirm.
    """
    deck_path = tmp_path / "source.org"
    deck_path.write_text(SOURCE_LINE_DECK, encoding="utf-8")
    result = _run(deck_path)
    # Show the CLI output on failure so the offending warning is visible.
    assert result.returncode == 0, result.stdout + result.stderr


def test_cli_numeric_range_card_is_not_flagged(tmp_path):
    """Run the CLI on RANGE_CARD_DECK; expect exit status 0."""
    deck_path = tmp_path / "range.org"
    deck_path.write_text(RANGE_CARD_DECK)
    result = _run(deck_path)
    assert result.returncode == 0