aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/drill-deck-stats.py
blob: 72d1cdeed08a6e1ae5cad7ed6bf798a27deca333 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
"""Inventory + workflow-violation warnings for an org-drill deck source file.

Reports counts and flags violations:
- Total cards (depth-2 `:drill:` headings)
- PROPERTIES drawer count (should match card count)
- `*** Answer` sub-header count (should be 0 per drill-deck-review.org)
- Cards missing :ID: (loses identity across versions, risks SRS-state loss)
- Cards whose heading has neither `?` nor a leading imperative verb
  (likely a topic-as-heading not yet rewritten)

Exits 0 when clean, 1 when any warnings are present, and 2 on a usage
error or a missing input file. Use as a gate before regenerating the Anki
deck or running drill-deck-sync.

Usage:
  drill-deck-stats.py <file.org>
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

# Depth-2 heading that carries the `drill` tag; group 1 is the heading text
# without the trailing tag list. Org writes multiple tags as `:tag1:tag2:`,
# so `drill` is accepted anywhere in that list (`:drill:`, `:drill:spanish:`,
# `:verbs:drill:`), not only when it is the sole tag.
CARD_RE = re.compile(r"^\*\*\s+(.+?)\s+:(?:[\w@#%]+:)*drill:(?:[\w@#%]+:)*\s*$")
# Depth-3 heading whose text starts with the word "Answer".
ANSWER_RE = re.compile(r"^\*\*\*\s+Answer\b")
# Opening and closing lines of a property drawer (leading indent allowed).
PROP_START_RE = re.compile(r"^\s*:PROPERTIES:\s*$")
PROP_END_RE = re.compile(r"^\s*:END:\s*$")
# `:ID: <value>` property line; group 1 is the single-token value.
ID_RE = re.compile(r"^\s*:ID:\s+(\S+)\s*$")
# `#+TITLE:` keyword line (any letter case); group 1 is the title text.
TITLE_RE = re.compile(r"^#\+TITLE:\s*(.+?)\s*$", re.IGNORECASE)
# "org-drill", "org drill" or "orgdrill" as a whole word, any letter case.
SOURCE_TOOL_RE = re.compile(r"\borg[-\s]?drill\b", re.IGNORECASE)

# Lower-case verbs that mark a heading as a directive prompt. A heading with
# no `?` still counts as "prompt form" when its first word is one of these
# (e.g. "Spell these out", "Introduce yourself").
IMPERATIVE_VERBS = frozenset(
    (
        "compare",
        "define",
        "describe",
        "explain",
        "give",
        "identify",
        "introduce",
        "list",
        "name",
        "outline",
        "recall",
        "recite",
        "show",
        "spell",
        "state",
        "summarize",
        "tell",
        "walk",
    )
)


def is_prompt_form(heading: str) -> bool:
    """Return True if *heading* reads as a question or imperative prompt.

    A heading qualifies when it contains ``?`` anywhere, or when its first
    whitespace-delimited word (lower-cased, with any trailing ``:``, ``,``
    or ``;`` removed) is a member of ``IMPERATIVE_VERBS``. An empty or
    whitespace-only heading has no first word and is never prompt form.
    """
    if "?" in heading:
        return True
    words = heading.split(None, 1)
    if not words:
        # Blank heading: nothing to inspect, and indexing [0] would raise
        # IndexError, so report it as non-prompt instead of crashing.
        return False
    return words[0].lower().rstrip(":,;") in IMPERATIVE_VERBS


def _scan_cards(lines: list[str]) -> tuple[list[tuple[str, bool, bool]], int, int]:
    """Collect one record per card heading (a line matching ``CARD_RE``).

    Returns ``(cards, prop_count, answer_count)``:

    - ``cards`` holds ``(heading, has_id, has_answer_subheader)`` per card;
    - ``prop_count`` is the number of ``:PROPERTIES:`` lines seen inside
      card bodies;
    - ``answer_count`` is the number of ``*** Answer`` sub-headers seen
      inside card bodies.
    """
    cards: list[tuple[str, bool, bool]] = []
    prop_count = 0
    answer_count = 0

    total = len(lines)
    i = 0
    while i < total:
        m = CARD_RE.match(lines[i])
        if not m:
            i += 1
            continue

        heading = m.group(1).strip()
        has_id = False
        has_answer = False
        in_drawer = False
        i += 1
        while i < total:
            line = lines[i]
            # A card's body ends at the next heading of depth 1 or 2. Stopping
            # at non-drill `** ` siblings as well keeps their drawers, IDs and
            # sub-headers from being attributed to this card.
            if line.startswith(("* ", "** ")) or CARD_RE.match(line):
                break
            if PROP_START_RE.match(line):
                prop_count += 1
                in_drawer = True
            elif in_drawer and PROP_END_RE.match(line):
                in_drawer = False
            elif in_drawer and ID_RE.match(line):
                has_id = True
            elif ANSWER_RE.match(line):
                answer_count += 1
                has_answer = True
            i += 1
        cards.append((heading, has_id, has_answer))
        # No increment here: lines[i] is the heading that ended this card and
        # must be re-examined by the outer loop as a possible next card.

    return cards, prop_count, answer_count


def _print_examples(headings: list[str], limit: int = 5) -> None:
    """Print up to *limit* headings as an indented list, then a remainder note."""
    for h in headings[:limit]:
        print(f"      - {h}")
    if len(headings) > limit:
        print(f"      - ... and {len(headings) - limit} more")


def main() -> int:
    """Print deck statistics for the file named in ``sys.argv[1]``.

    Returns the process exit status: 0 when no warnings were raised, 1 when
    at least one warning was printed, and 2 when the command line is wrong
    or the file is missing or cannot be read as UTF-8 text.
    """
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} <file.org>", file=sys.stderr)
        return 2

    path = Path(sys.argv[1]).expanduser().resolve()
    if not path.is_file():
        print(f"error: {path} not found", file=sys.stderr)
        return 2

    try:
        lines = path.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError) as exc:
        # Unreadable or non-UTF-8 input is an input error, not a deck warning.
        print(f"error: cannot read {path}: {exc}", file=sys.stderr)
        return 2

    # Only the first 20 lines are searched for the deck title.
    title: str | None = None
    for line in lines[:20]:
        m = TITLE_RE.match(line)
        if m:
            title = m.group(1).strip()
            break

    cards, prop_count, answer_count = _scan_cards(lines)

    not_prompt = [h for h, _, _ in cards if not is_prompt_form(h)]
    no_id = [h for h, has_id, _ in cards if not has_id]
    # Cards (not sub-headers): one card may contain several `*** Answer` lines.
    cards_with_answer = sum(1 for _, _, has_answer in cards if has_answer)

    print(f"{path.name} — drill deck stats")
    print()
    title_display = title if title else "(no #+TITLE)"
    print(f"Deck title: {title_display}")
    print(f"Cards: {len(cards)}")
    drawer_status = "match" if prop_count == len(cards) else f"mismatch (expected {len(cards)})"
    print(f"PROPERTIES drawers: {prop_count} ({drawer_status})")
    answer_status = "clean" if answer_count == 0 else "workflow violation"
    print(f"*** Answer sub-headers: {answer_count} ({answer_status})")
    print(f"Cards missing :ID:: {len(no_id)}")
    print(f"Cards with non-prompt heading: {len(not_prompt)}")
    print()

    # Each failed check adds one to the tally regardless of how many cards
    # it affects; any non-zero tally makes the run exit with status 1.
    warning_count = 0
    if title is None:
        warning_count += 1
        print("WARN: no #+TITLE: line found; deck name will fall back to the file basename")
    elif SOURCE_TOOL_RE.search(title):
        warning_count += 1
        print(
            f"WARN: #+TITLE contains source-tool jargon ('{title}'); "
            "the deck name shows in Anki — drop 'Org-Drill' for a name "
            "that reads well on the consumption side"
        )
    if answer_count:
        warning_count += 1
        print(
            f"WARN: {cards_with_answer} cards have *** Answer sub-headers "
            "(drop per drill-deck-review.org)"
        )
    if prop_count != len(cards):
        warning_count += 1
        print(f"WARN: PROPERTIES count {prop_count} does not match card count {len(cards)}")
    if no_id:
        warning_count += 1
        print(
            f"WARN: {len(no_id)} cards missing :ID:; "
            "losing identity risks SRS-state loss across rewrites"
        )
        _print_examples(no_id)
    if not_prompt:
        warning_count += 1
        print(
            f"WARN: {len(not_prompt)} cards have non-prompt headings "
            "(no '?' and no imperative-verb start); "
            "likely topic-as-heading not yet rewritten"
        )
        _print_examples(not_prompt)

    if warning_count == 0:
        print("clean")
        return 0
    return 1


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())