aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/flashcard-to-anki.py
blob: 7227683f0086036e6f97886c9050f0684b768738 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "genanki>=0.13",
# ]
# ///
"""Convert an org-drill file into an Anki .apkg deck.

Parses org-drill structure:
  - Top-level "* Section" headings become tags on every card under them.
  - Each "** Card name :drill:" entry becomes a card. Front = heading
    text (sans :drill: tag). Back = entry body with newlines converted
    to <br>.

Deck name defaults to the input basename, case preserved. Deck and model
IDs are derived from the deck name via a stable hash, so re-importing the
same deck updates existing cards instead of duplicating them.

Output defaults to ~/sync/phone/anki/<input-basename>.apkg. The .apkg is
a mobile-Anki artifact the phone picks up from its sync dir, so it lands
there rather than next to the org source.

Usage:
  flashcard-to-anki.py <input.org>
  flashcard-to-anki.py <input.org> --deck "My Deck Name"
  flashcard-to-anki.py <input.org> --output /path/to/deck.apkg

Requires genanki, which uv resolves automatically via the PEP 723
script metadata above. No venv or system install needed.
"""
from __future__ import annotations

import argparse
import hashlib
import re
import sys
from pathlib import Path

import genanki

# Generated ids fall in [ID_BASE, ID_BASE + ID_RANGE), i.e. from
# 1_500_000_000 up to (but excluding) 2_000_000_000, which fits in a
# signed 32-bit integer.
ID_BASE = 1_500_000_000
ID_RANGE = 500_000_000


def stable_id(name: str, salt: str) -> int:
    """Map a (name, salt) pair to a repeatable integer id.

    The id is computed from the SHA-256 of "<salt>:<name>", so equal
    inputs always give equal ids across runs. Different salts let one
    name yield distinct ids (e.g. one for a deck, one for its model).

    Returns an int in [ID_BASE, ID_BASE + ID_RANGE).
    """
    digest = hashlib.sha256(f"{salt}:{name}".encode()).digest()
    # First four digest bytes, big-endian: the same number as the first
    # eight hex digits of the hex digest.
    prefix = int.from_bytes(digest[:4], "big")
    offset = prefix % ID_RANGE
    return ID_BASE + offset


def make_model(deck_name: str) -> genanki.Model:
    """Build the two-field (Front/Back) note model used for `deck_name`.

    The model id comes from `stable_id(deck_name, "model")`, so the same
    deck name always produces the same model id. The single card template
    shows the front as the question and the front, a rule, and the back
    as the answer.
    """
    model_id = stable_id(deck_name, "model")
    model_name = f"{deck_name} (Craig)"

    note_fields = [{"name": "Front"}, {"name": "Back"}]
    card_template = {
        "name": "Card 1",
        "qfmt": "{{Front}}",
        "afmt": '{{FrontSide}}<hr id="answer">{{Back}}',
    }
    stylesheet = (
        ".card { font-family: sans-serif; font-size: 18px; "
        "color: #222; background: #fafafa; line-height: 1.45; }\n"
        "hr#answer { margin: 14px 0; }\n"
    )

    return genanki.Model(
        model_id,
        model_name,
        fields=note_fields,
        templates=[card_template],
        css=stylesheet,
    )


def section_to_tag(title: str) -> str:
    """Turn a section title into a lowercase, hyphen-separated tag.

    The title is lowercased, every run of characters outside a-z / 0-9
    acts as a separator, and the remaining pieces are joined with "-".
    A title with no such characters yields an empty string.
    """
    words = re.findall(r"[a-z0-9]+", title.lower())
    return "-".join(words)


def escape_html(s: str) -> str:
    """Replace "&", "<" and ">" in `s` with their HTML entities.

    Quote characters are left untouched.
    """
    # A single translate pass gives the same result as replacing "&"
    # first and then "<" / ">": already-emitted entities are never
    # rescanned.
    entities = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
    return s.translate(entities)


def strip_org_metadata(body_lines: list[str]) -> list[str]:
    """Drop :PROPERTIES: drawers, planning lines, and created-date lines.

    Org-drill needs these in the source file (SRS state lives in the
    PROPERTIES drawer; SCHEDULED carries the next-review date), but they
    are noise on the back of an Anki card. A created/added date never
    belongs on a card, so a stray "Created:" or ":CREATED:" body line is
    dropped too.

    Only lines whose first token is the keyword followed by a colon
    count as created-date lines. Ordinary prose that merely starts with
    the word "created" (e.g. "Created by Ken Thompson") is card content
    and is kept.

    Args:
        body_lines: Raw lines of one card entry, without the heading.

    Returns:
        A new list with the metadata lines removed; the order of the
        remaining lines is unchanged.
    """
    planning_re = re.compile(r"^\s*(SCHEDULED|DEADLINE|CLOSED):\s")
    # The trailing colon is mandatory so free text beginning with
    # "created ..." is not mistaken for metadata; `(?:\s|$)` also catches
    # a bare "Created:" with nothing after it.
    created_re = re.compile(r"^\s*:?created:(?:\s|$)", re.IGNORECASE)
    drawer_start_re = re.compile(r"^\s*:PROPERTIES:\s*$")
    drawer_end_re = re.compile(r"^\s*:END:\s*$")

    cleaned: list[str] = []
    in_drawer = False
    for line in body_lines:
        if in_drawer:
            # Everything up to and including :END: belongs to the drawer.
            if drawer_end_re.match(line):
                in_drawer = False
            continue
        if drawer_start_re.match(line):
            in_drawer = True
            continue
        if planning_re.match(line) or created_re.match(line):
            continue
        cleaned.append(line)
    return cleaned


def parse(org_text: str) -> list[tuple[str, str, str]]:
    """Return [(front, back_html, tag), ...] for every :drill: card.

    A "* Title" heading sets the current section. A "** Title :drill:"
    heading starts a card whose body runs until the next level-1 or
    level-2 heading, whether or not that heading is itself a drill card.
    Deeper headings ("*** ...") stay part of the card body.

    Args:
        org_text: Full text of the org-drill file.

    Returns:
        One tuple per card, in file order:
          - front: the heading text without the :drill: tag (plain text,
            not HTML-escaped).
          - back_html: the body with org metadata removed, surrounding
            blank lines trimmed, each line HTML-escaped, joined by <br>.
          - tag: the slug of the enclosing section title, or "drill" when
            there is no section or its title produces an empty slug.
    """
    section_re = re.compile(r"^\*\s+(.+?)\s*$")
    card_re = re.compile(r"^\*\*\s+(.+?)\s+:drill:\s*$")
    # Any level-1 or level-2 heading closes the current card. Without
    # this, a sibling "** heading" lacking :drill: (and everything under
    # it) would be appended to the previous card's back.
    entry_end_re = re.compile(r"^\*{1,2}\s")

    cards: list[tuple[str, str, str]] = []
    current_section: str | None = None

    lines = org_text.splitlines()
    i = 0
    while i < len(lines):
        line = lines[i]

        sec = section_re.match(line)
        if sec:
            current_section = sec.group(1).strip()
            i += 1
            continue

        card = card_re.match(line)
        if not card:
            i += 1
            continue

        front = card.group(1).strip()
        i += 1
        body_start = i
        # Leave `i` on the terminating heading so the outer loop sees it.
        while i < len(lines) and not entry_end_re.match(lines[i]):
            i += 1

        body_lines = strip_org_metadata(lines[body_start:i])
        while body_lines and not body_lines[0].strip():
            body_lines.pop(0)
        while body_lines and not body_lines[-1].strip():
            body_lines.pop()
        back_html = "<br>".join(escape_html(ln) for ln in body_lines)

        # A title with no ASCII letters or digits slugs to "", which is
        # not a usable tag; fall back to the default in that case too.
        tag = (section_to_tag(current_section) if current_section else "") or "drill"
        cards.append((front, back_html, tag))

    return cards


def build(cards: list[tuple[str, str, str]], deck_name: str) -> genanki.Deck:
    """Assemble a genanki deck from parsed cards.

    Args:
        cards: (front, back_html, tag) tuples; `front` is plain text and
            `back_html` is already HTML.
        deck_name: Deck title; also the seed for the deck and model ids.

    Returns:
        A deck holding one note per card, each carrying its single tag.
    """
    deck = genanki.Deck(stable_id(deck_name, "deck"), deck_name)
    model = make_model(deck_name)
    for front, back, tag in cards:
        note = genanki.Note(
            model=model,
            # Note fields are HTML. The back is escaped upstream; the
            # front is raw heading text, so escape it here or "<", ">"
            # and "&" in a question would be interpreted as markup.
            fields=[escape_html(front), back],
            tags=[tag],
            # The GUID is keyed on the raw front text (not the escaped
            # form) so it stays the same as for previously generated
            # decks and re-imports keep matching existing notes.
            guid=genanki.guid_for(front),
        )
        deck.add_note(note)
    return deck


def default_deck_name(input_path: Path) -> str:
    """Return the deck name used when none is given: the file's stem."""
    stem = input_path.stem
    return stem


def default_output_path(input_path: Path) -> Path:
    """Return ~/sync/phone/anki/<stem>.apkg for the given input file."""
    package_name = input_path.stem + ".apkg"
    return Path.home().joinpath("sync", "phone", "anki", package_name)


def main() -> int:
    """Command-line entry point: convert one org-drill file to an .apkg.

    Parses the arguments, reads the input file, extracts the :drill:
    cards, and writes the deck package. Nothing is created on disk unless
    there is at least one card to write.

    Returns:
        0 on success; 1 if the input is missing or unreadable, or if it
        contains no :drill: cards (a message is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description="Convert an org-drill file into an Anki .apkg deck.",
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to the org-drill source file.",
    )
    parser.add_argument(
        "--deck",
        help="Deck name. Defaults to the input basename.",
    )
    parser.add_argument(
        "--output",
        type=Path,
        help="Output .apkg path. Defaults to "
             "~/sync/phone/anki/<input-basename>.apkg.",
    )
    args = parser.parse_args()

    input_path: Path = args.input.expanduser().resolve()
    if not input_path.is_file():
        print(f"error: {input_path} not found", file=sys.stderr)
        return 1

    # Report unreadable or non-UTF-8 input the same way as other
    # failures instead of letting a traceback escape.
    try:
        org_text = input_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"error: cannot read {input_path}: {exc}", file=sys.stderr)
        return 1

    cards = parse(org_text)
    if not cards:
        print(f"error: no :drill: cards found in {input_path}", file=sys.stderr)
        return 1

    deck_name = args.deck or default_deck_name(input_path)
    output_path: Path = (args.output or default_output_path(input_path)).expanduser().resolve()
    # Create the target directory only once we know a deck will be
    # written, so a failed run leaves no stray directories behind.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    deck = build(cards, deck_name)
    genanki.Package(deck).write_to_file(str(output_path))
    print(f"wrote {output_path} ({len(cards)} cards, deck '{deck_name}')")
    return 0


# When run as a script, use main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())