aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/spec-sort
blob: ebfef821c20d0d09bf8016919a7612902d7ff780 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
#!/usr/bin/env python3
"""spec-sort — one-time docs-pile retrofit for the docs-lifecycle convention.

Classifies every docs/**/*.org outside docs/specs/ by one predicate: a doc
carrying BOTH a "Decisions" heading AND an "Implementation phases" heading is
a spec candidate; everything else is a note. For each candidate it shows an
evidence panel (Status field, decision/finding cookies, the linking todo.org
task, recent dated history, cheap existence checks on phase-named artifacts)
and proposes a lifecycle keyword the evidence supports — conservative
non-terminal (DRAFT) when inconclusive. The helper proposes; a human confirms
every move.

Dry-run report is the default. --apply executes under the fail-safe contract:

  - Clean-worktree preflight: refuses on a dirty git tree (exit 2) unless
    --allow-dirty, which prints exactly what recovery loses.
  - Every candidate must be addressed with --confirm REL=KEYWORD or
    --skip REL; terminal keywords (IMPLEMENTED SUPERSEDED CANCELLED) also
    need --reason REL=TEXT, recorded in the status-history line.
  - The full move + relink plan is computed and validated first (every
    destination free, every link resolvable), written to a plan file, and
    only then executed from that recorded plan.
  - Bare-path mentions of a moving doc inside the rewritten roots are
    reported, never rewritten; they block --apply until --acknowledge-bare
    explicitly waives them.
  - Mid-apply failure stops the run, names what was and wasn't applied, and
    prints the git-restore recovery recipe (plus deletion of newly created
    destination copies, which git restore can't remove).
  - After a successful apply, a residue scan across the rewritten roots must
    find no link still resolving to an old path, or spec-sort exits non-zero
    naming the residue.

Per move: rename to carry the -spec.org suffix, prepend the status heading
(:ID: UUID + dated history line), rewrite the keyword header to the
two-sequence form, mirror the keyword into the Metadata Status field, and
recompute every affected file: link (inbound links to the moved doc AND the
moved doc's own outbound relative links). Rewritten roots: todo.org,
.ai/notes.org, docs/**, .ai/project-workflows/, .ai/project-scripts/.
Reported-never-rewritten: .ai/sessions/ (frozen history) and synced template
paths (.ai/workflows/, .ai/scripts/, .ai/protocols.org — the report names
the canonical claude-templates file instead).

Finally stamps :LAST_SPEC_SORT: YYYY-MM-DD in .ai/notes.org's
* Workflow State section (created idempotently), which permanently clears
the startup nudge. A run with zero candidates still stamps.

Exit codes: 0 done (or clean report), 1 blocked (confirm gate, validation,
bare mentions, residue, mid-apply failure), 2 usage / preflight refusal.

Test hook: SPEC_SORT_INJECT_FAIL_AFTER=N aborts the apply after N write
operations, exercising the recovery path in the bats suite.
"""

import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
import uuid
from datetime import datetime

# Every keyword --confirm accepts; main() rejects anything outside this tuple.
LIFECYCLE = ("DRAFT", "READY", "DOING", "IMPLEMENTED", "SUPERSEDED", "CANCELLED")
# Keywords that additionally need --reason before --apply will proceed.
TERMINAL = {"IMPLEMENTED", "SUPERSEDED", "CANCELLED"}
# The two #+TODO lines transform_spec() writes in place of a doc's own
# #+TODO header (the "two-sequence form").
TODO_HEADER = [
    "#+TODO: TODO | DONE",
    "#+TODO: DRAFT READY DOING | IMPLEMENTED SUPERSEDED CANCELLED",
]

# Project-owned surfaces whose file: links get rewritten.
REWRITE_ROOTS = ("todo.org", ".ai/notes.org", "docs", ".ai/project-workflows", ".ai/project-scripts")
# Frozen or synced surfaces: occurrences are reported, never rewritten.
REPORT_ROOTS = (".ai/sessions", ".ai/workflows", ".ai/scripts", ".ai/protocols.org")
# Synced template paths map to their canonical rulesets file for the report.
SYNCED_PREFIX = (".ai/workflows", ".ai/scripts", ".ai/protocols.org")

# [[file:TARGET]] or [[file:TARGET][DESCRIPTION]]: group 1 is the target
# (including any ::anchor suffix), group 2 the optional description.
LINK_RE = re.compile(r"\[\[file:([^\]\[]+)\](?:\[([^\]\[]*)\])?\]")
# An org heading line: group 1 is the run of stars, group 2 the rest.
HEADING_RE = re.compile(r"^(\*+)\s+(.*)$")
# A progress cookie such as [3/7].
COOKIE_RE = re.compile(r"\[\d+/\d+\]")
# Anything shaped like YYYY-MM-DD (digits only; the date is not validated).
DATED_RE = re.compile(r"\b\d{4}-\d{2}-\d{2}\b")


def read_text(path):
    """Return the UTF-8 text of *path*, or None when it cannot be read.

    Both I/O errors (missing file, directory, permissions) and undecodable
    bytes collapse to None, so callers test the result instead of catching
    exceptions.
    """
    text = None
    try:
        with open(path, encoding="utf-8") as handle:
            text = handle.read()
    except (OSError, UnicodeDecodeError):
        # Also covers a failure while closing: report "unreadable".
        text = None
    return text


def heading_text(line):
    """Return a heading's text without its leading keyword / priority cookie.

    Returns None when *line* is not an org heading. A leading all-caps word
    is treated as the org keyword and dropped, then a leading ``[#X]``
    priority cookie is dropped.
    """
    match = HEADING_RE.match(line)
    if match is None:
        return None
    title = match.group(2)
    # Order matters: the keyword precedes the priority cookie.
    for leading in (r"^[A-Z]+\s+", r"^\[#[A-Z]\]\s+"):
        title = re.sub(leading, "", title)
    return title.strip()


def has_spine(content):
    """Classification predicate: True when *content* has both spine headings.

    A doc qualifies when one heading's text starts with "decisions" and
    another's starts with "implementation phases" (case-insensitive).
    """
    titles = [
        title.lower()
        for title in map(heading_text, content.splitlines())
        if title is not None
    ]
    has_decisions = any(t.startswith("decisions") for t in titles)
    has_phases = any(t.startswith("implementation phases") for t in titles)
    return has_decisions and has_phases


def walk_files(root, rel_base):
    """Yield root-relative paths of the files at or under *rel_base*.

    When *rel_base* names a file it is yielded as given. Otherwise the tree
    is walked top-down with directories and file names in sorted order, so
    the output is deterministic.
    """
    start = os.path.join(root, rel_base)
    if os.path.isfile(start):
        yield rel_base
    else:
        for current, subdirs, filenames in os.walk(start):
            subdirs.sort()  # in place, so os.walk descends in sorted order
            yield from (
                os.path.relpath(os.path.join(current, fname), root)
                for fname in sorted(filenames)
            )


def classify(root):
    """Sort docs/**/*.org (outside docs/specs/) into three buckets.

    Returns ``(candidates, anomalies, notes)``: docs carrying the spec
    spine, docs named ``-spec.org`` that lack it, and everything else.
    Unreadable files are left out of all three lists.
    """
    candidates, anomalies, notes = [], [], []
    if os.path.isdir(os.path.join(root, "docs")):
        for rel in walk_files(root, "docs"):
            if not rel.endswith(".org"):
                continue
            segments = rel.split(os.sep)
            if len(segments) > 1 and segments[1] == "specs":
                continue  # already filed under docs/specs/
            text = read_text(os.path.join(root, rel))
            if text is None:
                continue
            if has_spine(text):
                bucket = candidates
            elif os.path.basename(rel).endswith("-spec.org"):
                bucket = anomalies
            else:
                bucket = notes
            bucket.append(rel)
    return candidates, anomalies, notes


def dest_for(rel):
    """Return the docs/specs/ destination for *rel*, with a -spec.org suffix."""
    name = os.path.basename(rel)
    if name.endswith("-spec.org"):
        spec_name = name
    else:
        # Swap the trailing ".org" for "-spec.org".
        spec_name = name[: -len(".org")] + "-spec.org"
    return os.path.join("docs", "specs", spec_name)


# ---- Evidence panel ---------------------------------------------------


def todo_task_for(root, rel):
    """Return the todo.org heading that owns the first mention of the doc.

    Finds the first line containing the doc's basename or its relative
    path, then walks back (that line included) to the nearest heading and
    returns it with the leading stars stripped. Returns None when todo.org
    is unreadable, nothing mentions the doc, or no heading precedes the
    mention.
    """
    text = read_text(os.path.join(root, "todo.org"))
    if text is None:
        return None
    lines = text.splitlines()
    short_name = os.path.basename(rel)
    hit = next(
        (idx for idx, ln in enumerate(lines) if short_name in ln or rel in ln),
        None,
    )
    if hit is None:
        return None
    for candidate in reversed(lines[: hit + 1]):
        if HEADING_RE.match(candidate):
            return candidate.lstrip("* ").strip()
    return None


def gather_evidence(root, rel, content):
    """Collect the evidence panel for one candidate doc.

    Returns a dict with keys ``status`` (value of the first Status table
    row, or None), ``cookies`` (Decisions / Review findings headings that
    carry an ``[n/m]`` cookie), ``todo`` (linking todo.org heading),
    ``todo_keyword`` (leading all-caps word of that heading), ``history``
    (last dated line, truncated to 100 chars) and ``artifacts`` (a
    ``(count_existing, paths)`` pair).
    """
    doc_lines = content.splitlines()

    status_row = re.search(r"^\|\s*Status\s*\|\s*([^|]*)\|", content, re.MULTILINE | re.IGNORECASE)
    status = status_row.group(1).strip() if status_row else None

    cookies = []
    for raw in doc_lines:
        title = heading_text(raw)
        if not title or not COOKIE_RE.search(title):
            continue
        if title.lower().startswith(("decisions", "review findings")):
            cookies.append(title)

    task = todo_task_for(root, rel)
    task_keyword = None
    if task:
        leading = re.match(r"([A-Z]+)\s", task)
        if leading:
            task_keyword = leading.group(1)

    dated = [ln.strip() for ln in doc_lines if DATED_RE.search(ln)]
    history = dated[-1][:100] if dated else None

    # Cheap artifact check: =path= tokens after the Implementation phases heading.
    pieces = re.split(r"^\*+\s+.*implementation phases.*$", content, maxsplit=1, flags=re.MULTILINE | re.IGNORECASE)
    artifacts = []
    if len(pieces) > 1:
        artifacts = [tok for tok in re.findall(r"=([^=\s]+)=", pieces[1]) if "/" in tok]
    present = sum(1 for tok in artifacts if os.path.exists(os.path.join(root, tok)))

    return {
        "status": status,
        "cookies": cookies,
        "todo": task,
        "todo_keyword": task_keyword,
        "history": history,
        "artifacts": (present, artifacts),
    }


def propose_keyword(ev):
    """Propose the lifecycle keyword the evidence supports.

    Reads ``ev["status"]`` and ``ev["todo_keyword"]``. Terminal signals in
    the Status text win first, then in-progress signals (Status text or a
    DOING todo task), then readiness; anything else is DRAFT.
    """
    status = (ev["status"] or "").lower()
    tokens = set(re.findall(r"[a-z]+", status))

    def mentions(*vocab):
        return not tokens.isdisjoint(vocab)

    if mentions("implemented", "shipped", "complete", "completed", "done"):
        return "IMPLEMENTED"
    if mentions("superseded"):
        return "SUPERSEDED"
    if mentions("cancelled", "canceled", "dead", "abandoned"):
        return "CANCELLED"

    status_in_progress = mentions("doing", "implementing") or any(
        phrase in status for phrase in ("in progress", "in-progress")
    )
    if status_in_progress or ev["todo_keyword"] == "DOING":
        return "DOING"
    if mentions("ready", "approved", "accepted"):
        return "READY"
    return "DRAFT"  # conservative non-terminal default


# ---- Link scanning ----------------------------------------------------


def rewrite_files(root):
    """List the root-relative *.org files under the rewritten roots.

    Roots missing on disk are skipped. Each path appears once, in
    first-seen order.
    """
    ordered = {}  # dict keys act as an insertion-ordered set
    for base in REWRITE_ROOTS:
        if os.path.exists(os.path.join(root, base)):
            for rel in walk_files(root, base):
                if rel.endswith(".org"):
                    ordered.setdefault(rel, None)
    return list(ordered)


def resolve_target(root, linker_rel, raw_target, moved):
    """Map a file: link target to a root-relative path.

    Absolute, home-relative and http(s) targets return None. Otherwise the
    target is read first relative to the linking file's directory, then as
    anchored at the project root. The first reading that is "live" (a
    moving doc or an existing path) wins; when neither is live the
    file-relative reading is returned.
    """
    if raw_target.startswith(("/", "~", "http:", "https:")):
        return None

    def is_live(rel):
        return rel in moved or os.path.exists(os.path.join(root, rel))

    file_relative = os.path.normpath(os.path.join(os.path.dirname(linker_rel), raw_target))
    root_anchored = os.path.normpath(raw_target)
    for reading in (file_relative, root_anchored):
        if is_live(reading):
            return reading
    return file_relative


def plan_link_edits(root, moved):
    """Compute every link rewrite: inbound links to moved docs and moved
    docs' own outbound relative links. Returns ({linker_rel: [(old, new)]},
    [ambiguity descriptions]) — a link whose file-relative and root-anchored
    readings are both live and disagree about a moving doc blocks validation
    rather than being rewritten against a guess.

    ``moved`` maps each moving doc's current root-relative path to its
    destination. Each (old, new) pair is the full link text before and
    after, for a later literal string replace. Nothing is written here.
    """
    edits = {}
    ambiguous = []
    for linker in rewrite_files(root):
        content = read_text(os.path.join(root, linker))
        if content is None:
            continue
        # Where the linking file itself will live after the moves.
        linker_post = moved.get(linker, linker)
        for m in LINK_RE.finditer(content):
            raw = m.group(1)
            desc = m.group(2)
            # Split off any "::anchor" suffix; only the path part is resolved.
            target_path, sep, anchor = raw.partition("::")
            target = resolve_target(root, linker, target_path, moved)
            if target is None:
                continue
            # Recompute both readings to detect links that resolve two ways.
            rel_a = os.path.normpath(os.path.join(os.path.dirname(linker), target_path))
            rel_b = os.path.normpath(target_path)
            if rel_a != rel_b:
                live_a = rel_a in moved or os.path.exists(os.path.join(root, rel_a))
                live_b = rel_b in moved or os.path.exists(os.path.join(root, rel_b))
                if live_a and live_b and (rel_a in moved or rel_b in moved):
                    ambiguous.append(
                        "%s: [[file:%s]] reads as %s (file-relative) or %s (root-anchored) "
                        "and a moving doc is involved — resolve the link by hand" % (linker, raw, rel_a, rel_b))
                    continue
            # Neither end moves: the link text stays valid as-is.
            if target not in moved and linker not in moved:
                continue
            if target not in moved and not os.path.exists(os.path.join(root, target)):
                continue  # already broken before this run; not ours to guess
            target_post = moved.get(target, target)
            # New path is relative to the linker's post-move directory.
            new_path = os.path.relpath(target_post, os.path.dirname(linker_post) or ".")
            new_raw = new_path + (sep + anchor if sep else "")
            if new_raw == raw:
                continue
            # Rebuild the link, keeping the description bracket only if one was present.
            new_link = "[[file:%s]%s]" % (new_raw, "[%s]" % desc if desc is not None else "")
            if m.group(0) != new_link:
                edits.setdefault(linker, []).append((m.group(0), new_link))
    return edits, ambiguous


def scan_bare_mentions(root, moved):
    """Find plain-text mentions of moving docs in the rewritten roots.

    Each line has its ``[[...]]`` links blanked out first, so only mentions
    outside a link count. Returns ``(file, line_number, source_path)``
    tuples; nothing is rewritten.
    """
    link_pattern = re.compile(r"\[\[[^\]]*\](?:\[[^\]]*\])?\]")
    hits = []
    for base in REWRITE_ROOTS:
        if not os.path.exists(os.path.join(root, base)):
            continue
        for rel in walk_files(root, base):
            text = read_text(os.path.join(root, rel))
            if text is None:
                continue
            for lineno, line in enumerate(text.splitlines(), start=1):
                unlinked = link_pattern.sub("", line)
                hits.extend((rel, lineno, src) for src in moved if src in unlinked)
    return hits


def scan_report_only(root, moved):
    """Report mentions of moving docs in the frozen / synced surfaces.

    Returns ``(file, source_path, note)`` tuples, one per file and moving
    doc whose path occurs anywhere in that file. The note says why the
    file is not rewritten.
    """
    findings = []
    for base in REPORT_ROOTS:
        if not os.path.exists(os.path.join(root, base)):
            continue
        for rel in walk_files(root, base):
            text = read_text(os.path.join(root, rel))
            if text is None:
                continue
            for src in moved:
                if src not in text:
                    continue
                if rel.startswith(SYNCED_PREFIX):
                    note = ("synced template, not rewritten — a local edit is reverted by the "
                            "next sync; edit the canonical claude-templates/%s instead" % rel)
                else:
                    note = "frozen history; not rewritten"
                findings.append((rel, src, note))
    return findings


# ---- Content transforms -----------------------------------------------


def transform_spec(content, keyword, reason, title, doc_id, link_edits):
    """Apply the retrofit rewrite to a moving spec's content.

    Steps, in order:
      1. apply the doc's own link edits (literal ``old`` -> ``new`` replaces);
      2. replace every ``#+TODO:`` line with the two-sequence TODO_HEADER,
         at the position of the first one (or after the leading ``#+``
         lines when the doc had none);
      3. prepend the status heading (keyword + title, an :ID: drawer, and a
         dated history line) after the ``#+`` / blank-line preamble;
      4. mirror the lowercased keyword into the first ``| Status |`` row.

    Args:
        content: the doc's current text.
        keyword: lifecycle keyword to set.
        reason: free text recorded in the history line, or a falsy value.
        title: text of the status heading.
        doc_id: value written to the :ID: property.
        link_edits: iterable of ``(old_link_text, new_link_text)`` pairs.

    Returns:
        The rewritten text, always ending with one newline.
    """
    for old, new in link_edits:
        content = content.replace(old, new)
    lines = content.splitlines()

    todo_idx = None
    kept = []
    for line in lines:
        # Org keywords are case-insensitive, so "#+todo:" must be replaced
        # too; otherwise a stale sequence survives beside the new header.
        if line[:7].upper() == "#+TODO:":
            if todo_idx is None:
                todo_idx = len(kept)
            continue
        kept.append(line)
    lines = kept
    if todo_idx is None:
        # No existing header: place it right after the leading #+ lines.
        todo_idx = 0
        while todo_idx < len(lines) and lines[todo_idx].startswith("#+"):
            todo_idx += 1
    lines[todo_idx:todo_idx] = TODO_HEADER

    # The status heading goes after the preamble of #+ lines and blank lines.
    head_end = 0
    while head_end < len(lines) and (lines[head_end].startswith("#+") or not lines[head_end].strip()):
        head_end += 1
    ts = datetime.now().astimezone().strftime("%Y-%m-%d %a @ %H:%M:%S %z")
    provenance = "reason: %s" % reason if reason else "evidence-based, human-confirmed"
    block = [
        "* %s %s" % (keyword, title),
        ":PROPERTIES:",
        ":ID:       %s" % doc_id,
        ":END:",
        "- %s — retrofitted by spec-sort; status set to %s (%s)" % (ts, keyword, provenance),
        "",
    ]
    lines[head_end:head_end] = block

    out = []
    mirrored = False
    for line in lines:
        m = re.match(r"^(\|\s*Status\s*\|)([^|]*)(\|.*)$", line, re.IGNORECASE)
        if m and not mirrored:
            # Keep the cell's width when the new value fits, so the table
            # stays aligned; otherwise let the cell grow.
            value = " %s" % keyword.lower()
            width = len(m.group(2))
            line = m.group(1) + (value.ljust(width) if len(value) <= width else value + " ") + m.group(3)
            mirrored = True
        out.append(line)
    return "\n".join(out) + "\n"


def title_for(content, rel):
    """Return the doc's #+TITLE value, or a title derived from its file name.

    The fallback is the basename without ``.org`` and without a trailing
    ``-spec``.
    """
    found = re.search(r"^#\+TITLE:\s*(.+)$", content, re.MULTILINE | re.IGNORECASE)
    if found is not None:
        return found.group(1).strip()
    stem = os.path.basename(rel)[: -len(".org")]
    if stem.endswith("-spec"):
        stem = stem[: -len("-spec")]
    return stem


# ---- Marker ------------------------------------------------------------


def stamp_marker(root, date):
    """Record ``:LAST_SPEC_SORT: <date>`` in ``<root>/.ai/notes.org``.

    An existing marker line is updated in place. Otherwise the marker is
    inserted right under an existing ``* Workflow State`` heading, or a new
    ``* Workflow State`` section is appended. The ``.ai`` directory and
    the notes file are created when missing.

    The file is read and written with the ``surrogateescape`` error
    handler so bytes that are not valid UTF-8 survive the round trip.
    Treating an undecodable file as empty would overwrite the whole notes
    file with only the marker section.

    Raises:
        OSError: the notes file exists but cannot be read, or cannot be
            written.
    """
    path = os.path.join(root, ".ai", "notes.org")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    try:
        with open(path, encoding="utf-8", errors="surrogateescape") as f:
            content = f.read()
    except FileNotFoundError:
        content = ""  # only a missing file counts as "start from empty"
    line = ":LAST_SPEC_SORT: %s" % date
    # Callable replacements insert `line` literally; a plain string would
    # be parsed as a regex replacement template.
    if ":LAST_SPEC_SORT:" in content:
        content = re.sub(r":LAST_SPEC_SORT:.*", lambda _m: line, content, count=1)
    elif re.search(r"^\* Workflow State\s*$", content, re.MULTILINE):
        content = re.sub(r"(^\* Workflow State\s*$)", lambda m: m.group(1) + "\n" + line,
                         content, count=1, flags=re.MULTILINE)
    else:
        if content and not content.endswith("\n"):
            content += "\n"
        content += "\n* Workflow State\n\n%s\n" % line
    with open(path, "w", encoding="utf-8", errors="surrogateescape") as f:
        f.write(content)


# ---- Apply -------------------------------------------------------------


class ApplyFailure(Exception):
    """Raised when an apply stops part-way through.

    Carries three positional args, read back through ``exc.args``: the
    labels of the operations already applied, the operations not yet
    applied, and a description of the cause.
    """


def apply_plan(root, plan, fail_after):
    """Execute the recorded plan and return the applied-op labels.

    Operations run in order: every move first (transform the content,
    write it to the destination, remove the source), then one relink per
    non-moving file that has link edits. ``docs/specs`` is created when
    the plan has moves and it is missing; the created directory is
    appended to ``plan["created_dirs"]``.

    Args:
        root: absolute project root.
        plan: dict with ``moves``, ``link_edits`` and ``created_dirs``.
        fail_after: test hook; when truthy, the apply aborts before
            operation number ``fail_after + 1``.

    Raises:
        ApplyFailure: ``(applied_labels, remaining_ops, cause)`` when the
            test hook fires, a filesystem operation fails, or a file in
            the plan can no longer be read.
    """
    moving = {mv["src"] for mv in plan["moves"]}
    ops = [("move", mv) for mv in plan["moves"]]
    # A moving doc's own edits ride along in its transform, so only
    # non-moving linkers get a separate relink op.
    ops.extend(
        ("relink", (linker, edits))
        for linker, edits in plan["link_edits"].items()
        if linker not in moving
    )

    applied = []
    specs_dir = os.path.join(root, "docs", "specs")
    if plan["moves"] and not os.path.isdir(specs_dir):
        try:
            os.makedirs(specs_dir)
        except OSError as exc:
            # Nothing applied yet, but report through the recovery path
            # rather than a raw traceback.
            raise ApplyFailure(applied, ops, str(exc)) from exc
        plan["created_dirs"].append(os.path.join("docs", "specs"))

    for n, (kind, payload) in enumerate(ops, 1):
        if fail_after and n > fail_after:
            raise ApplyFailure(applied, ops[n - 1:], "injected test failure")
        try:
            if kind == "move":
                mv = payload
                content = read_text(os.path.join(root, mv["src"]))
                if content is None:
                    # read_text hides the real error; re-raise as OSError so
                    # the handler below turns it into an ApplyFailure.
                    raise OSError("cannot read %s" % mv["src"])
                new = transform_spec(content, mv["keyword"], mv["reason"], mv["title"], mv["id"],
                                     plan["link_edits"].get(mv["src"], []))
                with open(os.path.join(root, mv["dest"]), "w", encoding="utf-8") as f:
                    f.write(new)
                os.remove(os.path.join(root, mv["src"]))
                applied.append("move %s -> %s" % (mv["src"], mv["dest"]))
            else:
                linker, edits = payload
                path = os.path.join(root, linker)
                content = read_text(path)
                if content is None:
                    raise OSError("cannot read %s" % linker)
                for old, new in edits:
                    content = content.replace(old, new)
                with open(path, "w", encoding="utf-8") as f:
                    f.write(content)
                applied.append("relink %s (%d link%s)" % (linker, len(edits), "s" if len(edits) != 1 else ""))
        except OSError as exc:
            raise ApplyFailure(applied, ops[n - 1:], str(exc)) from exc
    return applied


def residue_check(root, plan):
    """Post-apply scan for leftovers; returns a list of residue messages.

    Flags every link in the rewritten roots that still resolves to a moved
    doc's old path, and every bare mention outside the set recorded in
    ``plan["bare"]`` before the apply.
    """
    moved = {mv["src"]: mv["dest"] for mv in plan["moves"]}
    residue = []
    for linker in rewrite_files(root):
        text = read_text(os.path.join(root, linker))
        if text is None:
            continue
        for link in LINK_RE.finditer(text):
            path_part, _sep, _anchor = link.group(1).partition("::")
            resolved = resolve_target(root, linker, path_part, {})
            if resolved in moved:
                residue.append("%s: link still resolves to %s" % (linker, resolved))

    # Acknowledged mentions were recorded pre-apply. A mention inside a
    # moved doc now sits at the doc's destination, so translate the file
    # side through the moves before comparing.
    waived = set()
    for holder, _lineno, src in plan["bare"]:
        waived.add((moved.get(holder, holder), src))
    residue.extend(
        "%s:%d: bare mention of %s" % (holder, lineno, src)
        for holder, lineno, src in scan_bare_mentions(root, moved)
        if (holder, src) not in waived
    )
    return residue


def print_recovery(plan, applied, not_applied):
    """Print what was and was not applied, then the recovery recipe.

    Args:
        plan: the recorded plan (``moves``, ``link_edits`` and optionally
            ``created_dirs``).
        applied: labels of the operations that completed.
        not_applied: ``(kind, payload)`` operations that did not run.

    The recipe lines are meant to be pasted into a shell, so every path
    is shell-quoted. Paths made only of safe characters print unchanged.
    A command that would have no path arguments is omitted.
    """
    import shlex  # local import: only the failure path needs it

    def quoted(paths):
        return " ".join(shlex.quote(p) for p in paths)

    moving = {mv["src"] for mv in plan["moves"]}

    print("FAILURE — the apply did not complete.")
    print("  applied:")
    for a in applied or ["(nothing)"]:
        print("    %s" % a)
    print("  not applied:")
    for kind, payload in not_applied:
        if kind == "move":
            print("    move %s -> %s" % (payload["src"], payload["dest"]))
        else:
            print("    relink %s" % payload[0])
    print("RECOVERY — restore the pre-run state (safe: preflight required a clean tree):")
    # Restore the moved sources plus every non-moving file that was relinked.
    touched = [mv["src"] for mv in plan["moves"]] + [l for l in plan["link_edits"] if l not in moving]
    if touched:
        print("  git restore -- %s" % quoted(touched))
    created = [mv["dest"] for mv in plan["moves"]]
    if created:
        print("  rm -f -- %s   # git restore can't remove the created copies" % quoted(created))
    for d in plan.get("created_dirs", []):
        print("  rmdir --ignore-fail-on-non-empty -- %s" % shlex.quote(d))


# ---- Main ---------------------------------------------------------------


def parse_kv(pairs, label):
    """Parse repeated ``REL=VALUE`` options into a dict.

    Keys are path-normalised; the value is everything after the first
    ``=``. Exits through ``sys.exit`` with a message naming *label* when
    an item has no ``=``. ``pairs`` may be None.
    """
    parsed = {}
    for entry in pairs or []:
        key, eq, value = entry.partition("=")
        if not eq:
            sys.exit("spec-sort: %s expects REL=VALUE, got %r" % (label, entry))
        parsed[os.path.normpath(key)] = value
    return parsed


def main():
    """Command-line entry point; returns the process exit code.

    Flow: parse arguments, classify the docs pile, build the move / relink
    plan, print the report, and stop there (return 0) unless --apply was
    given. With --apply: git preflight, confirm gate, validation, plan
    file, execution, residue check, then the marker stamp.

    Returns:
        0 on a completed report or apply; 1 when a named path is not a
        candidate, a keyword is invalid, the confirm gate or validation
        blocks, the apply fails, or residue remains; 2 when git is
        unavailable or the worktree is dirty without --allow-dirty.
    """
    ap = argparse.ArgumentParser(prog="spec-sort", add_help=True)
    ap.add_argument("--project-root", default=".")
    ap.add_argument("--apply", action="store_true")
    ap.add_argument("--allow-dirty", action="store_true")
    ap.add_argument("--acknowledge-bare", action="store_true")
    ap.add_argument("--confirm", action="append", metavar="REL=KEYWORD")
    ap.add_argument("--reason", action="append", metavar="REL=TEXT")
    ap.add_argument("--skip", action="append", metavar="REL")
    ap.add_argument("--plan-file")
    args = ap.parse_args()

    root = os.path.abspath(args.project_root)
    confirms = parse_kv(args.confirm, "--confirm")
    reasons = parse_kv(args.reason, "--reason")
    skips = {os.path.normpath(s) for s in (args.skip or [])}

    candidates, anomalies, notes = classify(root)
    if not candidates and not anomalies and not notes and not os.path.isdir(os.path.join(root, "docs")):
        return 0  # no docs pile at all — silent no-op

    # Every path named on the command line must be a current candidate.
    for named in list(confirms) + list(skips) + list(reasons):
        if named not in candidates:
            print("spec-sort: %s is not a spec candidate" % named)
            return 1
    for rel, kw in confirms.items():
        if kw not in LIFECYCLE:
            print("spec-sort: %r is not a lifecycle keyword (%s)" % (kw, " ".join(LIFECYCLE)))
            return 1

    # ---- Build the plan (shared by report and apply) ----
    # In a dry run every non-skipped candidate is planned (keyword may be
    # None); under --apply only confirmed candidates are.
    moves = []
    for rel in candidates:
        if rel in skips:
            continue
        if args.apply and rel not in confirms:
            continue  # gate failure reported below
        content = read_text(os.path.join(root, rel))
        moves.append({
            "src": rel,
            "dest": dest_for(rel),
            "keyword": confirms.get(rel, None),
            "reason": reasons.get(rel),
            "title": title_for(content, rel),
            "id": str(uuid.uuid4()),
        })
    moved_map = {mv["src"]: mv["dest"] for mv in moves}
    link_edits, ambiguous = plan_link_edits(root, moved_map)
    bare = scan_bare_mentions(root, moved_map)
    reports = scan_report_only(root, moved_map)

    # ---- Report ----
    for rel in candidates:
        content = read_text(os.path.join(root, rel))
        ev = gather_evidence(root, rel, content)
        proposed = propose_keyword(ev)
        print("CANDIDATE %s -> %s" % (rel, dest_for(rel)))
        suffix = "  (terminal — requires --reason to apply)" if proposed in TERMINAL else ""
        print("  proposed keyword: %s%s" % (proposed, suffix))
        print("  evidence:")
        print("    status field: %s" % (ev["status"] or "(none)"))
        print("    cookies: %s" % ("; ".join(ev["cookies"]) or "(none)"))
        print("    todo.org: %s" % (ev["todo"] or "(no linking task)"))
        print("    history: %s" % (ev["history"] or "(none)"))
        n_exist, artifacts = ev["artifacts"]
        if artifacts:
            print("    artifacts: %d/%d named paths exist (%s)" % (n_exist, len(artifacts), ", ".join(artifacts)))
        else:
            print("    artifacts: (none named)")
    for rel in anomalies:
        print("ANOMALY %s: named -spec.org but lacks the spec spine (Decisions + Implementation phases); surfaced, not moved" % rel)
    for rel in notes:
        print("NOTE %s" % rel)
    for linker, edits in sorted(link_edits.items()):
        for old, new in edits:
            print("RELINK %s: %s -> %s" % (linker, old, new))
    for a in ambiguous:
        print("AMBIGUOUS %s" % a)
    for f, ln, src in bare:
        print("BARE-PATH %s:%d: %s (reported for manual handling, never rewritten)" % (f, ln, src))
    for rel, src, note in reports:
        print("REPORT %s: reference to %s (%s)" % (rel, src, note))

    if not args.apply:
        if candidates or anomalies or notes:
            print("DRY RUN — no changes written. Pass --apply with per-candidate --confirm/--skip to execute.")
        return 0

    # ---- Apply: preflight ----
    try:
        porcelain = subprocess.run(
            ["git", "status", "--porcelain"], cwd=root,
            capture_output=True, text=True, check=True,
        ).stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("spec-sort: --apply needs a git worktree (recovery depends on git restore)")
        return 2
    if porcelain.strip():
        # Drop the two status columns and the space that follows them.
        dirty = [ln[3:] for ln in porcelain.splitlines()]
        if not args.allow_dirty:
            print("spec-sort: refusing --apply on a dirty worktree (%d path%s). Commit or stash first, or pass --allow-dirty."
                  % (len(dirty), "s" if len(dirty) != 1 else ""))
            return 2
        print("WARNING --allow-dirty: recovery via git restore would also revert your pre-existing uncommitted changes:")
        for p in dirty:
            print("  %s" % p)

    # ---- Apply: confirm gate ----
    unaddressed = [rel for rel in candidates if rel not in confirms and rel not in skips]
    if unaddressed:
        print("spec-sort: unconfirmed candidate(s) — pass --confirm REL=KEYWORD or --skip REL for each:")
        for rel in unaddressed:
            print("  %s" % rel)
        return 1
    for mv in moves:
        if mv["keyword"] in TERMINAL and not mv["reason"]:
            print("spec-sort: %s -> %s is a terminal state and requires an explicit --reason %s=TEXT"
                  % (mv["src"], mv["keyword"], mv["src"]))
            return 1

    # ---- Apply: validation ----
    problems = []
    dests = {}  # destination -> source that already claimed it
    for mv in moves:
        if os.path.exists(os.path.join(root, mv["dest"])):
            problems.append("%s: destination exists (%s)" % (mv["src"], mv["dest"]))
        if mv["dest"] in dests:
            problems.append("%s and %s: destination exists twice (%s)" % (mv["src"], dests[mv["dest"]], mv["dest"]))
        dests[mv["dest"]] = mv["src"]
    for a in ambiguous:
        problems.append("ambiguous link: %s" % a)
    if bare and not args.acknowledge_bare:
        problems.append("bare-path mention(s) listed above need manual handling — re-run with --acknowledge-bare to proceed without rewriting them")
    if problems:
        print("spec-sort: validation blocked — nothing written:")
        for p in problems:
            print("  %s" % p)
        return 1

    # ---- Apply: record the plan, then execute from it ----
    today = datetime.now().astimezone().strftime("%Y-%m-%d")
    plan = {
        "root": root, "date": today, "moves": moves,
        "link_edits": link_edits, "bare": bare,
        "reports": [list(r) for r in reports], "created_dirs": [],
    }
    # Default plan file lives in the system temp dir, named after the
    # project root's basename.
    plan_path = args.plan_file or os.path.join(
        tempfile.gettempdir(), "spec-sort-plan-%s.json" % os.path.basename(root))
    with open(plan_path, "w", encoding="utf-8") as f:
        json.dump(plan, f, indent=2)
    print("plan written: %s" % plan_path)

    # Test hook: an unset or empty variable means "never inject a failure".
    fail_after = int(os.environ.get("SPEC_SORT_INJECT_FAIL_AFTER", "0") or 0)
    try:
        applied = apply_plan(root, plan, fail_after)
    except ApplyFailure as exc:
        print("write failed: %s" % exc.args[2])
        print_recovery(plan, exc.args[0], exc.args[1])
        return 1

    residue = residue_check(root, plan)
    if residue:
        print("spec-sort: residue after apply — old paths still referenced:")
        for r in residue:
            print("  %s" % r)
        print_recovery(plan, applied, [])
        return 1

    # The marker is stamped only after a clean residue check.
    stamp_marker(root, today)
    for a in applied:
        print("applied: %s" % a)
    print("spec-sort: done — %d spec(s) sorted, :LAST_SPEC_SORT: %s stamped" % (len(moves), today))
    return 0


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())