#!/usr/bin/env python3
# scripts/workflow-integrity.py
"""Integrity checks for the .ai/workflows/ directory and its INDEX.org.

Startup's drift check catches index-vs-directory mismatches; this goes deeper:
a workflow referencing a renamed script, a plugin whose engine was deleted, a
missing required section, a duplicate trigger phrase. Runs against the canonical
claude-templates/.ai/workflows/ by default; pass a directory to check another.

Checks:
  1. indexed-or-plugin   every *.org (except INDEX.org) is a catalog entry in
                         INDEX.org, or a source plugin (engine.plugin.org) of an
                         indexed engine.
  2. indexed-exists      every catalog entry points at a file that exists.
  3. script-refs         every .ai/scripts/<x> reference in a workflow resolves
                         to a real file under the canonical scripts dir.
  4. plugin-parent       every engine.plugin.org maps to an indexed engine.
  5. orientation         every non-plugin workflow has an orientation section
                         (Overview / Purpose / When to Use|Run / Status).
  6. trigger-uniqueness  no trigger phrase is claimed by two different workflows.

Exit 0 when clean, 1 when any check fails, 2 on bad usage.

Usage:
  workflow-integrity.py [WORKFLOWS_DIR]
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

# Directory two levels above this file's resolved location.
REPO = Path(__file__).resolve().parent.parent
# Workflows directory checked when no argument is given on the command line.
DEFAULT_DIR = REPO.joinpath("claude-templates", ".ai", "workflows")

# Catalog list item at the start of a line: "- =name.org= " (group 1 = name.org).
CATALOG_RE = re.compile(r"^- =([A-Za-z0-9._-]+\.org)= ", re.MULTILINE)
# engine.plugin.org — group 1 is everything before the last ".<part>.org".
PLUGIN_RE = re.compile(r"^(.+)\.[^.]+\.org$")
# A ".ai/scripts/<path>" mention; group 1 is the path after the prefix and may
# still carry a trailing sentence period.
SCRIPT_REF_RE = re.compile(r"\.ai/scripts/([A-Za-z0-9][A-Za-z0-9._/-]*)")
# Text between a pair of double quotes.
QUOTED_RE = re.compile(r'"([^"]+)"')
# Top-level ("* ") heading whose title starts with one of the orientation
# keywords, matched case-insensitively.
ORIENTATION_RE = re.compile(
    r"^\* (overview|purpose|when to use|when to run|status)\b",
    re.MULTILINE | re.IGNORECASE,
)


def catalog_entries(index_text: str) -> set[str]:
    """Collect the workflow filenames catalogued in INDEX.org text.

    Only list items shaped like `- =name.org= — ...` (as matched by CATALOG_RE)
    count, so a prose mention such as =todo.org= is not treated as an entry.
    """
    return {entry.group(1) for entry in CATALOG_RE.finditer(index_text)}


def scripts_dir_for(workflows_dir: Path) -> Path:
    """Return the `scripts` directory that is a sibling of *workflows_dir*.

    This is the directory that .ai/scripts/ references found in workflows are
    resolved against.
    """
    container = workflows_dir.parent
    return container.joinpath("scripts")


def trigger_map(index_text: str) -> dict[str, set[str]]:
    """Build a mapping from trigger phrase to the workflows that claim it.

    The index is scanned line by line. A catalog-entry line makes that workflow
    the current owner; every later line containing "rigger" (so "Triggers:",
    "Full-prep triggers:", ...) contributes each of its double-quoted phrases to
    that owner. Lines seen before the first catalog entry are ignored.
    """
    owners_by_phrase: dict[str, set[str]] = {}
    owner: str | None = None
    for raw in index_text.splitlines():
        entry = CATALOG_RE.match(raw)
        if entry:
            # A new catalog entry; quotes on the entry line itself are not collected.
            owner = entry.group(1)
        elif owner and "rigger" in raw:
            for quoted in QUOTED_RE.findall(raw):
                if quoted not in owners_by_phrase:
                    owners_by_phrase[quoted] = set()
                owners_by_phrase[quoted].add(owner)
    return owners_by_phrase


def check(workflows_dir: Path) -> list[str]:
    """Run every integrity check on *workflows_dir* and return the findings.

    Each finding is a one-line string starting with a bracketed tag: [orphan],
    [stale-index], [orphan-plugin], [bad-ref], [missing-section] or
    [dup-trigger]. An empty list means the directory is clean. When
    *workflows_dir* has no INDEX.org, that is reported as the only finding and
    no other check runs.

    Findings are grouped by check, in this order: indexed-or-plugin,
    indexed-exists, plugin-parent, script-refs, orientation,
    trigger-uniqueness. Within script-refs, each missing script is reported at
    most once per workflow.
    """
    index = workflows_dir / "INDEX.org"
    if not index.is_file():
        return [f"no INDEX.org in {workflows_dir}"]

    findings: list[str] = []
    index_text = index.read_text(encoding="utf-8")
    indexed = catalog_entries(index_text)
    # Engine names are the indexed filenames with the ".org" suffix removed.
    engines = {n[:-len(".org")] for n in indexed}
    workflows = sorted(p.name for p in workflows_dir.glob("*.org") if p.name != "INDEX.org")
    scripts_dir = scripts_dir_for(workflows_dir)

    def is_plugin(name: str) -> bool:
        """True when *name* is `<engine>.<something>` for some indexed engine."""
        return any(name.startswith(e + ".") and name != e + ".org" for e in engines)

    # 1. indexed-or-plugin
    for w in workflows:
        if w not in indexed and not is_plugin(w):
            findings.append(f"[orphan] {w}: not indexed in INDEX.org and not a plugin of an indexed engine")

    # 2. indexed-exists
    for n in sorted(indexed):
        if not (workflows_dir / n).is_file():
            findings.append(f"[stale-index] INDEX lists {n} but no such file exists")

    # 4. plugin-parent (only un-indexed files shaped like engine.plugin.org)
    for w in workflows:
        if w in indexed:
            continue
        m = PLUGIN_RE.match(w)
        if m:
            parent = m.group(1) + ".org"
            if parent not in indexed:
                findings.append(f"[orphan-plugin] {w}: parent engine {parent} is not indexed")
            elif not (workflows_dir / parent).is_file():
                findings.append(f"[orphan-plugin] {w}: parent engine {parent} is missing")

    # Read every workflow once; checks 3 and 5 both need the text.
    texts = {w: (workflows_dir / w).read_text(encoding="utf-8") for w in workflows}

    # 3. script-refs
    for w in workflows:
        # Strip a trailing sentence period *before* de-duplicating, so that
        # "x.py" and "x.py." count as one reference and yield one finding.
        refs = {ref.rstrip(".") for ref in SCRIPT_REF_RE.findall(texts[w])}
        for ref in sorted(refs):
            if (scripts_dir / ref).exists():
                continue
            # Also accepted when the first path component exists under the
            # scripts dir (presumably to tolerate references into an existing
            # subdirectory — verify this leniency is intended).
            if (scripts_dir / ref.split("/")[0]).exists():
                continue
            findings.append(f"[bad-ref] {w}: references .ai/scripts/{ref} which does not exist")

    # 5. orientation (plugins are adapters loaded by their engine — exempt)
    for w in workflows:
        if is_plugin(w):
            continue
        if not ORIENTATION_RE.search(texts[w]):
            findings.append(f"[missing-section] {w}: no orientation section "
                            "(Overview / Purpose / When to Use|Run / Status)")

    # 6. trigger-uniqueness
    for phrase, owners in sorted(trigger_map(index_text).items()):
        if len(owners) > 1:
            findings.append(f"[dup-trigger] \"{phrase}\" claimed by {', '.join(sorted(owners))}")

    return findings


def main() -> int:
    """Command-line entry point; returns the process exit status.

    Accepts at most one argument, the workflows directory to check (DEFAULT_DIR
    when omitted). Returns 2 for bad usage or a non-directory argument, 1 when
    any check reports a finding, and 0 when the directory is clean.
    """
    args = sys.argv[1:]
    if len(args) > 1:
        print("usage: workflow-integrity.py [WORKFLOWS_DIR]", file=sys.stderr)
        return 2

    workflows_dir = Path(args[0]) if args else DEFAULT_DIR
    if not workflows_dir.is_dir():
        print(f"workflow-integrity: {workflows_dir} is not a directory", file=sys.stderr)
        return 2

    problems = check(workflows_dir)
    # Count of workflow files (everything matching *.org except the index).
    n_workflows = sum(1 for p in workflows_dir.glob("*.org") if p.name != "INDEX.org")
    if not problems:
        print(f"workflow-integrity: OK ({n_workflows} workflows)")
        return 0

    print("workflow-integrity: FAIL")
    for problem in problems:
        print(f"  {problem}")
    return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())