aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/route_recommend.py
blob: 7b36405fd8ea009a0577108aa86734f291946fc1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
"""Wrap-up routing recommendation engine.

Given an inbox keeper's text and a list of candidate project names, infer which
project the item belongs to, with a confidence tier:

  strong  a project's name (or its dot-stripped form, or a path containing it)
          appears literally in the item
  weak    a distinctive name token overlaps, but the full name doesn't
  none    no overlap; the item stays put

A multi-way tie at the top tier is ambiguous, so it downgrades to weak with a
deterministic pick (most token overlap, then alphabetical). An empty candidate
list yields none.

The pure core is `recommend(item, projects) -> (destination, confidence)` — the
shape the wrap-up router (Phase 4) and the process-inbox marker (Phase 2) both
call. The CLI wires it to inbox-send.py's `discover_projects` so the candidate
set is the same project universe inbox-send already knows.

CLI:
    route_recommend.py --item "<text>" [--exclude <current-project>]
prints "<destination>\\t<confidence>" on a match, or "none".
"""

import argparse
import importlib.util
import re
import sys
from pathlib import Path

# Minimum length, in characters, a project-name token must have to count
# toward a weak match; shorter tokens (of, to, id) are too noisy to route on.
MIN_WEAK_TOKEN = 4

# A token is a maximal run of ASCII lowercase letters and digits.
_TOKEN_RE = re.compile(r"[a-z0-9]+")


def _tokens(text: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens found in `text`.

    The text is lowercased first; anything outside a-z / 0-9 (punctuation,
    whitespace, underscores, non-ASCII letters) acts as a separator.
    """
    lowered = text.lower()
    return {match.group() for match in _TOKEN_RE.finditer(lowered)}


def _name_variants(name: str) -> set[str]:
    """Return the lowercase spellings a project name may be matched under.

    That is the name itself plus its dot-stripped alias
    (.emacs.d -> emacsd). Empty spellings are dropped, so an empty name
    yields an empty set.
    """
    as_written = name.lower()
    without_dots = name.replace(".", "").lower()
    variants = {as_written, without_dots}
    variants.discard("")
    return variants


def _literal_present(name: str, item_lower: str) -> bool:
    """Report whether any variant of `name` occurs in `item_lower` as a whole word.

    `item_lower` is expected to be lowercased already, since the variants are
    lowercase. A hit must not be directly preceded or followed by an ASCII
    letter or digit: 'home' is rejected inside 'homeowner' but accepted inside
    a path ('~/code/home/...') or a hyphenated name.
    """
    return any(
        re.search(rf"(?<![a-z0-9]){re.escape(variant)}(?![a-z0-9])", item_lower)
        for variant in _name_variants(name)
    )


def _tiebreak(candidates: list[str], item_tokens: set[str]) -> str:
    """Pick one project from `candidates` deterministically.

    Candidates are ranked by how many of their name tokens also occur in
    `item_tokens` (more is better), with alphabetical order of the name
    settling equal counts.
    """

    def rank(project: str) -> tuple[int, str]:
        shared = _tokens(project) & item_tokens
        # Negated count so the largest overlap sorts first.
        return (-len(shared), project)

    ordered = sorted(candidates, key=rank)
    return ordered[0]


def recommend(item: str, projects: list[str]) -> tuple[str | None, str]:
    """Infer the destination project for `item` from `projects`.

    Returns (destination, confidence). confidence is "strong" / "weak" / "none";
    destination is None exactly when confidence is "none".
    """
    if not projects:
        return (None, "none")

    item_lower = item.lower()
    item_tokens = _tokens(item)

    strong: list[str] = []
    weak: list[str] = []
    for project in projects:
        if _literal_present(project, item_lower):
            strong.append(project)
            continue
        name_tokens = {t for t in _tokens(project) if len(t) >= MIN_WEAK_TOKEN}
        if name_tokens & item_tokens:
            weak.append(project)

    if len(strong) == 1:
        return (strong[0], "strong")
    if len(strong) > 1:
        return (_tiebreak(strong, item_tokens), "weak")
    if len(weak) == 1:
        return (weak[0], "weak")
    if len(weak) > 1:
        return (_tiebreak(weak, item_tokens), "weak")
    return (None, "none")


def _load_inbox_send():
    """Import the sibling script inbox-send.py and return it as a module.

    The kebab-case filename cannot be named in an `import` statement, so the
    file next to this one is loaded by path under the module name
    "inbox_send".

    Raises:
        ImportError: if no import spec or loader can be built for the path.
    """
    script = Path(__file__).with_name("inbox-send.py")
    spec = importlib.util.spec_from_file_location("inbox_send", script)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise ImportError(f"cannot load {script}")
    inbox_send = importlib.util.module_from_spec(spec)
    loader.exec_module(inbox_send)
    return inbox_send


def discover_destination_names(exclude: str | None = None) -> list[str]:
    """List candidate project names using inbox-send's own discovery.

    When `exclude` is given, every discovered name that shares a variant with
    it (same name, or same dot-stripped alias, case-insensitively) is left
    out, so the engine never recommends routing an item to the project it is
    already in.
    """
    inbox_send = _load_inbox_send()
    candidates = [
        project.name
        for project in inbox_send.discover_projects(inbox_send.resolve_roots())
    ]
    if not exclude:
        return candidates

    excluded = _name_variants(exclude)
    return [name for name in candidates if _name_variants(name).isdisjoint(excluded)]


def main() -> int:
    """Run the CLI: parse arguments, recommend a destination, print it.

    Prints "<destination>", a tab, and "<confidence>" when a project is
    recommended, or "none" otherwise. Always returns 0.
    """
    cli = argparse.ArgumentParser(description="Recommend a routing destination for an inbox keeper.")
    cli.add_argument("--item", required=True, help="the keeper's text")
    cli.add_argument("--exclude", help="current project to exclude from candidates")
    opts = cli.parse_args()

    candidates = discover_destination_names(exclude=opts.exclude)
    destination, confidence = recommend(opts.item, candidates)
    if destination is None:
        print("none")
    else:
        print(f"{destination}\t{confidence}")
    return 0


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())