aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/route_recommend.py
diff options
context:
space:
mode:
Diffstat (limited to '.ai/scripts/route_recommend.py')
-rw-r--r--.ai/scripts/route_recommend.py136
1 files changed, 136 insertions, 0 deletions
diff --git a/.ai/scripts/route_recommend.py b/.ai/scripts/route_recommend.py
new file mode 100644
index 0000000..7b36405
--- /dev/null
+++ b/.ai/scripts/route_recommend.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""Wrap-up routing recommendation engine.
+
+Given an inbox keeper's text and a list of candidate project names, infer which
+project the item belongs to, with a confidence tier:
+
+ strong a project's name (or its dot-stripped form, or a path containing it)
+ appears literally in the item
+ weak a distinctive name token overlaps, but the full name doesn't
+ none no overlap; the item stays put
+
+A multi-way tie at the top tier is ambiguous, so it downgrades to weak with a
+deterministic pick (most token overlap, then alphabetical). An empty candidate
+list yields none.
+
+The pure core is `recommend(item, projects) -> (destination, confidence)` — the
+shape the wrap-up router (Phase 4) and the process-inbox marker (Phase 2) both
+call. The CLI wires it to inbox-send.py's `discover_projects` so the candidate
+set is the same project universe inbox-send already knows.
+
+CLI:
+ route_recommend.py --item "<text>" [--exclude <current-project>]
+prints "<destination>\\t<confidence>" on a match, or "none".
+"""
+
+import argparse
+import importlib.util
+import re
+import sys
+from pathlib import Path
+
# Minimum length, in characters, for a project-name token to take part in weak
# matching. Anything shorter ("of", "to", "id") shows up in too many items to be
# a useful routing signal.
MIN_WEAK_TOKEN = 4
+
+_TOKEN_RE = re.compile(r"[a-z0-9]+")
+
+
+def _tokens(text: str) -> set[str]:
+ return set(_TOKEN_RE.findall(text.lower()))
+
+
+def _name_variants(name: str) -> set[str]:
+ """A project name and its dot-stripped alias (.emacs.d -> emacsd)."""
+ return {v for v in (name.lower(), name.replace(".", "").lower()) if v}
+
+
def _literal_present(name: str, item_lower: str) -> bool:
    """Report whether any variant of `name` occurs in `item_lower` as a whole word.

    `item_lower` is expected to be lowercased already. An occurrence only
    counts when it is neither directly preceded nor directly followed by a
    lowercase ASCII letter or digit, so 'home' is rejected inside 'homeowner'
    but accepted inside a path ('~/code/home/...') or a hyphenated name.
    """
    patterns = (
        r"(?<![a-z0-9])" + re.escape(variant) + r"(?![a-z0-9])"
        for variant in _name_variants(name)
    )
    return any(re.search(pattern, item_lower) is not None for pattern in patterns)
+
+
def _tiebreak(candidates: list[str], item_tokens: set[str]) -> str:
    """Pick one winner from tied `candidates`, deterministically.

    The winner is the candidate whose name shares the most tokens with
    `item_tokens`; candidates with equal overlap are ordered by plain string
    comparison and the smallest name wins.

    Args:
        candidates: the tied project names; must be non-empty.
        item_tokens: the tokens of the item being routed.

    Returns:
        The chosen project name.
    """
    # Negating the overlap lets a single ascending comparison express
    # "largest overlap first, then smallest name". min() finds that element in
    # one pass instead of sorting the whole list just to read its head.
    return min(candidates, key=lambda name: (-len(_tokens(name) & item_tokens), name))
+
+
def recommend(item: str, projects: list[str]) -> tuple[str | None, str]:
    """Infer the destination project for `item` from `projects`.

    Each distinct candidate is classified once: "strong" when its name (or a
    variant of it) appears literally in the item, otherwise "weak" when one of
    its name tokens of at least MIN_WEAK_TOKEN characters is also a token of
    the item.

    Args:
        item: the inbox keeper's text.
        projects: candidate project names. A name listed more than once counts
            as a single candidate, so a repeat can never create a tie with
            itself.

    Returns:
        (destination, confidence). confidence is "strong" / "weak" / "none";
        destination is None exactly when confidence is "none". Exactly one
        strong candidate gives "strong"; several strong candidates, or any
        number of weak ones, give "weak" with a deterministic pick.
    """
    if not projects:
        return (None, "none")

    item_lower = item.lower()
    item_tokens = _tokens(item)

    strong: list[str] = []
    weak: list[str] = []
    # dict.fromkeys drops repeated names while keeping first-seen order. Without
    # it a name listed twice would be recorded as two strong hits and a clear
    # literal match would be reported as an ambiguous "weak" one.
    for project in dict.fromkeys(projects):
        if _literal_present(project, item_lower):
            strong.append(project)
            continue
        # Only tokens long enough to be distinctive may trigger a weak match.
        name_tokens = {t for t in _tokens(project) if len(t) >= MIN_WEAK_TOKEN}
        if name_tokens & item_tokens:
            weak.append(project)

    if len(strong) == 1:
        return (strong[0], "strong")

    # From here on the answer is at best "weak": either several names matched
    # literally (ambiguous), or only token-level overlap was found.
    tied = strong or weak
    if not tied:
        return (None, "none")
    if len(tied) == 1:
        return (tied[0], "weak")
    return (_tiebreak(tied, item_tokens), "weak")
+
+
def _load_inbox_send():
    """Load the sibling kebab-named inbox-send.py as a module for its discovery.

    The hyphen in the file name rules out a plain `import`, so the module is
    built from its path (next to this file) and executed under the name
    "inbox_send".

    Returns:
        The executed module object.

    Raises:
        ImportError: if no import spec or loader can be created for the path.
        Exception: anything raised while reading or executing inbox-send.py
            propagates unchanged; the partially initialised module is removed
            from sys.modules first.
    """
    path = Path(__file__).with_name("inbox-send.py")
    spec = importlib.util.spec_from_file_location("inbox_send", path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load {path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib "import a source file
    # directly" recipe does: code that looks its own module up in sys.modules
    # while it is being executed would otherwise find nothing.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later lookups.
        sys.modules.pop(spec.name, None)
        raise
    return module
+
+
def discover_destination_names(exclude: str | None = None) -> list[str]:
    """List the candidate project names, as found by inbox-send's discovery.

    The names are the `.name` of every entry returned by inbox-send's
    `discover_projects(resolve_roots())`.

    When `exclude` is given (normally the current project), every candidate
    that shares a name variant with it — the exact lowercased name or its
    dot-stripped alias — is left out, so the engine never recommends routing an
    item to where it already is.
    """
    inbox_send = _load_inbox_send()
    roots = inbox_send.resolve_roots()
    candidates = [project.name for project in inbox_send.discover_projects(roots)]
    if not exclude:
        return candidates

    excluded = _name_variants(exclude)
    return [name for name in candidates if _name_variants(name).isdisjoint(excluded)]
+
+
def main() -> int:
    """Run the CLI: parse arguments, recommend a destination, print the result.

    Prints "<destination>\\t<confidence>" when a destination was found and
    "none" otherwise. Always returns exit status 0.
    """
    cli = argparse.ArgumentParser(description="Recommend a routing destination for an inbox keeper.")
    cli.add_argument("--item", required=True, help="the keeper's text")
    cli.add_argument("--exclude", help="current project to exclude from candidates")
    options = cli.parse_args()

    candidates = discover_destination_names(exclude=options.exclude)
    destination, confidence = recommend(options.item, candidates)
    if destination is None:
        print("none")
    else:
        print(f"{destination}\t{confidence}")
    return 0
+
+
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())