diff options
| author | Craig Jennings <c@cjennings.net> | 2026-06-16 23:40:42 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-06-16 23:40:42 -0500 |
| commit | e1933fe685a3e15d001552537df90e33ba00b83a (patch) | |
| tree | 4b6435152b6605a96c0cc7a3f3b1dcadd4fc6a02 /.ai/scripts | |
| parent | 4e2db8f20a259d43d2198b120ac32660298d0d63 (diff) | |
| download | rulesets-e1933fe685a3e15d001552537df90e33ba00b83a.tar.gz rulesets-e1933fe685a3e15d001552537df90e33ba00b83a.zip | |
refactor: remove unused cross-agent-comms subsystem
Nothing used the cross-agent message system (send/recv/watch/status/discover/halt/resume over the inbox/from-agents/ file-IPC protocol). Every cross-project handoff goes through inbox-send instead. I removed the scripts, READMEs, workflow, tests, INDEX entry, the three startup.org wirings, and the legacy bin symlinks, then repointed helper-mode's escalation to inbox-send and noted the removal in the generic-agent-runtime spec.
Diffstat (limited to '.ai/scripts')
20 files changed, 0 insertions, 3612 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-discover b/.ai/scripts/cross-agent-comms/cross-agent-discover deleted file mode 100755 index 152cf27..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-discover +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 -"""Enumerate cross-agent destinations: local projects + tailnet peers. - -See cross-agent-discover.md. Local: scan ~/projects/*/.ai/. Peers: read -peers.toml, SSH-probe each for reachability. --enumerate-remote optionally -runs `ls -d ~/projects/*/.ai/` over SSH to list remote projects. - -Cache results for 5 min at ~/.cache/cross-agent-comms/discovery.json so -repeated invocations don't re-probe. - -HALT: prints a banner; otherwise continues. -""" - -from __future__ import annotations - -import argparse -import datetime as _dt -import json -import os -import subprocess -import sys -import time -import tomllib -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -PEERS_TOML = CONFIG_DIR / "peers.toml" -HALT_FILE = CONFIG_DIR / "HALT" -CACHE_DIR = Path.home() / ".cache" / "cross-agent-comms" -CACHE_FILE = CACHE_DIR / "discovery.json" -CACHE_TTL_SECONDS = 300 - -EXIT_OK = 0 -EXIT_GENERAL = 1 -EXIT_PEERS_TOML = 1 - - -def err(msg: str) -> None: - print(msg, file=sys.stderr) - - -def render_banner_if_halt() -> None: - if not HALT_FILE.exists(): - return - try: - reason = HALT_FILE.read_text().strip() - except OSError: - reason = "(HALT file unreadable; treated as halted)" - print("⚠ HALT ACTIVE — cross-agent comms paused") - if reason: - print(f" reason: {reason}") - print() - - -def enumerate_local_projects() -> list[str]: - projects_dir = Path.home() / "projects" - if not projects_dir.is_dir(): - return [] - found = [] - for child in sorted(projects_dir.iterdir()): - if child.is_dir() and (child / ".ai").is_dir(): - found.append(child.name) - return found - - -def load_peers() -> dict: - if not PEERS_TOML.exists(): - return {"peers": {}} - try: - return 
tomllib.loads(PEERS_TOML.read_text()) - except (tomllib.TOMLDecodeError, OSError) as e: - err(f"cannot parse peers.toml: {e}") - sys.exit(EXIT_PEERS_TOML) - - -def probe_peer_reachability(host: str, ssh_user: str | None) -> tuple[bool, str | None]: - """Run a short SSH probe with BatchMode=yes (no interactive prompt).""" - target = f"{ssh_user}@{host}" if ssh_user else host - try: - result = subprocess.run( - ["ssh", "-o", "ConnectTimeout=2", "-o", "BatchMode=yes", target, "true"], - capture_output=True, - text=True, - timeout=5, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - return False, "ssh probe failed" - if result.returncode == 0: - return True, None - return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1] - - -def enumerate_remote_projects(host: str, ssh_user: str | None) -> list[str] | None: - target = f"{ssh_user}@{host}" if ssh_user else host - try: - result = subprocess.run( - [ - "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, - "ls -d ~/projects/*/.ai/ 2>/dev/null", - ], - capture_output=True, - text=True, - timeout=10, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - return None - if result.returncode != 0: - return None - projects = [] - for line in result.stdout.splitlines(): - # Each line looks like /home/<user>/projects/<name>/.ai/ - parts = line.rstrip("/").split("/") - if len(parts) >= 2 and parts[-1] == ".ai": - projects.append(parts[-2]) - return projects - - -def read_cache() -> dict | None: - if not CACHE_FILE.exists(): - return None - try: - age = time.time() - CACHE_FILE.stat().st_mtime - if age > CACHE_TTL_SECONDS: - return None - return json.loads(CACHE_FILE.read_text()) - except (OSError, json.JSONDecodeError): - return None - - -def write_cache(payload: dict) -> None: - CACHE_DIR.mkdir(parents=True, exist_ok=True) - CACHE_FILE.write_text(json.dumps(payload, indent=2)) - - -def discover(peer_filter: str | None, enumerate_remote: bool) -> dict: - local = 
enumerate_local_projects() - peers_cfg = load_peers().get("peers", {}) - - peers_out = [] - for name, cfg in sorted(peers_cfg.items()): - if peer_filter and name != peer_filter: - continue - host = cfg.get("host", name) - ssh_user = cfg.get("ssh_user") - reachable, error = probe_peer_reachability(host, ssh_user) - entry = { - "name": name, - "host": host, - "reachable": reachable, - } - if not reachable: - entry["error"] = error - if enumerate_remote and reachable: - entry["projects"] = enumerate_remote_projects(host, ssh_user) or [] - peers_out.append(entry) - - return { - "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(), - "halt_active": HALT_FILE.exists(), - "local": local, - "peers": peers_out, - } - - -def render_table(payload: dict, enumerate_remote: bool) -> None: - local = payload.get("local", []) - print(f"Local ({_local_hostname()}):") - if local: - wrapped = ", ".join(local) - print(f" {wrapped} [{len(local)} project{'s' if len(local) != 1 else ''}]") - else: - print(" (no projects with .ai/ found)") - print() - - peers = payload.get("peers", []) - if not peers: - print("Peers (from peers.toml):") - print(" (no peers configured)") - return - - print("Peers (from ~/.config/cross-agent-comms/peers.toml):") - for p in peers: - marker = "✓ reachable" if p.get("reachable") else f"✗ UNREACHABLE ({p.get('error', 'unknown')})" - print(f" {p['name']:<16} {p['host']:<24} {marker}") - if enumerate_remote and p.get("projects"): - wrapped = ", ".join(p["projects"]) - print(f" projects: {wrapped}") - - -def _local_hostname() -> str: - import socket - return socket.gethostname().split(".")[0] - - -def main() -> int: - parser = argparse.ArgumentParser(description="Discover cross-agent destinations.") - parser.add_argument("--enumerate-remote", action="store_true", - help="SSH into each peer and list ~/projects/*/.ai/") - parser.add_argument("--no-cache", action="store_true", help="Skip cache; force fresh probe") - parser.add_argument("--peer", help="Limit to 
a single peer name from peers.toml") - parser.add_argument("--json", action="store_true", help="Machine-readable output") - args = parser.parse_args() - - render_banner_if_halt() - - payload = None - if not args.no_cache: - cached = read_cache() - if cached is not None: - # Honor --peer filter on cached payload. - if args.peer: - cached["peers"] = [p for p in cached.get("peers", []) if p["name"] == args.peer] - payload = cached - - if payload is None: - payload = discover(args.peer, args.enumerate_remote) - if not args.no_cache and not args.peer: - # Only cache full (unfiltered) discoveries. - write_cache(payload) - - if args.json: - print(json.dumps(payload, indent=2)) - return EXIT_OK - - render_table(payload, args.enumerate_remote) - return EXIT_OK - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-discover.md b/.ai/scripts/cross-agent-comms/cross-agent-discover.md deleted file mode 100644 index 95134bb..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-discover.md +++ /dev/null @@ -1,155 +0,0 @@ -# cross-agent-discover - -**Purpose.** Enumerate available cross-agent destinations — local projects on -this machine and remote projects on tailnet peers. Validates SSH reachability -for cross-machine destinations before reporting them as usable. - -## Usage - -``` -cross-agent-discover [--enumerate-remote] [--no-cache] [--peer <name>] -``` - -No args required for the common case (local enumeration + peer reachability). - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--enumerate-remote` | off | SSH into each peer and list projects under `~/projects/*/.ai/`. Off by default because SSH adds latency; turn on when you want to see what's available on a remote machine you haven't fully configured. | -| `--no-cache` | off | Skip the 5-minute cache; force fresh discovery. | -| `--peer <name>` | (all) | Limit to a single peer from `peers.toml`. | -| `--json` | off | Machine-readable output. 
| - -## Output - -### Default - -``` -$ cross-agent-discover -Local (ratio): - career, claude-templates, clipper, danneel, documents, elibrary, - finances, health, homelab, jr-estate, kit, little-elisper, - philosophy, website [14 projects] - -Peers (from ~/.config/cross-agent-comms/peers.toml): - velox.local reachable (last seen 2 sec ago) - bastion.local UNREACHABLE (ssh exit 255: connection refused) -``` - -### With `--enumerate-remote` - -``` -$ cross-agent-discover --enumerate-remote -Local (ratio): - ... (as above) - -velox.local (reachable): - career, homelab [2 projects] -``` - -## Configuration - -Reads `~/.config/cross-agent-comms/peers.toml`: - -```toml -# Each peer is a remote machine reachable via SSH (typically over Tailscale). - -[peers.velox] -host = "velox.local" -ssh_user = "cjennings" - -[peers.bastion] -host = "bastion.local" -ssh_user = "cjennings" -``` - -Peers entries describe machines, NOT projects. Projects are enumerated -on-demand under `~/projects/*/.ai/` either locally or via SSH. - -## Cache - -Successful discovery results are cached at -`~/.cache/cross-agent-comms/discovery.json` for 5 minutes. Repeated invocations -within the window read from cache. - -`--no-cache` forces a fresh probe. Useful when adding a new peer or after a -network change. - -## SSH reachability check - -For each peer, runs: - -``` -ssh -o ConnectTimeout=2 -o BatchMode=yes <user>@<host> true -``` - -`BatchMode=yes` prevents interactive password prompts — peers that don't have -key-based auth set up are reported as UNREACHABLE. - -If `--enumerate-remote` is set, on success runs: - -``` -ssh <user>@<host> 'ls -d ~/projects/*/.ai/ 2>/dev/null' -``` - -## Failure modes - -| Symptom | Likely cause | Fix | -|---|---|---| -| Peer reported UNREACHABLE | Tailscale not connected, SSH key not authorized, host firewalled | `tailscale status`; `ssh -v <peer>` to debug. 
| -| Local list is empty | Glob misresolved, or `~/projects/` doesn't exist | Check `ls -d ~/projects/*/.ai/`. | -| `--enumerate-remote` slow | Cold cache, slow tailnet, many peers | First run is slow, subsequent runs hit cache. Use `--peer <name>` to scope. | -| Peer unexpectedly missing from output | Not in `peers.toml`, or `peers.toml` malformed | `cat ~/.config/cross-agent-comms/peers.toml` and validate. | - -## HALT awareness - -Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a -prominent banner before normal output: - -``` -$ cross-agent-discover -⚠ HALT ACTIVE — cross-agent comms paused - Reason: <reason from HALT file body, if any> - Resume with: cross-agent-resume - -(enumeration continues normally — HALT does not suppress visibility) - -Local (ratio): - career, claude-templates, ... - -Peers: - velox.local reachable -``` - -Discover is read-only. Like `cross-agent-status`, it always runs so the user -keeps visibility into what destinations exist regardless of halt state. The -banner makes the halt state impossible to miss. - -If the HALT file exists but is unreadable, print a warning banner and -continue. - -See `cross-agent-halt.md` for the full halt mechanism. - -## Examples - -```bash -# Common: see what's available -cross-agent-discover - -# Force fresh probe after network change -cross-agent-discover --no-cache - -# What's on velox specifically -cross-agent-discover --peer velox --enumerate-remote - -# Pipe to grep -cross-agent-discover --json | jq '.peers[] | select(.reachable)' -``` - -## See also - -- `cross-agent-send` — uses `peers.toml` for routing destinations. -- `cross-agent-status` — local pending messages. -- `cross-agent-comms.org` — protocol spec, `* Limitations` section - explains the cross-machine model. 
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt b/.ai/scripts/cross-agent-comms/cross-agent-halt deleted file mode 100755 index df25115..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-halt +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 -"""Failsafe halt for cross-agent comms. - -See cross-agent-halt.md. Touches ~/.config/cross-agent-comms/HALT and stops -the cross-agent-watch systemd user service. With --tailnet, propagates the -HALT file to every peer in peers.toml via SSH; reports per-peer status with -non-zero exit on partial halt. - -Does NOT pkill in-flight scripts — they detect HALT on next iteration and -stop themselves. -""" - -from __future__ import annotations - -import argparse -import subprocess -import sys -import tomllib -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -HALT_FILE = CONFIG_DIR / "HALT" -PEERS_TOML = CONFIG_DIR / "peers.toml" - -EXIT_OK = 0 -EXIT_PARTIAL = 1 - - -def err(msg: str) -> None: - print(msg, file=sys.stderr) - - -def write_halt_file(reason: str) -> None: - CONFIG_DIR.mkdir(parents=True, exist_ok=True) - HALT_FILE.write_text((reason + "\n") if reason else "") - - -def stop_watcher_service() -> None: - """Best-effort stop of the systemd watcher service. Failures are logged but not fatal.""" - try: - subprocess.run( - ["systemctl", "--user", "stop", "cross-agent-watch.path"], - capture_output=True, text=True, timeout=5, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - # Watcher service may not be installed — fine. - pass - - -def load_peers() -> dict: - if not PEERS_TOML.exists(): - return {} - try: - return tomllib.loads(PEERS_TOML.read_text()) - except (tomllib.TOMLDecodeError, OSError) as e: - err(f"cannot parse peers.toml: {e}") - return {} - - -def ssh_touch_halt(host: str, ssh_user: str | None, reason: str) -> tuple[bool, str]: - target = f"{ssh_user}@{host}" if ssh_user else host - # Build the remote command. Quote the reason carefully. 
- remote_cmd = ( - f"mkdir -p ~/.config/cross-agent-comms && " - f"printf %s {_sh_quote(reason)} > ~/.config/cross-agent-comms/HALT" - ) - try: - result = subprocess.run( - ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd], - capture_output=True, text=True, timeout=10, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - return False, "ssh unavailable or timed out" - if result.returncode == 0: - return True, "HALT file written" - return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1] - - -def _sh_quote(s: str) -> str: - return "'" + s.replace("'", "'\"'\"'") + "'" - - -def main() -> int: - parser = argparse.ArgumentParser(description="Halt all cross-agent comms on this machine (and optionally tailnet).") - parser.add_argument("reason", nargs="?", default="", help="Optional human-readable reason") - parser.add_argument("--tailnet", action="store_true", - help="Propagate HALT to every peer in peers.toml") - args = parser.parse_args() - - # Local halt. - write_halt_file(args.reason) - stop_watcher_service() - print("Halting locally ✓ (HALT file written)") - - if not args.tailnet: - print() - print(f"Halt active. 
Remove {HALT_FILE} or run cross-agent-resume to clear.") - print("Agent polling will stop within ~5 min (one cadence cycle).") - return EXIT_OK - - peers = load_peers().get("peers", {}) - if not peers: - print() - print("No peers configured in peers.toml — local-only halt complete.") - return EXIT_OK - - print() - successes = 1 # local already counted - failures = [] - for name, cfg in sorted(peers.items()): - host = cfg.get("host", name) - ssh_user = cfg.get("ssh_user") - ok, detail = ssh_touch_halt(host, ssh_user, args.reason) - marker = "✓" if ok else "✗" - print(f"Halting {host:<28} {marker} ({detail})") - if ok: - successes += 1 - else: - failures.append(f"{name} ({host}): {detail}") - - print() - total = len(peers) + 1 - if failures: - print(f"PARTIAL HALT: {successes}/{total} machines halted.") - for f in failures: - print(f" - {f}") - print("Resolve the failures or manually halt each machine.") - return EXIT_PARTIAL - print(f"Halt active across {total} machine(s).") - return EXIT_OK - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt.md b/.ai/scripts/cross-agent-comms/cross-agent-halt.md deleted file mode 100644 index b817fbc..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-halt.md +++ /dev/null @@ -1,134 +0,0 @@ -# cross-agent-halt - -**Purpose.** Failsafe stop for all cross-agent activity on the local machine -(or, with `--tailnet`, across all configured peers). Creates the HALT file -that every component in the protocol checks; within one polling cadence -(~5 min) all polling, sending, watching, and receiving stops. - -This is the user's emergency brake. Use when something is misbehaving and -visiting individual sessions is too slow. - -## Usage - -``` -cross-agent-halt [reason] [--tailnet] [--no-stop-watcher] -``` - -### Positional argument - -| Position | Meaning | Example | -|---|---|---| -| 1 | Optional human-readable reason for the halt. Written into the HALT file's body. 
Helps future-you remember why you stopped things. | `"investigating runaway poll loop, 2026-04-27"` | - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--tailnet` | local only | Propagate halt to every peer in `peers.toml` via SSH over Tailscale. | -| `--no-stop-watcher` | (stops watcher) | Skip stopping the `cross-agent-watch.path` systemd unit. Useful if the watcher is intentionally separate from comms (rare). | - -## Behavior - -### Local halt (default) - -1. Write the HALT file: `~/.config/cross-agent-comms/HALT`. If a `[reason]` was - passed, write it as the file's body. Otherwise the file is empty (existence - alone triggers halt). -2. Stop the watcher service: `systemctl --user stop cross-agent-watch.path` - (and the corresponding `.service` if running). -3. Print a summary: - ``` - ✓ HALT file written: ~/.config/cross-agent-comms/HALT - ✓ Watcher service stopped (cross-agent-watch.path) - - In-flight sends will complete their current rsync step (~seconds), then - stop. New sends are blocked. - - Active agent polling sessions stop within one cadence (~5 min). - - Use `cross-agent-resume` to clear HALT. - Per-session polling does NOT auto-resume — you re-engage each session by - telling its agent to resume polling. - ``` -4. Exit 0. - -### Cross-tailnet halt (`--tailnet`) - -1. Apply local halt steps 1-2 first. -2. Read `peers.toml` for the list of remote machines. -3. For each peer, SSH and write the HALT file: - ``` - ssh <user>@<host> "echo '<reason>' > ~/.config/cross-agent-comms/HALT && \ - systemctl --user stop cross-agent-watch.path" - ``` -4. Track per-peer success/failure. Print results: - ``` - Halting velox.local ✓ (HALT file written) - Halting bastion.local ✗ (ssh exit 255: no route to host) - Halting locally ✓ (HALT file written) - - PARTIAL HALT: 2/3 machines halted. bastion.local needs manual halt. - ``` -5. Exit 0 if all peers halted; exit 1 if any peer failed (so scripts can - detect partial halt). 
The local halt always succeeds — even on `--tailnet`, - if remote peers fail, local is still halted. - -## What "halt active" means for each component - -| Component | Behavior under HALT | -|---|---| -| `cross-agent-send` | Refuses to send. Exits 5 with "halt active; remove ~/.config/cross-agent-comms/HALT to resume." Checks HALT at start AND between each retry/rsync step, so an in-flight send completes its current step then stops. | -| `cross-agent-recv` | Refuses to verify or dedup. Exits 5 with same message. Inbound files are **left in place** — not moved, not rejected — so resume picks them up cleanly via cold-start. | -| `cross-agent-watch` | Continues running but suppresses notifications. Logs each event with `(suppressed by HALT)` so the operator can see what would have fired. | -| `cross-agent-status` | Prints prominent `⚠ HALT ACTIVE` banner before normal output. Continues to enumerate (read-only). | -| `cross-agent-discover` | Same banner. Continues (read-only). | -| Agent polling loops | Check HALT on every wake. If set: write a final `progress` note to any active conversation ("HALT fired locally; pausing"), surface "(HALT active; cross-agent comms paused)" in every user response, and stop rescheduling. Polling decays naturally within one cadence. | -| Conversation initiator | Refuses to write sequence 1 of any new conversation. Surfaces refusal to user. | -| Startup workflow (Phase A) | Checks HALT at session boot. If set, surfaces immediately and skips cross-agent inbox checks. | - -## Failure modes - -| Symptom | Cause | Fix | -|---|---|---| -| `~/.config/cross-agent-comms/HALT` already exists | Halt was already active | OK — running halt again refreshes the reason text. Safe. | -| `systemctl --user stop` fails | Watcher service not installed, or systemd not available | The HALT file is still written — components that check HALT will still stop. The systemctl failure surfaces as a non-fatal warning. 
| -| `--tailnet` halts some peers but not others | One or more peers unreachable | Exit 1 with per-peer status. Manually halt the unreachable peers (visit each machine, `touch ~/.config/cross-agent-comms/HALT`), or fix the network and re-run. | -| Permission denied writing the HALT file | `~/.config/cross-agent-comms/` doesn't exist or is owned by another user | `mkdir -p ~/.config/cross-agent-comms/`; check ownership. | - -## What halt does NOT do - -- Does not kill running Claude sessions. Polling stops within ~5 min, but the - session itself stays alive and can be re-engaged after resume. -- Does not delete pending messages. Inbound files in `inbox/from-agents/` - remain; they get processed when polling resumes. -- Does not abort in-flight rsync push mid-byte. Atomic-write semantics - guarantee in-flight messages either complete cleanly or leave only `.tmp.*` - files (which receivers ignore). - -## Examples - -```bash -# Quick halt with no reason -cross-agent-halt - -# Halt with a memo -cross-agent-halt "runaway poll loop in homelab session, debugging" - -# Halt all tailnet peers + local -cross-agent-halt --tailnet "shutting down for system update" - -# Halt protocol comms but leave the watcher service running -cross-agent-halt --no-stop-watcher -``` - -## Recovery - -Always pair with `cross-agent-resume` when the situation is resolved: - -```bash -cross-agent-resume # local -cross-agent-resume --tailnet # all peers -``` - -## See also - -- `cross-agent-resume` — counterpart that clears HALT. -- `cross-agent-status` — see HALT state at a glance. -- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section. diff --git a/.ai/scripts/cross-agent-comms/cross-agent-recv b/.ai/scripts/cross-agent-comms/cross-agent-recv deleted file mode 100755 index b67533a..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-recv +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -"""Cross-agent message receiver. - -See cross-agent-recv.md for the full contract. 
Reads one message file and -emits a structured decision the agent acts on: - - process | dedup | query | reject - -Decision exit codes: - 0 = process 1 = dedup 2 = query 3 = reject - -When HALT is set, the script refuses to verify or dedup and leaves the -inbound file in place — resume picks it up via cold-start. -""" - -from __future__ import annotations - -import argparse -import hashlib -import json -import re -import shutil -import subprocess -import sys -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -HALT_FILE = CONFIG_DIR / "HALT" -EXPECTED_PROTOCOL_VERSION = "5" - -REQUIRED_FRONTMATTER = ["TITLE", "CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"] -VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"} - -DEC_PROCESS = "process" -DEC_DEDUP = "dedup" -DEC_QUERY = "query" -DEC_REJECT = "reject" - -EXIT_FOR_DECISION = { - DEC_PROCESS: 0, - DEC_DEDUP: 1, - DEC_QUERY: 2, - DEC_REJECT: 3, -} - -EXIT_HALT = 5 - - -def err(msg: str) -> None: - print(msg, file=sys.stderr) - - -def check_halt() -> None: - if HALT_FILE.exists(): - try: - reason = HALT_FILE.read_text().strip() - except OSError: - err("halt active (HALT file present but unreadable; treated as halted)") - sys.exit(EXIT_HALT) - msg = "halt active; leaving inbound message in place (resume will pick up)" - if reason: - msg = f"{msg}: {reason}" - err(msg) - sys.exit(EXIT_HALT) - - -def parse_frontmatter(path: Path) -> dict[str, str]: - try: - text = path.read_text() - except OSError as e: - return {"_parse_error": f"cannot read: {e}"} - fm: dict[str, str] = {} - for line in text.splitlines(): - line = line.rstrip() - if not line: - if fm: - break - continue - m = re.match(r"#\+([A-Z_]+):\s*(.*)", line) - if m: - fm[m.group(1)] = m.group(2).strip() - elif fm: - break - return fm - - -def emit_decision( - decision: str, - reason: str | None, - fm: dict[str, str], - sha256: str | None, - args: 
argparse.Namespace, -) -> int: - payload = { - "decision": decision, - "reason": reason, - "message_type": fm.get("MESSAGE_TYPE"), - "conversation_id": fm.get("CONVERSATION_ID"), - "sequence": fm.get("SEQUENCE"), - "timestamp": fm.get("TIMESTAMP"), - "sha256": sha256, - } - if args.json: - print(json.dumps(payload, indent=None if args.compact_json else 2)) - else: - print(f"decision: {decision}") - if reason: - print(f"reason: {reason}") - for k in ("message_type", "conversation_id", "sequence", "timestamp"): - v = payload[k] - if v is not None: - print(f"{k}: {v}") - if sha256: - print(f"sha256: {sha256}") - return EXIT_FOR_DECISION[decision] - - -def gpg_verify(message_path: Path, sig_path: Path) -> tuple[bool, str]: - try: - result = subprocess.run( - ["gpg", "--verify", str(sig_path), str(message_path)], - capture_output=True, - text=True, - ) - except FileNotFoundError: - return False, "gpg not installed" - if result.returncode == 0: - return True, "" - return False, result.stderr.strip().splitlines()[-1] if result.stderr.strip() else f"exit {result.returncode}" - - -def sha256_of(path: Path) -> str: - h = hashlib.sha256() - with path.open("rb") as f: - for chunk in iter(lambda: f.read(65536), b""): - h.update(chunk) - return h.hexdigest() - - -def find_dedup_match(message_path: Path, fm: dict[str, str], my_hash: str) -> tuple[str, str | None]: - """Scan the message's directory for same-CONVERSATION_ID/SEQUENCE files. - - Returns (decision, reason) — decision is DEC_DEDUP for an exact-hash match, - or DEC_PROCESS when no match or hash differs (sequence collision is OK). 
- """ - parent = message_path.parent - conv_id = fm["CONVERSATION_ID"] - sequence = fm["SEQUENCE"] - for sibling in parent.iterdir(): - if sibling == message_path or not sibling.is_file() or sibling.suffix != ".org": - continue - sib_fm = parse_frontmatter(sibling) - if sib_fm.get("CONVERSATION_ID") != conv_id or sib_fm.get("SEQUENCE") != sequence: - continue - # Same conv-id + same sequence — check hash. - if sha256_of(sibling) == my_hash: - return DEC_DEDUP, f"identical retry of {sibling.name}" - return DEC_PROCESS, None - - -def check_requires_tools(fm: dict[str, str]) -> tuple[bool, list[str]]: - """REQUIRES_TOOLS is a comma-separated list of tool names. - - For v5, "tool available" is a heuristic: an executable on PATH whose name - matches the tool slug. MCP availability is currently out of scope (no - portable way to query it from a CLI). - """ - tools_field = fm.get("REQUIRES_TOOLS") - if not tools_field: - return True, [] - tools = [t.strip() for t in tools_field.split(",") if t.strip()] - missing = [t for t in tools if shutil.which(t) is None] - return len(missing) == 0, missing - - -def main() -> int: - parser = argparse.ArgumentParser(description="Receive and decide on a cross-agent message.") - parser.add_argument("message_file", type=Path) - parser.add_argument("--no-verify", action="store_true", help="Skip GPG verification (testing only)") - parser.add_argument("--no-dedup", action="store_true", help="Skip SHA-256 dedup against existing files") - parser.add_argument("--protocol-version", default=EXPECTED_PROTOCOL_VERSION, - help="Override expected protocol version (default: 5)") - parser.add_argument("--json", action="store_true", help="Emit JSON output") - parser.add_argument("--compact-json", action="store_true", help="Compact JSON (no indent)") - args = parser.parse_args() - - check_halt() - - if not args.message_file.is_file(): - err(f"message file not found: {args.message_file}") - return EXIT_FOR_DECISION[DEC_REJECT] - - fm = 
parse_frontmatter(args.message_file) - if "_parse_error" in fm: - return emit_decision(DEC_REJECT, fm["_parse_error"], {}, None, args) - - # Step 1: frontmatter sanity-check. - missing = [k for k in REQUIRED_FRONTMATTER if k not in fm] - if missing: - return emit_decision( - DEC_REJECT, f"frontmatter missing required fields: {', '.join(missing)}", fm, None, args - ) - if fm["MESSAGE_TYPE"] not in VALID_MESSAGE_TYPES: - return emit_decision( - DEC_REJECT, f"invalid MESSAGE_TYPE: {fm['MESSAGE_TYPE']!r}", fm, None, args - ) - - # Step 2: PROTOCOL_VERSION check. - if fm["PROTOCOL_VERSION"] != args.protocol_version: - return emit_decision( - DEC_QUERY, - f"PROTOCOL_VERSION mismatch: expected {args.protocol_version}, got {fm['PROTOCOL_VERSION']}", - fm, - None, - args, - ) - - # Step 3: GPG verify. - if not args.no_verify: - sig_path = args.message_file.with_suffix(args.message_file.suffix + ".asc") - if not sig_path.is_file(): - return emit_decision(DEC_REJECT, f"signature file missing: {sig_path.name}", fm, None, args) - ok, gpg_err = gpg_verify(args.message_file, sig_path) - if not ok: - return emit_decision(DEC_REJECT, f"gpg verify failed: {gpg_err}", fm, None, args) - - # Step 4: SHA-256 dedup. - my_hash = sha256_of(args.message_file) - if not args.no_dedup: - decision, reason = find_dedup_match(args.message_file, fm, my_hash) - if decision == DEC_DEDUP: - return emit_decision(DEC_DEDUP, reason, fm, my_hash, args) - - # Step 5: REQUIRES_TOOLS check. - ok, missing_tools = check_requires_tools(fm) - if not ok: - return emit_decision( - DEC_QUERY, - f"required tools unavailable: {', '.join(missing_tools)}", - fm, - my_hash, - args, - ) - - # Step 6: process. 
- return emit_decision(DEC_PROCESS, None, fm, my_hash, args) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-recv.md b/.ai/scripts/cross-agent-comms/cross-agent-recv.md deleted file mode 100644 index 247a27a..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-recv.md +++ /dev/null @@ -1,218 +0,0 @@ -# cross-agent-recv - -**Purpose.** The canonical receiver-side processor. Reads a single incoming -message file and reports a structured decision the agent acts on: -process / dedup / query / reject. - -The script handles only mechanical checks (frontmatter, signature, dedup, -version, tools). Substance-level decisions like `pushback` ("I disagree with -this request") happen one layer up — after the agent reads the message body -the script returns as `process`-able. - -This is the read-side counterpart to `cross-agent-send`. Together they are the -two halves of the per-message contract. The agent's polling loop calls -`cross-agent-recv` on every new file in `inbox/from-agents/` and dispatches on -the decision. - -Without this script, every receiver implementation re-invents GPG verify + -frontmatter sanity-check + SHA-256 dedup. With it, behavior is consistent -across projects. - -## Usage - -``` -cross-agent-recv <message-file> -``` - -Single positional argument: a `.org` file in `inbox/from-agents/`. The matching -`.asc` signature file must be present alongside it. - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--no-verify` | (verify on) | Skip GPG verification. Testing only. | -| `--no-dedup` | (dedup on) | Skip SHA-256 dedup against existing files. Testing only. | -| `--protocol-version <N>` | 5 | Override the expected protocol version. Useful for testing forward-compatibility checks. | -| `--json` | off | Output decision as JSON for easier parsing by the agent. | - -## Behavior - -Runs the receiver checks in order. First failure determines the decision. 
- -### Step 1 — Frontmatter sanity-check - -Parse the message's org-mode frontmatter. Required fields: - -- `#+TITLE` -- `#+CONVERSATION_ID` -- `#+MESSAGE_TYPE` (must be one of: `request`, `progress`, `query`, `pushback`, - `complete`, `release`, `escalate`) -- `#+SEQUENCE` (integer) -- `#+TIMESTAMP` (ISO 8601 with explicit offset) -- `#+PROTOCOL_VERSION` (must match the expected version; default 5) - -Any required field missing, malformed, or the protocol version mismatched → -decision = `reject` (frontmatter) or `query` (version mismatch — see below). - -### Step 2 — Protocol-version check - -If `PROTOCOL_VERSION` doesn't match the expected: - -- Decision = `query`. Action: receiver should write a `query` reply asking the - sender to upgrade to the expected protocol version. - -### Step 3 — Signature verification - -Look for `<message-file>.asc` alongside the `.org`. If missing or `gpg ---verify` fails: - -- Decision = `reject` (signature). Surface to user; do not act. - -The `.asc` file MUST be present when the `.org` is — `cross-agent-send` -guarantees this with its strict ordering (`.asc` lands first). If the `.asc` -is missing despite the `.org` being present, the sender violated atomic-write -ordering or the file was tampered with in transit. - -### Step 4 — SHA-256 dedup - -Compute SHA-256 of the message file. Scan the same directory for existing -files matching `CONVERSATION_ID + SEQUENCE`: - -- No match → decision = `process` (new message, dispatch by type). -- Match with **identical** SHA-256 → decision = `dedup` (silent retry; do not - reprocess). -- Match with **different** SHA-256 → decision = `process` (sequence collision - with non-identical content; both are legitimate, ordered by `#+TIMESTAMP`). - -### Step 5 — REQUIRES_TOOLS optional check - -If the message has a `#+REQUIRES_TOOLS` field, verify each named tool/MCP is -available in the receiver's environment. - -- All available → `process`. -- One or more missing → decision = `query`. 
The agent should write a `query` - reply naming the missing tools, asking the sender to reframe the request to - avoid them. - -### Step 6 — Dispatch decision - -If all checks pass, decision = `process` with the parsed `MESSAGE_TYPE` so the -agent's main loop knows which handler to invoke. - -## Output - -### Default (human-readable) - -``` -$ cross-agent-recv inbox/from-agents/20260427T091015Z-from-homelab-prep-fixup.org -decision: process -message_type: request -conversation_id: prep-fixup -sequence: 6 -sha256: a1b2c3d4... -``` - -### `--json` - -```json -{ - "decision": "process", - "reason": null, - "message_type": "request", - "conversation_id": "prep-fixup", - "sequence": 6, - "timestamp": "2026-04-27T04:11:42-05:00", - "sha256": "a1b2c3d4..." -} -``` - -For decisions other than `process`, `reason` carries a human-readable -explanation: - -```json -{ - "decision": "query", - "reason": "PROTOCOL_VERSION mismatch: expected 5, got 4", - "conversation_id": "prep-fixup", - "sequence": 6 -} -``` - -## Decision exit codes - -| Decision | Exit code | Agent action | -|---|---|---| -| `process` | 0 | Dispatch to the message-type handler | -| `dedup` | 1 | Silent — do nothing further | -| `query` | 2 | Write a `query` reply (see `reason` for what to ask) | -| `reject` | 3 | Surface to user; do not auto-reply | - -The agent reads stdout/JSON to learn the decision; it can also key off exit -code for simpler bash-style dispatching. - -## Failure modes - -| Symptom | Cause | Fix | -|---|---|---| -| `decision: reject (frontmatter)` | Required field missing or malformed | Open the message; fix or surface to user. The sender should not have produced this file. | -| `decision: reject (signature)` | `.asc` missing, GPG verify failed, or signer unknown | Check that `.asc` exists alongside `.org`. If yes, run `gpg --verify <msg>.asc <msg>` manually for diagnostic output. 
| -| `decision: query (PROTOCOL_VERSION)` | Sender on older/newer protocol | Reply with a `query` asking sender to upgrade. Both sides should align before continuing. | -| `decision: query (REQUIRES_TOOLS)` | Receiver lacks one of the named tools | Reply with a `query` naming the missing tools; sender should reframe to avoid. | -| `decision: dedup` | Already-processed identical retry | No action. The script handled it correctly. | - -## HALT awareness - -Checks `~/.config/cross-agent-comms/HALT` at the start of every invocation. If -HALT exists, exits with code 5 ("halt active; remove -~/.config/cross-agent-comms/HALT to resume") without verifying, deduping, or -returning a decision. - -**The inbound file is left in place** — not moved, not rejected, not -deduped. When HALT clears and polling resumes, the file gets picked up via -the normal cold-start handling (whichever surfaces first: watcher -notification, startup workflow check, or the next agent poll). Reversibility -is preserved. - -If the HALT file exists but is unreadable, fail-closed — treat as if HALT is -set. - -See `cross-agent-halt.md` for the full halt mechanism. - -## Examples - -```bash -# Basic invocation in an agent's polling loop -for msg in inbox/from-agents/*.org; do - decision=$(cross-agent-recv --json "$msg") - case "$(echo "$decision" | jq -r '.decision')" in - process) handle_message "$msg" ;; - dedup) ;; # silent - query) write_query_reply "$msg" "$decision" ;; - reject) surface_to_user "$msg" "$decision" ;; - esac -done - -# Test signature verification only -cross-agent-recv --no-dedup inbox/from-agents/test-msg.org - -# Test against a future protocol version -cross-agent-recv --protocol-version 6 inbox/from-agents/future-msg.org -``` - -## Performance - -The script is fast (single SHA-256 compute, single GPG verify, frontmatter -parse). For typical messages (single-digit KB), runs in well under 100ms. 
-Dedup-scan is O(N) over files in the directory; if a project's -`inbox/from-agents/` accumulates hundreds of files, archive released -conversations to keep the scan fast. - -## See also - -- `cross-agent-send` — counterpart writer. -- `cross-agent-watch` — fires when a new message arrives; agent then calls - `cross-agent-recv` to process it. -- `cross-agent-status` — pending-message snapshot (uses similar - released-vs-unreleased logic, but doesn't process individual messages). -- `cross-agent-comms.org` — protocol spec, the "what" the script implements. diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume b/.ai/scripts/cross-agent-comms/cross-agent-resume deleted file mode 100755 index 1fb83bc..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-resume +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -"""Resume cross-agent comms after a halt. - -See cross-agent-resume.md. Removes ~/.config/cross-agent-comms/HALT and -restarts the cross-agent-watch systemd user service. With --tailnet, -propagates the removal to every peer in peers.toml via SSH; reports -per-peer status with non-zero exit on partial resume. - -Per the asymmetry rule: clearing HALT does NOT auto-resume agent polling. -Each session must explicitly re-engage. 
-""" - -from __future__ import annotations - -import argparse -import subprocess -import sys -import tomllib -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -HALT_FILE = CONFIG_DIR / "HALT" -PEERS_TOML = CONFIG_DIR / "peers.toml" - -EXIT_OK = 0 -EXIT_PARTIAL = 1 - - -def err(msg: str) -> None: - print(msg, file=sys.stderr) - - -def remove_halt_file() -> bool: - """Returns True if HALT was removed, False if it didn't exist.""" - if HALT_FILE.exists(): - try: - HALT_FILE.unlink() - return True - except OSError as e: - err(f"could not remove HALT: {e}") - return False - return False - - -def start_watcher_service() -> None: - """Best-effort start of the systemd watcher path unit.""" - try: - subprocess.run( - ["systemctl", "--user", "start", "cross-agent-watch.path"], - capture_output=True, text=True, timeout=5, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - pass - - -def load_peers() -> dict: - if not PEERS_TOML.exists(): - return {} - try: - return tomllib.loads(PEERS_TOML.read_text()) - except (tomllib.TOMLDecodeError, OSError) as e: - err(f"cannot parse peers.toml: {e}") - return {} - - -def ssh_remove_halt(host: str, ssh_user: str | None) -> tuple[bool, str]: - target = f"{ssh_user}@{host}" if ssh_user else host - remote_cmd = "rm -f ~/.config/cross-agent-comms/HALT" - try: - result = subprocess.run( - ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd], - capture_output=True, text=True, timeout=10, - ) - except (FileNotFoundError, subprocess.TimeoutExpired): - return False, "ssh unavailable or timed out" - if result.returncode == 0: - return True, "HALT cleared" - return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1] - - -def print_re_engage_instructions() -> None: - print() - print("Halt cleared. 
Watcher restarted.") - print() - print("Agent polling does NOT auto-resume — per the failsafe asymmetry rule,") - print("agents stay paused until you explicitly re-engage each session.") - print("Open the relevant Claude session and tell the agent to resume polling") - print("for its conversation.") - - -def main() -> int: - parser = argparse.ArgumentParser(description="Resume cross-agent comms after a halt.") - parser.add_argument("--tailnet", action="store_true", - help="Propagate HALT removal to every peer in peers.toml") - args = parser.parse_args() - - removed = remove_halt_file() - start_watcher_service() - if removed: - print("Resuming locally ✓ (HALT cleared)") - else: - print("Resuming locally ✓ (no HALT was active)") - - if not args.tailnet: - print_re_engage_instructions() - return EXIT_OK - - peers = load_peers().get("peers", {}) - if not peers: - print() - print("No peers configured in peers.toml — local-only resume complete.") - print_re_engage_instructions() - return EXIT_OK - - print() - successes = 1 - failures = [] - for name, cfg in sorted(peers.items()): - host = cfg.get("host", name) - ssh_user = cfg.get("ssh_user") - ok, detail = ssh_remove_halt(host, ssh_user) - marker = "✓" if ok else "✗" - print(f"Resuming {host:<27} {marker} ({detail})") - if ok: - successes += 1 - else: - failures.append(f"{name} ({host}): {detail}") - - print() - total = len(peers) + 1 - if failures: - print(f"PARTIAL RESUME: {successes}/{total} machines cleared.") - for f in failures: - print(f" - {f}") - print("Resolve the failures or manually clear HALT on each machine.") - print_re_engage_instructions() - return EXIT_PARTIAL - - print(f"Resume complete across {total} machine(s).") - print_re_engage_instructions() - return EXIT_OK - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume.md b/.ai/scripts/cross-agent-comms/cross-agent-resume.md deleted file mode 100644 index 8aa8357..0000000 --- 
a/.ai/scripts/cross-agent-comms/cross-agent-resume.md +++ /dev/null @@ -1,117 +0,0 @@ -# cross-agent-resume - -**Purpose.** Clear the HALT file and restart the watcher service. Counterpart -to `cross-agent-halt`. Resuming agent polling is **explicit per-session** — -this script doesn't auto-revive halted polling loops; you tell each session -to re-engage. - -## Usage - -``` -cross-agent-resume [--tailnet] -``` - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--tailnet` | local only | Clear HALT on every peer in `peers.toml` via SSH over Tailscale. | - -## Behavior - -### Local resume (default) - -1. Remove the HALT file: `rm -f ~/.config/cross-agent-comms/HALT`. (Use `-f` - so a missing file isn't an error — running resume when not halted is safe.) -2. Restart the watcher service: `systemctl --user start cross-agent-watch.path`. -3. Print a summary: - ``` - ✓ HALT file removed - ✓ Watcher service started (cross-agent-watch.path) - - cross-agent-send and cross-agent-recv will accept new operations. - - Inbound messages held during halt will be picked up by the watcher. - - Agent polling does NOT auto-resume. To re-engage polling in a paused - session, open that Claude session and tell the agent to resume. - ``` -4. Exit 0. - -### Cross-tailnet resume (`--tailnet`) - -1. Apply local resume steps 1-2 first. -2. Read `peers.toml` for the list of remote machines. -3. For each peer, SSH: - ``` - ssh <user>@<host> "rm -f ~/.config/cross-agent-comms/HALT && \ - systemctl --user start cross-agent-watch.path" - ``` -4. Track per-peer success/failure: - ``` - Resuming velox.local ✓ (HALT cleared, watcher started) - Resuming bastion.local ✗ (ssh exit 255: no route to host) - Resuming locally ✓ - - PARTIAL RESUME: 2/3 machines resumed. bastion.local still halted. - ``` -5. Exit 0 if all peers resumed; exit 1 on any failure. - -## Why agent polling doesn't auto-resume - -Two reasons the asymmetry is deliberate: - -1. 
*Auto-resume could silently invert intentional kills.* If you halted - because a session was misbehaving, removing HALT shouldn't quietly revive - that session's polling. You re-engage explicitly so you're aware of which - sessions came back online. - -2. *You may want to inspect before resuming.* After a halt, you might want to - read pending messages, fix configuration, or kill a particular Claude - session entirely. Per-session resume forces that pause. - -## Re-engaging polling in a Claude session - -After `cross-agent-resume`, open the relevant Claude session and say something -like: - -``` -HALT is cleared; resume polling. -``` - -The agent will check the HALT file (now absent), re-create its polling -schedule, and continue the in-flight conversation from wherever it left off. -The conversation file is intact; the receiver will pick up any new messages -that arrived during the halt window. - -## Failure modes - -| Symptom | Cause | Fix | -|---|---|---| -| HALT file doesn't exist | Already resumed (or never halted) | OK — `-f` makes this a no-op. | -| `systemctl --user start` fails | Watcher service not installed | Install per `cross-agent-watch.md`'s systemd recipe. | -| `--tailnet` resumes some peers but not others | Same as halt: peer unreachable | Per-peer status reported; resolve manually for unreachable peers. | -| Permission denied removing HALT file | File owned by another user | Check ownership; HALT files should be owned by the running user. | - -## Examples - -```bash -# Local resume after a halt -cross-agent-resume - -# Resume all tailnet peers + local -cross-agent-resume --tailnet -``` - -## Recovery flow - -After a halt: - -1. Investigate whatever caused the halt (runaway loop, bad config, etc.). -2. Fix the underlying issue. -3. Run `cross-agent-resume`. -4. Open each Claude session that was polling and tell its agent to re-engage. -5. Confirm operation with `cross-agent-status`. 
- -## See also - -- `cross-agent-halt` — counterpart that creates the HALT file. -- `cross-agent-status` — verify HALT cleared and see pending messages. -- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section. diff --git a/.ai/scripts/cross-agent-comms/cross-agent-send b/.ai/scripts/cross-agent-comms/cross-agent-send deleted file mode 100755 index 68c010a..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-send +++ /dev/null @@ -1,356 +0,0 @@ -#!/usr/bin/env python3 -"""Cross-agent message sender. - -See cross-agent-send.md for the full contract. Briefly: - -- Destination as <machine>.<project>; resolved via peers.toml. -- Same-machine: cp to receiver's inbox/from-agents/ with atomic rename. -- Cross-machine: rsync over SSH (typically Tailscale) with retry+backoff. -- GPG-signs by default; .asc renames before .org so receivers never see - a .org without its sibling signature. -- Generates the canonical filename; user's input filename is ignored. -- Honors the HALT file: refuses to send and exits with code 5 when set. 
-""" - -from __future__ import annotations - -import argparse -import datetime as _dt -import json -import os -import re -import shutil -import socket -import subprocess -import sys -import tempfile -import time -import tomllib -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -PEERS_TOML = CONFIG_DIR / "peers.toml" -HALT_FILE = CONFIG_DIR / "HALT" -STATE_DIR = Path.home() / ".local" / "state" / "cross-agent-comms" -FAILED_SENDS_DIR = STATE_DIR / "failed-sends" - -EXIT_OK = 0 -EXIT_GENERAL = 1 -EXIT_DEST_NOT_FOUND = 2 -EXIT_CROSS_MACHINE_FAILED = 3 -EXIT_FRONTMATTER = 4 -EXIT_HALT = 5 - -REQUIRED_FRONTMATTER = ["CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"] -VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"} - - -def err(msg: str) -> None: - print(msg, file=sys.stderr) - - -def check_halt() -> None: - """Exit with code 5 if HALT file exists.""" - if HALT_FILE.exists(): - try: - reason = HALT_FILE.read_text().strip() - except OSError: - # Fail-closed on unreadable HALT. - err("halt active (HALT file present but unreadable; treated as halted)") - err(f"remove {HALT_FILE} to resume") - sys.exit(EXIT_HALT) - msg = "halt active" - if reason: - msg += f": {reason}" - err(msg) - err(f"remove {HALT_FILE} to resume") - sys.exit(EXIT_HALT) - - -def parse_frontmatter(path: Path) -> dict[str, str]: - """Extract org-mode #+KEY: value frontmatter from the top of the file.""" - try: - text = path.read_text() - except OSError as e: - err(f"cannot read message file: {e}") - sys.exit(EXIT_GENERAL) - - frontmatter: dict[str, str] = {} - for line in text.splitlines(): - line = line.rstrip() - if not line: - # Blank line ends the frontmatter block. - if frontmatter: - break - continue - m = re.match(r"#\+([A-Z_]+):\s*(.*)", line) - if m: - frontmatter[m.group(1)] = m.group(2).strip() - else: - # First non-frontmatter line ends parsing. 
- if frontmatter: - break - return frontmatter - - -def validate_frontmatter(fm: dict[str, str]) -> None: - missing = [k for k in REQUIRED_FRONTMATTER if k not in fm] - if missing: - err(f"frontmatter missing required fields: {', '.join(missing)}") - sys.exit(EXIT_FRONTMATTER) - if fm["MESSAGE_TYPE"] not in VALID_MESSAGE_TYPES: - err(f"invalid MESSAGE_TYPE: {fm['MESSAGE_TYPE']!r}; expected one of {sorted(VALID_MESSAGE_TYPES)}") - sys.exit(EXIT_FRONTMATTER) - try: - int(fm["SEQUENCE"]) - except ValueError: - err(f"SEQUENCE must be an integer; got {fm['SEQUENCE']!r}") - sys.exit(EXIT_FRONTMATTER) - - -def load_peers() -> dict: - if not PEERS_TOML.exists(): - return {} - try: - return tomllib.loads(PEERS_TOML.read_text()) - except (tomllib.TOMLDecodeError, OSError) as e: - err(f"cannot read {PEERS_TOML}: {e}") - sys.exit(EXIT_GENERAL) - - -def resolve_destination(dest: str, peers: dict) -> tuple[str, str, str | None, str | None]: - """Resolve <machine>.<project> to (machine, project, host, ssh_user). - - host is None for same-machine destinations. - """ - if "." not in dest: - err(f"destination must be <machine>.<project>; got {dest!r}") - sys.exit(EXIT_DEST_NOT_FOUND) - machine, project = dest.split(".", 1) - - local_hostname = socket.gethostname().split(".")[0] - is_local = machine == local_hostname or machine == "local" - - host = None - ssh_user = None - if not is_local: - peer_cfg = peers.get("peers", {}).get(machine) - if peer_cfg is None: - available = list(peers.get("peers", {}).keys()) - err(f"destination not found in peers.toml; available peers: {available or '(none)'}") - sys.exit(EXIT_DEST_NOT_FOUND) - host = peer_cfg.get("host", machine) - ssh_user = peer_cfg.get("ssh_user", os.environ.get("USER")) - - return machine, project, host, ssh_user - - -def resolve_inbox_path(project: str, peers: dict) -> str: - """Inbox path on the receiver. 
Defaults to ~/projects/<project>/inbox/from-agents.""" - proj_cfg = peers.get("projects", {}).get(project) - if proj_cfg and "inbox_path" in proj_cfg: - return os.path.expanduser(proj_cfg["inbox_path"]) - return f"~/projects/{project}/inbox/from-agents" - - -def derive_sender_project() -> str: - """Walk up from CWD looking for ~/projects/<name>/. - - Returns the project name if found; falls back to the basename of CWD. - """ - cwd = Path.cwd().resolve() - projects_root = (Path.home() / "projects").resolve() - try: - rel = cwd.relative_to(projects_root) - return rel.parts[0] - except ValueError: - return cwd.name - - -def generate_canonical_filename(sender: str, conv_id: str) -> str: - """YYYYMMDDTHHMMSSZ-from-<sender>-<conv-id>.org""" - now = _dt.datetime.now(_dt.timezone.utc) - timestamp = now.strftime("%Y%m%dT%H%M%SZ") - return f"{timestamp}-from-{sender}-{conv_id}.org" - - -def sign(message_path: Path, sig_path: Path, key: str | None) -> None: - """gpg --detach-sign --armor --output <sig> [--local-user <key>] <message>""" - cmd = ["gpg", "--detach-sign", "--armor", "--yes", "--output", str(sig_path)] - if key: - cmd.extend(["--local-user", key]) - cmd.append(str(message_path)) - try: - result = subprocess.run(cmd, capture_output=True, text=True) - except FileNotFoundError: - err("gpg not found; install gnupg or use --no-sign for testing") - sys.exit(EXIT_GENERAL) - if result.returncode != 0: - err(f"signing failed: {result.stderr.strip()}") - sys.exit(EXIT_GENERAL) - - -def same_machine_deliver(message_path: Path, sig_path: Path | None, target_dir: Path, canonical_name: str) -> None: - """Atomic-write delivery: stage .asc, mv to final, then stage .org, mv to final.""" - target_dir.mkdir(parents=True, exist_ok=True) - final_msg = target_dir / canonical_name - final_sig = target_dir / f"{canonical_name}.asc" - - if sig_path is not None: - # Stage .asc first, mv to final, THEN stage .org and mv to final. 
- with tempfile.NamedTemporaryFile( - mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.asc.", delete=False - ) as tmp: - tmp.write(sig_path.read_bytes()) - tmp_sig_path = Path(tmp.name) - os.replace(tmp_sig_path, final_sig) - - # Re-check HALT between .asc and .org per the layered-checks rule. - check_halt() - - with tempfile.NamedTemporaryFile( - mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.", delete=False - ) as tmp: - tmp.write(message_path.read_bytes()) - tmp_msg_path = Path(tmp.name) - os.replace(tmp_msg_path, final_msg) - - -def cross_machine_deliver( - message_path: Path, - sig_path: Path | None, - canonical_name: str, - host: str, - ssh_user: str, - inbox_path: str, - retries: int, -) -> bool: - """rsync push the .asc first (if signed), re-check HALT, then push the .org. - - Returns True on success, False on persistent failure (after retries). - """ - # Stage local copies with the canonical name so rsync sets the right - # destination filename. - with tempfile.TemporaryDirectory(prefix="cross-agent-send-") as staging: - staging_dir = Path(staging) - local_msg = staging_dir / canonical_name - local_msg.write_bytes(message_path.read_bytes()) - local_sig = None - if sig_path is not None: - local_sig = staging_dir / f"{canonical_name}.asc" - local_sig.write_bytes(sig_path.read_bytes()) - - backoffs = [5, 30, 120] - # Step 1: push .asc first if signed. - if local_sig is not None: - if not _rsync_with_retries(local_sig, host, ssh_user, inbox_path, retries, backoffs): - return False - - # Re-check HALT between .asc and .org per the layered-checks rule. - check_halt() - - # Step 2: push .org. 
- if not _rsync_with_retries(local_msg, host, ssh_user, inbox_path, retries, backoffs): - return False - - return True - - -def _rsync_with_retries( - src: Path, host: str, ssh_user: str, inbox_path: str, retries: int, backoffs: list[int] -) -> bool: - target = f"{ssh_user}@{host}:{inbox_path}/" - last_err = "" - for attempt in range(retries + 1): - if attempt > 0: - check_halt() - wait = backoffs[min(attempt - 1, len(backoffs) - 1)] - err(f"rsync attempt {attempt} failed: {last_err}; retrying in {wait}s") - time.sleep(wait) - try: - result = subprocess.run( - ["rsync", "-a", str(src), target], - capture_output=True, - text=True, - ) - except FileNotFoundError: - err("rsync not found; install rsync") - return False - if result.returncode == 0: - return True - last_err = result.stderr.strip() or f"exit {result.returncode}" - err(f"rsync failed after {retries + 1} attempts: {last_err}") - return False - - -def write_failed_send_marker(dest: str, message_path: Path, error: str, retry_log: list[str]) -> None: - FAILED_SENDS_DIR.mkdir(parents=True, exist_ok=True) - timestamp = _dt.datetime.now(_dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ") - safe_basename = re.sub(r"[^A-Za-z0-9._-]", "_", message_path.name) - marker = FAILED_SENDS_DIR / f"{timestamp}-{dest.replace('.', '-')}-{safe_basename}.json" - marker.write_text(json.dumps( - { - "timestamp": timestamp, - "destination": dest, - "message_path": str(message_path), - "error": error, - "retry_log": retry_log, - }, - indent=2, - )) - err(f"marker written: {marker}") - - -def main() -> int: - parser = argparse.ArgumentParser(description="Send a cross-agent message.") - parser.add_argument("destination", help="Destination as <machine>.<project>") - parser.add_argument("message_file", type=Path, help="Path to the message body file") - parser.add_argument("--no-sign", action="store_true", help="Skip GPG signing (testing only)") - parser.add_argument("--retries", type=int, default=3, help="Retry count for cross-machine sends") 
- parser.add_argument("--key", help="GPG key id to sign with (default: user's primary)") - args = parser.parse_args() - - check_halt() - - if not args.message_file.is_file(): - err(f"message file not found: {args.message_file}") - return EXIT_GENERAL - - fm = parse_frontmatter(args.message_file) - validate_frontmatter(fm) - - peers = load_peers() - machine, project, host, ssh_user = resolve_destination(args.destination, peers) - inbox_path = resolve_inbox_path(project, peers) - - sender = derive_sender_project() - canonical_name = generate_canonical_filename(sender, fm["CONVERSATION_ID"]) - - sig_tmp = None - if not args.no_sign: - sig_tmp = args.message_file.with_suffix(args.message_file.suffix + ".asc.tmp") - sign(args.message_file, sig_tmp, args.key) - - try: - if host is None: - # Same-machine delivery. - target_dir = Path(os.path.expanduser(inbox_path)) - same_machine_deliver(args.message_file, sig_tmp, target_dir, canonical_name) - print(f"sent: {target_dir}/{canonical_name}") - return EXIT_OK - else: - ok = cross_machine_deliver( - args.message_file, sig_tmp, canonical_name, host, ssh_user, inbox_path, args.retries - ) - if ok: - print(f"sent: {ssh_user}@{host}:{inbox_path}/{canonical_name}") - return EXIT_OK - write_failed_send_marker(args.destination, args.message_file, "rsync failed after retries", []) - return EXIT_CROSS_MACHINE_FAILED - finally: - if sig_tmp is not None and sig_tmp.exists(): - sig_tmp.unlink() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-send.md b/.ai/scripts/cross-agent-comms/cross-agent-send.md deleted file mode 100644 index 29bfb24..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-send.md +++ /dev/null @@ -1,199 +0,0 @@ -# cross-agent-send - -**Purpose.** Send a cross-agent message file to a specific destination. Handles -peer-config lookup, GPG signing, atomic write (same-machine) or rsync push -(cross-machine), retry-with-backoff, and failure surfacing. 
- -This is the canonical writer. The protocol spec defers all writer mechanics to -this script. - -## Usage - -``` -cross-agent-send <destination> <message-file> [--no-sign] [--retries N] [--key <key-id>] -``` - -### Positional arguments - -| Position | Meaning | Example | -|---|---|---| -| 1 | Destination as `<machine>.<project>` | `homelab.career`, `velox.career` | -| 2 | Message file (already-formatted `.org`) | `/tmp/my-message.org` | - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--no-sign` | (signing on) | Skip GPG signing. Use only for testing; receivers reject unsigned messages by default. | -| `--retries N` | 3 | Override retry count for cross-machine sends. | -| `--key <key-id>` | (user's primary key) | GPG key to sign with. Resolution order: `--key` flag, `GPG_USER` env, `git config user.signingkey`, then the first secret key in the keyring. | - -## Behavior - -### Filename generation (script-controlled) - -The script generates the canonical destination filename from the message's -frontmatter and sender context. The user's input filename is ignored — pass any -path; the script names the destination correctly: - -``` -<YYYYMMDD>T<HHMMSS>Z-from-<sender-slug>-<short-conv-id>.org -``` - -`<sender-slug>` comes from the sender machine's project name (config or -hostname-based). `<short-conv-id>` is read from the message's -`#+CONVERSATION_ID` frontmatter field. The `<YYYYMMDD>T<HHMMSS>Z` prefix is the -UTC timestamp generated at send time. - -The script also performs the **sender-side max-seen scan** before writing: it -reads the receiver's `from-agents/` directory, finds the highest existing -sequence in this conversation across both sender prefixes, and (best-effort) -suggests `max(seen) + 1` for the next sequence. The user/agent is responsible -for setting `#+SEQUENCE` in the message body; the script only advises. - -### Same-machine destinations - -Resolved when the destination's machine matches the current hostname (or is -not in `peers.toml` as a remote). Steps: - -1. 
Parse frontmatter; extract `CONVERSATION_ID` and `TIMESTAMP`. Validate per - the *Validation before send* section below. -2. Generate canonical filename per *Filename generation* above. -3. Sign: `gpg --detach-sign --armor --output <canonical>.asc --local-user <key> <input>`. -4. Compute target: read `peers.toml` for the project's `inbox_path`. If - missing, fall back to `~/projects/<project>/inbox/from-agents/`. -5. **Atomic write with strict ordering** (signature must precede message): - - Stage `.asc`: write to `<target>/.tmp.XXXXXX-<canonical>.asc`, - then `mv` to `<target>/<canonical>.asc`. - - **Then** stage `.org`: write to `<target>/.tmp.XXXXXX-<canonical>`, - then `mv` to `<target>/<canonical>`. - - Receivers only act on `.org` files; staging the `.asc` first guarantees - the signature is present when the receiver opens the message. Out-of-order - would race: receiver could read the `.org` before the `.asc` lands and - fail GPG verify even though the sender did everything right. -6. Exit 0 on success. Exit non-zero if any step fails. - -### Cross-machine destinations - -Steps: - -1. Parse + generate canonical filename, as same-machine steps 1-2. -2. Sign locally to `<input>.asc` (or a tmp staging file). -3. rsync push **with the same .asc-first ordering**: - - `rsync -a <input>.asc <ssh-user>@<host>:<inbox_path>/<canonical>.asc` - - **Then** `rsync -a <input> <ssh-user>@<host>:<inbox_path>/<canonical>` - rsync writes to a hidden temp file then renames atomically by default - (`--inplace` would defeat this; do not pass it). -4. Retry on failure: 5s, 30s, 120s backoff, then surface error. -5. On persistent failure: write a marker file to - `~/.local/state/cross-agent-comms/failed-sends/<timestamp>-<dest>-<canonical>.json` - containing the destination, message path, error, and retry log. Exit non-zero. - -### Validation before send - -- Destination resolves via `peers.toml` (or local fallback). 
If neither, exit - immediately with `destination not found in peers.toml; available: <list>`. -- Message file must be readable, non-empty, and have valid org-mode frontmatter - with **all** of the following required fields: - - `#+TITLE` - - `#+CONVERSATION_ID` - - `#+MESSAGE_TYPE` - - `#+SEQUENCE` - - `#+TIMESTAMP` - - `#+PROTOCOL_VERSION` (must equal `5` for v5) - - If any required field is missing or malformed, exit immediately with a parse - error naming the offending field. - -- Optional fields the script recognizes and passes through (no special - handling beyond preservation): - - `#+REQUIRES_TOOLS` — comma-separated tool/MCP slugs the receiver needs. - - `#+RELEASE_STATUS` — valid only on `MESSAGE_TYPE: release`. Values per - spec: `complete`, `cancelled`, `withdrawn-after-pushback`, - `abandoned-after-escalation`. - - `#+WORKFLOW_VERSION` — sender's version of the cross-agent-comms workflow - file. Currently advisory; receiver may warn on mismatch but does not block. - -## Configuration - -Reads `~/.config/cross-agent-comms/peers.toml` for peer routing: - -```toml -[peers.velox] -host = "velox.local" -ssh_user = "cjennings" - -# Optional: per-project inbox-path overrides for non-default layouts. -[projects.work] -inbox_path = "~/projects/work/inbox/from-agents" - -[projects.homelab] -inbox_path = "~/projects/homelab/inbox/from-agents" -``` - -If a project entry is omitted, defaults to `~/projects/<project>/inbox/from-agents`. - -## Failure modes - -| Symptom | Cause | Fix | -|---|---|---| -| `destination not found in peers.toml` | Misspelled destination, or peer not configured | Run `cross-agent-discover` to see available destinations. | -| `signing failed: no secret key` | GPG key missing or not in keyring | `gpg --list-secret-keys` to confirm. Override with `--key <id>`. | -| `signing failed: pinentry timed out` | Headless session, GUI pinentry unavailable | Confirm `pinentry-program` in `gpg-agent.conf` matches available pinentry. 
Per protocols.org, GUI pinentry works from Claude Code. | -| `rsync exit 255` | SSH unreachable | `cross-agent-discover --peer <name>` to confirm reachability. | -| `rsync exit 23` | Permission denied at destination | Check destination directory perms (`chmod 700`) and ownership. | -| Marker file written to `failed-sends/` | Persistent cross-machine failure | Inspect the marker's `error` field. After fixing, retry: `cross-agent-send <dest> <msg>` (the marker is for visibility; it does not auto-retry). | -| Receiver complains "unsigned message" | `--no-sign` was used in production | Don't use `--no-sign` outside testing. | - -## HALT awareness - -Checks `~/.config/cross-agent-comms/HALT` at the start of every send AND -between the `.asc` and `.org` rsync calls AND between each retry iteration. -On HALT exists, exits with code 5 ("halt active; remove -~/.config/cross-agent-comms/HALT to resume") without writing or pushing -further. - -Worst case: one in-flight send completes its current rsync step within a few -seconds before halt kicks in for the next step. New sends are blocked -immediately. No `pkill` needed — the per-iteration check stops things -naturally. - -If the HALT file exists but is unreadable (permissions wrong), fail-closed — -treat as if HALT is set. Safer than fail-open. - -See `cross-agent-halt.md` for the full halt mechanism. - -## Examples - -```bash -# Same-machine send -cross-agent-send homelab.career /tmp/my-message.org - -# Cross-machine send via Tailscale -cross-agent-send velox.career /tmp/my-message.org - -# Test send without signing (receiver will reject) -cross-agent-send homelab.career /tmp/test.org --no-sign - -# Override retry count for a flaky link -cross-agent-send velox.career /tmp/my-message.org --retries 10 - -# After a delivery failure, inspect the marker -cat ~/.local/state/cross-agent-comms/failed-sends/*.json | jq . -``` - -## Exit codes - -| Code | Meaning | -|---|---| -| 0 | Sent successfully. 
| -| 1 | General error (parse failure, signing failure, etc.). | -| 2 | Destination not found in peers.toml. | -| 3 | Cross-machine delivery failed after retries. Marker file written. | -| 4 | Frontmatter validation failed. | - -## See also - -- `cross-agent-discover` — validate destinations before sending. -- `cross-agent-watch` — receiver-side notification. -- `cross-agent-status` — see what's queued. -- `cross-agent-comms.org` — protocol spec, the "what" the script implements. diff --git a/.ai/scripts/cross-agent-comms/cross-agent-status b/.ai/scripts/cross-agent-comms/cross-agent-status deleted file mode 100755 index 4eee75b..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-status +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python3 -"""Point-in-time snapshot of pending cross-agent messages across local projects. - -See cross-agent-status.md. Pending = messages in inbox/from-agents/ whose -CONVERSATION_ID has no MESSAGE_TYPE: release at a later #+TIMESTAMP. - -HALT: prints a prominent banner before normal output, but continues to enumerate. 
-""" - -from __future__ import annotations - -import argparse -import glob -import json -import os -import re -import sys -from pathlib import Path - -CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" -HALT_FILE = CONFIG_DIR / "HALT" -DEFAULT_GLOB = str(Path.home() / "projects" / "*" / "inbox" / "from-agents") + "/" - - -def parse_frontmatter(path: Path) -> dict[str, str]: - try: - text = path.read_text() - except OSError: - return {} - fm: dict[str, str] = {} - for line in text.splitlines(): - line = line.rstrip() - if not line: - if fm: - break - continue - m = re.match(r"#\+([A-Z_]+):\s*(.*)", line) - if m: - fm[m.group(1)] = m.group(2).strip() - elif fm: - break - return fm - - -def project_name_from_path(path: str) -> str: - """Walk up from path to find ~/projects/<name>/...""" - home = str(Path.home()) - parts = Path(path).parts - for i, part in enumerate(parts): - if part == "projects" and i + 1 < len(parts) and str(Path(*parts[: i + 1])) == os.path.join(home, "projects"): - return parts[i + 1] - # Fallback: dir three levels up from the .org file (project/inbox/from-agents/file.org) - return Path(path).parent.parent.parent.name - - -def scan_project(inbox_dir: Path) -> tuple[int, str | None, int | None]: - """Return (pending_count, most_recent_filename_or_None, most_recent_age_seconds_or_None).""" - if not inbox_dir.is_dir(): - return 0, None, None - - # Group .org files by CONVERSATION_ID, also collect release timestamps per conv. - org_files = sorted(inbox_dir.glob("*.org")) - if not org_files: - return 0, None, None - - by_conv: dict[str, list[tuple[str, str, Path]]] = {} # conv_id -> [(timestamp, msg_type, path)] - for f in org_files: - fm = parse_frontmatter(f) - conv = fm.get("CONVERSATION_ID") - ts = fm.get("TIMESTAMP") - mt = fm.get("MESSAGE_TYPE") - if not conv or not ts or not mt: - # Malformed file: count as pending under conv "_unparseable". 
- by_conv.setdefault("_unparseable", []).append(("", "request", f)) - continue - by_conv.setdefault(conv, []).append((ts, mt, f)) - - pending_files: list[Path] = [] - for conv, entries in by_conv.items(): - entries.sort(key=lambda e: e[0]) - # Find the latest release timestamp. - release_ts = None - for ts, mt, _f in entries: - if mt == "release" and (release_ts is None or ts > release_ts): - release_ts = ts - for ts, mt, f in entries: - if mt == "release": - continue - if release_ts is not None and ts <= release_ts: - continue - pending_files.append(f) - - if not pending_files: - return 0, None, None - - # Most-recent by mtime (proxy for arrival order). - most_recent = max(pending_files, key=lambda p: p.stat().st_mtime) - import time - age = int(time.time() - most_recent.stat().st_mtime) - return len(pending_files), most_recent.name, age - - -def fmt_age(seconds: int | None) -> str: - if seconds is None: - return "—" - if seconds < 60: - return f"{seconds}s ago" - if seconds < 3600: - return f"{seconds // 60} min ago" - if seconds < 86400: - return f"{seconds // 3600} hr ago" - return f"{seconds // 86400} day(s) ago" - - -def render_banner_if_halt() -> None: - if not HALT_FILE.exists(): - return - try: - reason = HALT_FILE.read_text().strip() - except OSError: - reason = "(HALT file unreadable; treated as halted)" - print("⚠ HALT ACTIVE — cross-agent comms paused") - if reason: - print(f" reason: {reason}") - print(f" clear: rm {HALT_FILE} (or: cross-agent-resume)") - print() - - -def main() -> int: - parser = argparse.ArgumentParser(description="Snapshot of pending cross-agent messages across local projects.") - parser.add_argument("--json", action="store_true", help="Emit JSON output") - parser.add_argument("--projects-glob", default=DEFAULT_GLOB, - help=f"Glob for project from-agents dirs (default: {DEFAULT_GLOB})") - args = parser.parse_args() - - render_banner_if_halt() - - matched = sorted(glob.glob(args.projects_glob)) - rows = [] - for path in matched: - 
inbox = Path(path) - if not inbox.is_dir(): - continue - proj = project_name_from_path(path) - count, most_recent, age = scan_project(inbox) - rows.append({ - "name": proj, - "pending_count": count, - "most_recent": ( - {"filename": most_recent, "age_seconds": age} - if most_recent else None - ), - }) - - # Sort: pending-first, then alphabetical by name. - rows.sort(key=lambda r: (-r["pending_count"], r["name"])) - - if args.json: - import datetime as _dt - payload = { - "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(), - "halt_active": HALT_FILE.exists(), - "projects": rows, - } - print(json.dumps(payload, indent=2)) - return 0 - - if not rows: - print("No projects with inbox/from-agents/ found — 0 pending.") - return 0 - - # Human-readable table. - name_w = max(len("project"), max(len(r["name"]) for r in rows)) - print(f"{'project':<{name_w}} pending most-recent") - for r in rows: - most_recent_str = "—" - if r["most_recent"]: - most_recent_str = f"{r['most_recent']['filename']} ({fmt_age(r['most_recent']['age_seconds'])})" - print(f"{r['name']:<{name_w}} {r['pending_count']:<7} {most_recent_str}") - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/.ai/scripts/cross-agent-comms/cross-agent-status.md b/.ai/scripts/cross-agent-comms/cross-agent-status.md deleted file mode 100644 index 070330c..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-status.md +++ /dev/null @@ -1,139 +0,0 @@ -# cross-agent-status - -**Purpose.** Point-in-time snapshot of pending cross-agent messages across -every project on this machine. Run from any terminal. No daemon required. - -This is the user-pull layer of the cold-start story — `cross-agent-watch` -pushes notifications, `cross-agent-status` lets the user query. - -## Usage - -``` -cross-agent-status [--json] [--projects-glob <glob>] -``` - -No args required. - -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--json` | off (table) | Output as JSON for scripting. 
| -| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to scan. | - -## Output - -### Default (table) - -``` -$ cross-agent-status -project pending most-recent -career 0 — -claude-templates 0 — -clipper 0 — -homelab 1 20260427T085611Z-from-career-question.org (3 min ago) -finances 0 — -... (other 9 projects) -``` - -Sort: pending-first, then alphabetical. - -### `--json` - -```json -{ - "scanned_at": "2026-04-27T04:13:00-05:00", - "projects": [ - { - "name": "homelab", - "pending_count": 1, - "most_recent": { - "filename": "20260427T085611Z-from-career-question.org", - "age_seconds": 180 - } - }, - ... - ] -} -``` - -## Pending semantics - -A message is "pending" if it sits in `inbox/from-agents/` AND no -`MESSAGE_TYPE: release` exists for the same `CONVERSATION_ID` after it. - -Concretely: - -1. Scan each project's `inbox/from-agents/` for `.org` files. -2. Group by `CONVERSATION_ID` from frontmatter. -3. For each conversation, find the highest-`#+TIMESTAMP` message with - `MESSAGE_TYPE: release`. -4. Messages with `#+TIMESTAMP` after that release (or in conversations with no - release) count as pending. - -Files without parseable frontmatter are counted as pending and noted in the -output (single warning row per project). - -## Failure modes - -| Symptom | Likely cause | Fix | -|---|---|---| -| Project missing from output | Project's `.ai/` directory exists but `inbox/from-agents/` does not | Created lazily on first cross-agent message; `mkdir -p` to surface in output. | -| All projects show "0 pending" but you know one has messages | Glob misresolved, OR all messages are post-release | `cross-agent-status --projects-glob` with explicit path to confirm. | -| Warning row "N files unparseable in <project>" | Message file has invalid frontmatter | Open the file, fix or move out. | - -## Performance - -Scans every `.org` file in every watched directory. 
For Craig's setup (14 -projects, single-digit messages each), runs in <100ms. If a project -accumulates hundreds of post-release messages, archive them per the persistence -guidance in the protocol spec. - -## HALT awareness - -Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a -prominent banner before normal output: - -``` -$ cross-agent-status -⚠ HALT ACTIVE — cross-agent comms paused - Reason: investigating runaway poll loop, 2026-04-27 - HALT file: ~/.config/cross-agent-comms/HALT - Resume with: cross-agent-resume - -(snapshot continues normally — HALT does not suppress visibility) - -project pending most-recent -career 0 — -homelab 1 20260427T085611Z-from-career-question.org (3 min ago) -... -``` - -Status is read-only, so it always runs. The banner ensures the user can't -miss that halt is active when checking inbox state. Reason text comes from -the HALT file's body; if empty, omit the reason line. - -If the HALT file exists but is unreadable, print a warning banner ("HALT -file present but unreadable; treat as halted") and continue with normal -output. - -See `cross-agent-halt.md` for the full halt mechanism. - -## Examples - -```bash -# Snapshot -cross-agent-status - -# JSON for piping -cross-agent-status --json | jq '.projects[] | select(.pending_count > 0)' - -# Single-project query -cross-agent-status --projects-glob ~/projects/work/inbox/from-agents/ -``` - -## See also - -- `cross-agent-watch` — push notifications on new arrivals. -- `cross-agent-discover` — enumerate available agents (cross-machine). -- `cross-agent-comms.org` — protocol spec. diff --git a/.ai/scripts/cross-agent-comms/cross-agent-watch b/.ai/scripts/cross-agent-comms/cross-agent-watch deleted file mode 100755 index f50ba26..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-watch +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env bash -# cross-agent-watch — desktop-notify on new cross-agent messages. -# -# See cross-agent-watch.md. 
Watches every ~/projects/*/inbox/from-agents/ by -# default. inotifywait fires create + moved_to events; .tmp.* files are -# filtered out. HALT suppresses notifications but the watcher keeps running -# and logs each event with "(suppressed by HALT)". - -set -uo pipefail - -# Defaults. -PROJECTS_GLOB="${HOME}/projects/*/inbox/from-agents/" -LOG_FILE="${HOME}/.local/state/cross-agent-comms/watch.log" -HALT_FILE="${HOME}/.config/cross-agent-comms/HALT" -QUIET=0 -NO_NOTIFY=0 - -# Arg parsing. -while [[ $# -gt 0 ]]; do - case "$1" in - --projects-glob) - PROJECTS_GLOB="$2"; shift 2 ;; - --log) - LOG_FILE="$2"; shift 2 ;; - --quiet) - QUIET=1; shift ;; - --no-notify) - NO_NOTIFY=1; shift ;; - -h|--help) - cat <<EOF -Usage: cross-agent-watch [--projects-glob GLOB] [--log PATH] [--quiet] [--no-notify] - -Watches inbox/from-agents/ directories for new cross-agent messages and fires -desktop notifications. See cross-agent-watch.md for details. -EOF - exit 0 ;; - *) - echo "unknown flag: $1" >&2; exit 1 ;; - esac -done - -# Resolve glob to a concrete list of directories. -# shellcheck disable=SC2086 -DIRS=( $PROJECTS_GLOB ) -# Filter out non-existent paths (glob may include literal pattern when no match). -EXISTING=() -for d in "${DIRS[@]}"; do - if [[ -d "$d" ]]; then - EXISTING+=( "$d" ) - fi -done - -if [[ ${#EXISTING[@]} -eq 0 ]]; then - echo "cross-agent-watch: glob resolved 0 directories: $PROJECTS_GLOB" >&2 - exit 1 -fi - -# Ensure log dir exists. -mkdir -p "$(dirname "$LOG_FILE")" - -[[ $QUIET -eq 0 ]] && echo "cross-agent-watch: watching ${#EXISTING[@]} dir(s); log: $LOG_FILE" - -# Helper: project name from path like /home/.../projects/<name>/inbox/from-agents/... -project_name() { - local path="$1" - # Match ~/projects/<name>/... - if [[ "$path" =~ ${HOME}/projects/([^/]+)/ ]]; then - echo "${BASH_REMATCH[1]}" - else - basename "$(dirname "$(dirname "$path")")" - fi -} - -# Main loop. 
inotifywait emits one line per event in the format -# "<full-path>" because we passed --format '%w%f'. -inotifywait -m -e create,moved_to --format '%w%f' "${EXISTING[@]}" 2>/dev/null \ - | while IFS= read -r path; do - filename="$(basename "$path")" - - # Filter .tmp.* staging files. - case "$filename" in - .tmp.*) continue ;; - esac - - # Filter .asc sidecars — they land first per the atomic-write ordering; - # the .org event will fire after. - case "$filename" in - *.asc) continue ;; - esac - - proj="$(project_name "$path")" - iso="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" - - if [[ -e "$HALT_FILE" ]]; then - printf '%s\t%s\t%s\t(suppressed by HALT)\n' "$iso" "$proj" "$filename" >> "$LOG_FILE" - [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename (suppressed by HALT)" - continue - fi - - printf '%s\t%s\t%s\n' "$iso" "$proj" "$filename" >> "$LOG_FILE" - [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename" - - if [[ $NO_NOTIFY -eq 0 ]]; then - notify info "Cross-agent message" "${proj}: ${filename}" --persist 2>/dev/null || true - fi - done diff --git a/.ai/scripts/cross-agent-comms/cross-agent-watch.md b/.ai/scripts/cross-agent-comms/cross-agent-watch.md deleted file mode 100644 index 04e8005..0000000 --- a/.ai/scripts/cross-agent-comms/cross-agent-watch.md +++ /dev/null @@ -1,130 +0,0 @@ -# cross-agent-watch - -**Purpose.** Long-running watcher that fires desktop notifications when new -cross-agent messages land in any project's `inbox/from-agents/` directory. -This is the primary cold-start mechanism: messages get noticed even when no -Claude session is active. - -## Usage - -``` -cross-agent-watch [--projects-glob <glob>] [--log <path>] -``` - -No args required. Defaults: - -- Watches `~/projects/*/inbox/from-agents/` (matches every project with the - cross-agent-comms convention). -- Logs each event to `~/.local/state/cross-agent-comms/watch.log`. 
- -### Flags - -| Flag | Default | Purpose | -|---|---|---| -| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to watch. Useful for testing on a single project. | -| `--log <path>` | `~/.local/state/cross-agent-comms/watch.log` | Override log location. Set to `/dev/null` to disable logging. | -| `--quiet` | off | Suppress stdout output. Notifications still fire. | -| `--no-notify` | off | Skip `notify` calls. Useful for testing the watcher loop without spamming notifications. | - -## Behavior - -1. Resolves the projects-glob to a concrete list of directories at startup. - New projects added to `~/projects/` after startup are NOT picked up — restart - the watcher to re-resolve. -2. Runs `inotifywait -m -e create,moved_to --format '%w%f'` against each - watched directory. -3. For each event, calls - `notify info "Cross-agent message" "<project>: <filename>" --persist`. The - `--persist` flag keeps the page on screen until dismissed, so an inbound - message that arrives while Craig is away from the desk isn't missed. -4. Appends an event line to the log: - `<ISO-8601-timestamp>\t<project>\t<filename>`. - -## Event filtering - -- Watches `create` AND `moved_to` events. The `moved_to` part is critical for - the atomic-write convention (`mktemp` + `mv` produces a `moved_to`, not a - `create`). -- Files starting with `.tmp.` are ignored — they're staging files from - in-progress writes that should never produce a notification. 
- -## Installation - -### Option A — tmux pane (personal, easy) - -Run in a tmux pane that survives session disconnects: - -``` -tmux new -d -s cross-agent-watch 'cross-agent-watch' -``` - -### Option B — systemd user service (production) - -Provided files: - -- `~/.config/systemd/user/cross-agent-watch.service` -- `~/.config/systemd/user/cross-agent-watch.path` - -Enable with: - -``` -systemctl --user enable --now cross-agent-watch.path -``` - -The path unit triggers the service unit on filesystem changes; the service -unit re-execs `cross-agent-watch` if it dies. Survives reboot. - -## Failure modes - -| Symptom | Likely cause | Fix | -|---|---|---| -| No notifications fire on new files | inotifywait not running, or glob resolved to zero dirs | Check `cross-agent-watch --projects-glob ... --quiet` exits non-zero immediately. Log shows `"resolved 0 directories"`. | -| Notifications fire on `.tmp.` files | Filter regression | Verify `inotifywait` events show the `.tmp.` files; if so check this script's filter logic. | -| Some files missed under rapid bursts | inotify queue overflow | Increase `fs.inotify.max_queued_events` sysctl. Default 16384 is usually fine. | -| Permission denied on a watched dir | Directory perms wrong | `chmod 700 <dir>` and confirm owner. | - -## HALT awareness - -Checks `~/.config/cross-agent-comms/HALT` on each iteration (each inotifywait -event fired). If HALT exists, the watcher continues running but **suppresses -the `notify` call**. The event is still logged, with `(suppressed by HALT)` -appended: - -``` -2026-04-27T04:42:00-05:00 career 20260427T094200Z-from-homelab-test.org (suppressed by HALT) -``` - -Logged-but-suppressed events are useful for the operator to see what would -have fired during the halt window — helpful for diagnosing whatever caused -the halt. - -When HALT clears, suppression stops; subsequent events fire normally. 
Backlog -events that arrived during halt are NOT replayed — they get picked up via -cold-start handling (status CLI, agent startup check, or the next agent -poll once polling resumes). - -If the HALT file exists but is unreadable, fail-closed (suppress) — safer -than fail-open. - -See `cross-agent-halt.md` for the full halt mechanism. - -## Examples - -```bash -# Watch all projects, log everything, fire notifications -cross-agent-watch - -# Test against a single project, no notifications, verbose -cross-agent-watch \ - --projects-glob "$HOME/projects/work/inbox/from-agents/" \ - --no-notify - -# Production-style: quiet stdout, log only -cross-agent-watch --quiet -``` - -## See also - -- `cross-agent-status` — point-in-time snapshot of pending messages. -- `cross-agent-send` — counterpart writer. -- `cross-agent-comms.org` — protocol spec. diff --git a/.ai/scripts/tests/test_cross_agent_discover.py b/.ai/scripts/tests/test_cross_agent_discover.py deleted file mode 100644 index f0d2bb7..0000000 --- a/.ai/scripts/tests/test_cross_agent_discover.py +++ /dev/null @@ -1,204 +0,0 @@ -"""Tests for cross-agent-discover (TDD: tests written before implementation).""" - -from __future__ import annotations - -import json -import os -import subprocess -import textwrap -from pathlib import Path - -import pytest - -SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-discover" - - -def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess: - return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env) - - -@pytest.fixture -def fake_home(tmp_path, monkeypatch): - home = tmp_path / "home" - home.mkdir() - monkeypatch.setenv("HOME", str(home)) - return home - - -def _make_project(home: Path, name: str) -> Path: - proj = home / "projects" / name - (proj / ".ai").mkdir(parents=True) - return proj - - -def _write_peers_toml(home: Path, content: str) -> Path: - cfg = home / ".config" / "cross-agent-comms" - 
cfg.mkdir(parents=True, exist_ok=True) - peers = cfg / "peers.toml" - peers.write_text(content) - return peers - - -def test_discover_help(fake_home): - result = _run(["--help"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - assert "discover" in result.stdout.lower() or "enumerate" in result.stdout.lower() - - -def test_discover_local_only_no_projects(fake_home): - """Empty home → reports zero local projects, zero peers.""" - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - # No crash; mentions local somehow. - assert "local" in result.stdout.lower() or "0 project" in result.stdout.lower() - - -def test_discover_lists_local_projects(fake_home): - _make_project(fake_home, "homelab") - _make_project(fake_home, "career") - _make_project(fake_home, "claude-templates") - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - assert "homelab" in result.stdout - assert "career" in result.stdout - assert "claude-templates" in result.stdout - - -def test_discover_excludes_dirs_without_ai_subdir(fake_home): - """Directories under ~/projects/ that lack .ai/ are NOT projects.""" - _make_project(fake_home, "real-project") - (fake_home / "projects" / "not-a-project").mkdir(parents=True) - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - assert "real-project" in result.stdout - assert "not-a-project" not in result.stdout - - -def test_discover_no_peers_toml_just_local(fake_home): - _make_project(fake_home, "homelab") - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - # No peers section since no toml. 
- assert "homelab" in result.stdout - - -def test_discover_lists_peers_from_toml(fake_home): - _write_peers_toml(fake_home, textwrap.dedent("""\ - [peers.velox] - host = "velox" - ssh_user = "cjennings" - - [peers.bastion] - host = "bastion.local" - ssh_user = "cjennings" - """)) - _make_project(fake_home, "homelab") - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - assert "velox" in result.stdout - assert "bastion" in result.stdout - - -def test_discover_malformed_peers_toml_errors_clearly(fake_home): - _write_peers_toml(fake_home, "not valid toml at all = = =") - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode != 0 - assert "peers.toml" in result.stderr or "TOML" in result.stderr or "parse" in result.stderr.lower() - - -def test_discover_json_output_schema(fake_home): - _make_project(fake_home, "homelab") - _make_project(fake_home, "career") - _write_peers_toml(fake_home, textwrap.dedent("""\ - [peers.velox] - host = "velox" - """)) - result = _run(["--json", "--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - payload = json.loads(result.stdout) - assert "local" in payload - assert "peers" in payload - assert isinstance(payload["local"], list) - assert isinstance(payload["peers"], list) - assert "homelab" in payload["local"] - assert "career" in payload["local"] - velox = next((p for p in payload["peers"] if p["name"] == "velox"), None) - assert velox is not None - # Reachability is a key — value depends on actual SSH state. 
- assert "reachable" in velox - - -def test_discover_peer_scope(fake_home): - _write_peers_toml(fake_home, textwrap.dedent("""\ - [peers.velox] - host = "velox" - - [peers.bastion] - host = "bastion.local" - """)) - result = _run(["--peer", "velox", "--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - payload = json.loads(result.stdout) - peer_names = [p["name"] for p in payload["peers"]] - assert "velox" in peer_names - assert "bastion" not in peer_names - - -def test_discover_unreachable_peer_marked(fake_home): - """A peer with a definitely-unreachable host gets reachable=False.""" - _write_peers_toml(fake_home, textwrap.dedent("""\ - [peers.bogus] - host = "definitely-not-a-real-host.invalid" - ssh_user = "nobody" - """)) - result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)}, ) - assert result.returncode == 0 - payload = json.loads(result.stdout) - bogus = next((p for p in payload["peers"] if p["name"] == "bogus"), None) - assert bogus is not None - assert bogus["reachable"] is False - - -def test_discover_cache_hit_within_window(fake_home): - """Second invocation within 5 min reads cache (skip the SSH probe).""" - _make_project(fake_home, "homelab") - # First call populates cache. - result1 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)}) - assert result1.returncode == 0 - cache = fake_home / ".cache" / "cross-agent-comms" / "discovery.json" - assert cache.exists() - # Tamper with the cache to a marker only the cache path can produce. - payload = json.loads(cache.read_text()) - payload["_test_marker"] = True - cache.write_text(json.dumps(payload)) - # Second call (no --no-cache) should return the tampered payload. 
- result2 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)}) - assert result2.returncode == 0 - payload2 = json.loads(result2.stdout) - assert payload2.get("_test_marker") is True - - -def test_discover_no_cache_flag_bypasses(fake_home): - """--no-cache ignores even a fresh cache.""" - _make_project(fake_home, "homelab") - cache_dir = fake_home / ".cache" / "cross-agent-comms" - cache_dir.mkdir(parents=True) - cache_dir.joinpath("discovery.json").write_text(json.dumps({ - "_test_marker": True, "local": [], "peers": [] - })) - result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 - payload = json.loads(result.stdout) - # Cache marker should NOT appear in fresh result. - assert payload.get("_test_marker") is None or payload.get("_test_marker") is False - assert "homelab" in payload["local"] - - -def test_discover_halt_shows_banner(fake_home): - halt = fake_home / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("halted") - _make_project(fake_home, "homelab") - result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)}) - assert result.returncode == 0 # discover continues to print under HALT - assert "HALT" in result.stdout diff --git a/.ai/scripts/tests/test_cross_agent_halt.py b/.ai/scripts/tests/test_cross_agent_halt.py deleted file mode 100644 index f8bf0b3..0000000 --- a/.ai/scripts/tests/test_cross_agent_halt.py +++ /dev/null @@ -1,204 +0,0 @@ -"""Tests for cross-agent-halt and cross-agent-resume (TDD).""" - -from __future__ import annotations - -import os -import subprocess -import textwrap -from pathlib import Path - -import pytest - -HALT_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-halt" -RESUME_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-resume" - - -def _run(script: Path, args: list[str], env: dict | None = None) -> subprocess.CompletedProcess: - 
return subprocess.run([str(script), *args], capture_output=True, text=True, env=env) - - -@pytest.fixture -def isolated_env(tmp_path, monkeypatch): - """Isolated HOME + a fake systemctl that records calls without acting.""" - fake_home = tmp_path / "home" - fake_home.mkdir() - fake_bin = tmp_path / "bin" - fake_bin.mkdir() - # Fake systemctl: no-op, exit 0. - fake_systemctl = fake_bin / "systemctl" - fake_systemctl.write_text("#!/usr/bin/env bash\nexit 0\n") - fake_systemctl.chmod(0o755) - # Fake ssh: succeed only for known-good host. - fake_ssh = fake_bin / "ssh" - fake_ssh.write_text(textwrap.dedent("""\ - #!/usr/bin/env bash - # Find the destination arg (skip flags). - target="" - for arg in "$@"; do - case "$arg" in - -*|*=*) ;; - *@*|localhost|*.local|*.invalid) target="$arg"; break ;; - *) target="$arg"; break ;; - esac - done - case "$target" in - *invalid*|*unreachable*) exit 255 ;; - *) exit 0 ;; - esac - """)) - fake_ssh.chmod(0o755) - - monkeypatch.setenv("HOME", str(fake_home)) - # Prepend our fake bin so systemctl + ssh are intercepted, but keep real /bin etc. 
- monkeypatch.setenv("PATH", f"{fake_bin}:{os.environ.get('PATH', '')}") - return fake_home - - -# ---- cross-agent-halt ---- - - -def test_halt_help(isolated_env): - result = _run(HALT_SCRIPT, ["--help"], env={**os.environ, "HOME": str(isolated_env), - "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert "halt" in result.stdout.lower() - - -def test_halt_creates_halt_file(isolated_env): - halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT" - assert not halt_file.exists() - result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), - "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert halt_file.exists() - - -def test_halt_with_reason_writes_body(isolated_env): - result = _run(HALT_SCRIPT, ["pausing for incident review"], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT" - assert halt_file.exists() - assert "pausing for incident review" in halt_file.read_text() - - -def test_halt_idempotent(isolated_env): - """Running halt twice doesn't error.""" - halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT" - r1 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert r1.returncode == 0 - assert halt_file.exists() - r2 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert r2.returncode == 0 - assert halt_file.exists() - - -def test_halt_does_not_pkill(isolated_env): - """Per design: halt does NOT call pkill. Verify by checking no pkill process gets launched.""" - # Replace pkill in PATH with something that fails loudly so we'd see if halt invoked it. 
- fake_bin = isolated_env.parent / "bin" - pkill = fake_bin / "pkill" - pkill.write_text("#!/usr/bin/env bash\necho 'PKILL CALLED' >&2\nexit 99\n") - pkill.chmod(0o755) - result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert "PKILL CALLED" not in result.stderr - - -def test_halt_tailnet_reports_per_peer(isolated_env): - """--tailnet iterates peers.toml and reports per-peer status.""" - cfg = isolated_env / ".config" / "cross-agent-comms" - cfg.mkdir(parents=True) - (cfg / "peers.toml").write_text(textwrap.dedent("""\ - [peers.velox] - host = "velox" - ssh_user = "cjennings" - - [peers.bogus] - host = "definitely-unreachable.invalid" - ssh_user = "cjennings" - """)) - result = _run(HALT_SCRIPT, ["--tailnet"], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - # Partial halt → exit 1. - assert result.returncode == 1 - assert "velox" in result.stdout - assert "bogus" in result.stdout - # ✓ marker for velox, ✗ for bogus. 
- assert "✓" in result.stdout - assert "✗" in result.stdout - assert "PARTIAL" in result.stdout or "partial" in result.stdout.lower() - - -def test_halt_tailnet_all_reachable_exits_zero(isolated_env): - cfg = isolated_env / ".config" / "cross-agent-comms" - cfg.mkdir(parents=True) - (cfg / "peers.toml").write_text(textwrap.dedent("""\ - [peers.velox] - host = "velox" - ssh_user = "cjennings" - """)) - result = _run(HALT_SCRIPT, ["--tailnet"], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert "velox" in result.stdout - - -# ---- cross-agent-resume ---- - - -def test_resume_help(isolated_env): - result = _run(RESUME_SCRIPT, ["--help"], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert "resume" in result.stdout.lower() - - -def test_resume_removes_halt_file(isolated_env): - halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt_file.parent.mkdir(parents=True) - halt_file.write_text("halted") - assert halt_file.exists() - result = _run(RESUME_SCRIPT, [], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - assert not halt_file.exists() - - -def test_resume_when_no_halt_active_succeeds(isolated_env): - """No HALT to clear is not an error.""" - result = _run(RESUME_SCRIPT, [], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - - -def test_resume_prints_per_session_instructions(isolated_env): - """Resume must surface that polling does NOT auto-resume.""" - halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt_file.parent.mkdir(parents=True) - halt_file.write_text("halted") - result = _run(RESUME_SCRIPT, [], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 0 - out = result.stdout.lower() - assert "polling" in out - assert 
"auto" in out or "explicit" in out or "session" in out - - -def test_resume_tailnet_partial_failure_exit_1(isolated_env): - cfg = isolated_env / ".config" / "cross-agent-comms" - cfg.mkdir(parents=True) - (cfg / "peers.toml").write_text(textwrap.dedent("""\ - [peers.velox] - host = "velox" - - [peers.bogus] - host = "unreachable-host.invalid" - """)) - halt_file = cfg / "HALT" - halt_file.write_text("halted") - result = _run(RESUME_SCRIPT, ["--tailnet"], - env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]}) - assert result.returncode == 1 - assert "velox" in result.stdout - assert "bogus" in result.stdout diff --git a/.ai/scripts/tests/test_cross_agent_recv.py b/.ai/scripts/tests/test_cross_agent_recv.py deleted file mode 100644 index 27c53a5..0000000 --- a/.ai/scripts/tests/test_cross_agent_recv.py +++ /dev/null @@ -1,176 +0,0 @@ -"""Tests for cross-agent-recv.""" - -from __future__ import annotations - -import json -import os -import subprocess -from pathlib import Path - -import pytest - -SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-recv" - - -def _make_message(path: Path, *, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request", - proto_version: str = "5", title: str = "Test", requires_tools: str | None = None, - body: str = "Body.\n") -> Path: - fm_lines = [ - f"#+TITLE: {title}", - f"#+CONVERSATION_ID: {conv_id}", - f"#+MESSAGE_TYPE: {msg_type}", - f"#+SEQUENCE: {seq}", - "#+TIMESTAMP: 2026-04-27T05:00:00-05:00", - f"#+PROTOCOL_VERSION: {proto_version}", - ] - if requires_tools: - fm_lines.append(f"#+REQUIRES_TOOLS: {requires_tools}") - path.write_text("\n".join(fm_lines) + "\n\n" + body) - return path - - -def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess: - return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env) - - -@pytest.fixture -def isolated_env(tmp_path, monkeypatch): - fake_home = tmp_path / "home" - 
fake_home.mkdir() - monkeypatch.setenv("HOME", str(fake_home)) - return fake_home - - -def test_recv_help(isolated_env): - result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 - assert "Receive and decide" in result.stdout - - -def test_recv_missing_file_rejects(isolated_env, tmp_path): - result = _run([str(tmp_path / "nope.org")], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 3 # reject - - -def test_recv_malformed_frontmatter_rejects(isolated_env, tmp_path): - bad = tmp_path / "bad.org" - bad.write_text("not org-mode at all\n") - result = _run([str(bad), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 3 - assert "decision: reject" in result.stdout - - -def test_recv_missing_required_field_rejects(isolated_env, tmp_path): - msg = tmp_path / "msg.org" - # Missing PROTOCOL_VERSION among others. - msg.write_text("#+TITLE: x\n#+CONVERSATION_ID: c\n\nBody.\n") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 3 - assert "missing required" in result.stdout - - -def test_recv_protocol_version_mismatch_query(isolated_env, tmp_path): - msg = _make_message(tmp_path / "msg.org", proto_version="4") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 2 # query - assert "PROTOCOL_VERSION mismatch" in result.stdout - - -def test_recv_invalid_message_type_rejects(isolated_env, tmp_path): - msg = _make_message(tmp_path / "msg.org", msg_type="banana") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 3 - assert "invalid MESSAGE_TYPE" in result.stdout - - -def test_recv_missing_signature_rejects(isolated_env, tmp_path): - """When verify is on, a missing .asc sibling rejects.""" - msg = _make_message(tmp_path / "msg.org") - # No .asc sidecar. 
- result = _run([str(msg)], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 3 - assert "signature file missing" in result.stdout - - -def test_recv_valid_processes(isolated_env, tmp_path): - """A valid message with --no-verify and no dedup match → process.""" - msg = _make_message(tmp_path / "msg.org") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 # process - assert "decision: process" in result.stdout - assert "sha256:" in result.stdout - - -def test_recv_dedup_against_identical_existing(isolated_env, tmp_path): - """Same content + same SEQUENCE in same dir → dedup.""" - inbox = tmp_path / "inbox" - inbox.mkdir() - first = _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5) - # Second message with same content — name differs (canonical-style would have different timestamp). - second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5) - # Bodies must be byte-identical for hash equality. 
- second.write_bytes(first.read_bytes()) - result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 1 # dedup - assert "decision: dedup" in result.stdout - - -def test_recv_collision_with_different_content_processes(isolated_env, tmp_path): - """Same SEQUENCE + same CONVERSATION_ID but different content → process both.""" - inbox = tmp_path / "inbox" - inbox.mkdir() - _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5, body="First body.\n") - second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5, body="Different body.\n") - result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 # process - assert "decision: process" in result.stdout - - -def test_recv_requires_tools_missing_query(isolated_env, tmp_path): - """REQUIRES_TOOLS naming a definitely-missing binary → query.""" - msg = _make_message(tmp_path / "msg.org", requires_tools="definitely-not-installed-xyzzy-9000") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 2 # query - assert "required tools unavailable" in result.stdout - - -def test_recv_requires_tools_present_processes(isolated_env, tmp_path): - """REQUIRES_TOOLS naming a real binary → process.""" - msg = _make_message(tmp_path / "msg.org", requires_tools="ls,cat") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 - assert "decision: process" in result.stdout - - -def test_recv_json_output(isolated_env, tmp_path): - msg = _make_message(tmp_path / "msg.org") - result = _run([str(msg), "--no-verify", "--json"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 - payload = json.loads(result.stdout) - assert payload["decision"] == "process" - assert payload["message_type"] == "request" - assert 
payload["conversation_id"] == "test-conv" - - -def test_recv_halt_blocks(isolated_env, tmp_path): - halt = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("halted\n") - msg = _make_message(tmp_path / "msg.org") - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 5 - assert "halt active" in result.stderr.lower() - - -def test_recv_halt_leaves_message_in_place(isolated_env, tmp_path): - """Per spec: under HALT, recv must NOT move/dedup/reject — leave file in place.""" - halt = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("halted\n") - msg = _make_message(tmp_path / "msg.org") - pre_content = msg.read_text() - result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 5 - # File still exists with same content. - assert msg.exists() - assert msg.read_text() == pre_content diff --git a/.ai/scripts/tests/test_cross_agent_send.py b/.ai/scripts/tests/test_cross_agent_send.py deleted file mode 100644 index f716e95..0000000 --- a/.ai/scripts/tests/test_cross_agent_send.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Tests for cross-agent-send. - -Subprocess-based: treat the script as a black-box CLI and assert on its -exit codes, stdout, and the files it produces. 
-""" - -from __future__ import annotations - -import os -import subprocess -import textwrap -from pathlib import Path - -import pytest - -SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-send" - - -def _make_message(tmp_path: Path, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request", - proto_version: str = "5") -> Path: - msg = tmp_path / "msg.org" - msg.write_text(textwrap.dedent(f"""\ - #+TITLE: Test message - #+CONVERSATION_ID: {conv_id} - #+MESSAGE_TYPE: {msg_type} - #+SEQUENCE: {seq} - #+TIMESTAMP: 2026-04-27T05:00:00-05:00 - #+PROTOCOL_VERSION: {proto_version} - - Body. - """)) - return msg - - -def _run(args: list[str], env: dict | None = None, cwd: Path | None = None) -> subprocess.CompletedProcess: - return subprocess.run( - [str(SCRIPT), *args], - capture_output=True, - text=True, - env=env, - cwd=cwd, - ) - - -@pytest.fixture -def isolated_env(tmp_path, monkeypatch): - """Redirect HOME so peers.toml, HALT, marker files are scoped to the test.""" - fake_home = tmp_path / "home" - fake_home.mkdir() - monkeypatch.setenv("HOME", str(fake_home)) - # Pre-create projects/ so derive_sender_project has somewhere to look. - (fake_home / "projects" / "homelab").mkdir(parents=True) - return fake_home - - -def test_send_help(isolated_env): - """--help works without side effects.""" - result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)}) - assert result.returncode == 0 - assert "Send a cross-agent message" in result.stdout - - -def test_send_missing_message_file(isolated_env): - """Nonexistent message file returns general error.""" - import socket - machine = socket.gethostname().split(".")[0] - result = _run( - [f"{machine}.homelab", str(isolated_env / "nonexistent.org")], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 1 - assert "not found" in result.stderr.lower() - - -def test_send_invalid_destination_format(isolated_env, tmp_path): - """Destination without . 
returns dest-not-found exit code.""" - msg = _make_message(tmp_path) - result = _run( - ["bogus", str(msg)], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 2 - assert "<machine>.<project>" in result.stderr or "destination" in result.stderr.lower() - - -def test_send_dest_not_in_peers(isolated_env, tmp_path): - """Cross-machine destination with no peers.toml entry exits 2.""" - msg = _make_message(tmp_path) - result = _run( - ["unknownmachine.homelab", str(msg)], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 2 - assert "not found in peers" in result.stderr - - -def test_send_frontmatter_missing_required(isolated_env, tmp_path): - """Message missing required fields exits 4.""" - bad = tmp_path / "bad.org" - bad.write_text("#+TITLE: nope\n\nBody.\n") - import socket - machine = socket.gethostname().split(".")[0] - result = _run( - [f"{machine}.homelab", str(bad)], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 4 - assert "missing required fields" in result.stderr - - -def test_send_invalid_message_type(isolated_env, tmp_path): - """Unknown MESSAGE_TYPE exits 4.""" - msg = _make_message(tmp_path, msg_type="frobnicate") - import socket - machine = socket.gethostname().split(".")[0] - result = _run( - [f"{machine}.homelab", str(msg)], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 4 - assert "MESSAGE_TYPE" in result.stderr - - -def test_send_halt_blocks(isolated_env, tmp_path): - """When HALT exists, send refuses with exit 5.""" - halt = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("test halt\n") - msg = _make_message(tmp_path) - import socket - machine = socket.gethostname().split(".")[0] - result = _run( - [f"{machine}.homelab", str(msg)], - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 5 - assert "halt active" in 
result.stderr.lower() - - -def test_send_same_machine_no_sign_delivers(isolated_env, tmp_path): - """Same-machine delivery with --no-sign produces a canonically named file.""" - msg = _make_message(tmp_path, conv_id="my-conv") - import socket - machine = socket.gethostname().split(".")[0] - # Sender is derived from CWD walking up to ~/projects/<name>/ - cwd = isolated_env / "projects" / "homelab" - result = _run( - [f"{machine}.homelab", str(msg), "--no-sign"], - env={**os.environ, "HOME": str(isolated_env)}, - cwd=cwd, - ) - assert result.returncode == 0, f"stderr={result.stderr}" - inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents" - files = list(inbox.glob("*-from-homelab-my-conv.org")) - assert len(files) == 1 - # No sig file with --no-sign. - assert not list(inbox.glob("*.asc")) - # Canonical filename pattern. - assert files[0].name.startswith("2026") and files[0].name.endswith("-from-homelab-my-conv.org") - - -def test_send_same_machine_signed_writes_asc(isolated_env, tmp_path): - """Signed delivery writes both .org and .asc.""" - msg = _make_message(tmp_path, conv_id="signed-conv") - import socket - machine = socket.gethostname().split(".")[0] - cwd = isolated_env / "projects" / "homelab" - # Use the real GPG keyring (not isolating GPG — Craig's existing keys are fine for tests). 
- real_env = {**os.environ, "HOME": str(isolated_env), "GNUPGHOME": str(Path.home() / ".gnupg")} - result = _run( - [f"{machine}.homelab", str(msg)], - env=real_env, - cwd=cwd, - ) - if result.returncode != 0: - pytest.skip(f"GPG signing unavailable in this environment: {result.stderr}") - inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents" - org_files = list(inbox.glob("*-from-homelab-signed-conv.org")) - asc_files = list(inbox.glob("*-from-homelab-signed-conv.org.asc")) - assert len(org_files) == 1 - assert len(asc_files) == 1 - - -def test_send_filename_ignores_input_basename(isolated_env, tmp_path): - """User's input filename is ignored; canonical filename is generated.""" - weird = tmp_path / "weird-user-name.org" - weird.write_text(textwrap.dedent("""\ - #+TITLE: Title - #+CONVERSATION_ID: ignored-input - #+MESSAGE_TYPE: request - #+SEQUENCE: 1 - #+TIMESTAMP: 2026-04-27T05:00:00-05:00 - #+PROTOCOL_VERSION: 5 - - Body. - """)) - import socket - machine = socket.gethostname().split(".")[0] - cwd = isolated_env / "projects" / "homelab" - result = _run( - [f"{machine}.homelab", str(weird), "--no-sign"], - env={**os.environ, "HOME": str(isolated_env)}, - cwd=cwd, - ) - assert result.returncode == 0 - inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents" - # No file named after the user's input. - assert not (inbox / "weird-user-name.org").exists() - # Canonical naming used. 
- assert list(inbox.glob("*-from-homelab-ignored-input.org")) diff --git a/.ai/scripts/tests/test_cross_agent_status.py b/.ai/scripts/tests/test_cross_agent_status.py deleted file mode 100644 index bb5b8ba..0000000 --- a/.ai/scripts/tests/test_cross_agent_status.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Tests for cross-agent-status (TDD: tests written before implementation).""" - -from __future__ import annotations - -import json -import os -import subprocess -import textwrap -from pathlib import Path - -import pytest - -SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-status" - - -def _make_msg(path: Path, *, conv_id: str, seq: int, msg_type: str = "request", - proto_version: str = "5", timestamp: str = "2026-04-27T05:00:00-05:00") -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(textwrap.dedent(f"""\ - #+TITLE: T - #+CONVERSATION_ID: {conv_id} - #+MESSAGE_TYPE: {msg_type} - #+SEQUENCE: {seq} - #+TIMESTAMP: {timestamp} - #+PROTOCOL_VERSION: {proto_version} - - Body. - """)) - return path - - -def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess: - return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env) - - -@pytest.fixture -def fake_projects(tmp_path, monkeypatch): - """Create a fake ~/projects/<name>/inbox/from-agents/ tree under tmp_path.""" - home = tmp_path / "home" - home.mkdir() - monkeypatch.setenv("HOME", str(home)) - return home - - -def test_status_help(fake_projects): - result = _run(["--help"], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - assert "snapshot" in result.stdout.lower() or "pending" in result.stdout.lower() - - -def test_status_no_projects_clean_output(fake_projects): - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - # Empty machine prints either header-only table or "no projects" — accept either. - # No crash, no pending claims. 
- assert "pending" in result.stdout.lower() or result.stdout.strip() == "" - - -def test_status_one_pending_shows_up(fake_projects): - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-career-fixup.org", conv_id="fixup", seq=1) - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - assert "homelab" in result.stdout - assert "1" in result.stdout # pending count - assert "20260427T100000Z-from-career-fixup.org" in result.stdout - - -def test_status_released_conversation_zero_pending(fake_projects): - """A conversation with a release message in it counts as 0 pending.""" - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-career-done.org", conv_id="done", seq=1) - _make_msg(inbox / "20260427T100100Z-from-homelab-done.org", conv_id="done", seq=2, msg_type="release") - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - # Check the homelab row shows 0 pending. - lines = [ln for ln in result.stdout.splitlines() if "homelab" in ln] - # At least one homelab line should show 0 pending or "—". - assert any("0" in ln or "—" in ln for ln in lines) - - -def test_status_partial_release(fake_projects): - """Conversation with release + a later message → that later message counts as pending.""" - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-career-x.org", conv_id="x", seq=1, - timestamp="2026-04-27T05:00:00-05:00") - _make_msg(inbox / "20260427T100100Z-from-homelab-x.org", conv_id="x", seq=2, msg_type="release", - timestamp="2026-04-27T05:01:00-05:00") - # New message AFTER release: starts a fresh thread that's pending. 
- _make_msg(inbox / "20260427T200000Z-from-career-x.org", conv_id="x", seq=3, - timestamp="2026-04-27T15:00:00-05:00") - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln) - assert "1" in homelab_line # the post-release message is pending - - -def test_status_multiple_projects(fake_projects): - inbox_a = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - inbox_b = fake_projects / "projects" / "career" / "inbox" / "from-agents" - _make_msg(inbox_a / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1) - _make_msg(inbox_b / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1) - _make_msg(inbox_b / "20260427T100200Z-from-x-c.org", conv_id="c", seq=1) - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - # career has 2 pending, homelab has 1. - career_line = next(ln for ln in result.stdout.splitlines() if "career" in ln) - homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln) - assert "2" in career_line - assert "1" in homelab_line - - -def test_status_json_output(fake_projects): - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-career-test.org", conv_id="test", seq=1) - result = _run(["--json"], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - payload = json.loads(result.stdout) - assert "projects" in payload - assert isinstance(payload["projects"], list) - homelab = next((p for p in payload["projects"] if p["name"] == "homelab"), None) - assert homelab is not None - assert homelab["pending_count"] == 1 - - -def test_status_sort_pending_first(fake_projects): - """Projects with pending messages sort before projects with 0.""" - (fake_projects / "projects" / "alpha" / "inbox" / "from-agents").mkdir(parents=True) - inbox_zeta = fake_projects / "projects" / 
"zeta" / "inbox" / "from-agents" - _make_msg(inbox_zeta / "20260427T100000Z-from-x-z.org", conv_id="z", seq=1) - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 - lines = result.stdout.splitlines() - zeta_idx = next(i for i, ln in enumerate(lines) if "zeta" in ln) - alpha_idx = next(i for i, ln in enumerate(lines) if "alpha" in ln) - assert zeta_idx < alpha_idx, "pending project should sort before zero-pending project" - - -def test_status_halt_shows_banner(fake_projects): - halt = fake_projects / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("halted for test") - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-x-x.org", conv_id="x", seq=1) - result = _run([], env={**os.environ, "HOME": str(fake_projects)}) - assert result.returncode == 0 # status continues to print under HALT - assert "HALT" in result.stdout - # Banner should mention the reason. - assert "halted for test" in result.stdout - - -def test_status_projects_glob_override(fake_projects): - inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents" - _make_msg(inbox / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1) - other_inbox = fake_projects / "projects" / "career" / "inbox" / "from-agents" - _make_msg(other_inbox / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1) - # Glob limits to homelab only. - result = _run( - ["--projects-glob", str(fake_projects / "projects" / "homelab" / "inbox" / "from-agents") + "/"], - env={**os.environ, "HOME": str(fake_projects)}, - ) - assert result.returncode == 0 - assert "homelab" in result.stdout - # career not in scope. 
- assert "career" not in result.stdout diff --git a/.ai/scripts/tests/test_cross_agent_watch.py b/.ai/scripts/tests/test_cross_agent_watch.py deleted file mode 100644 index 417cc19..0000000 --- a/.ai/scripts/tests/test_cross_agent_watch.py +++ /dev/null @@ -1,155 +0,0 @@ -"""Tests for cross-agent-watch. - -Black-box: spawn the script, drop files into a watched dir, read the log. -Tests use --no-notify to avoid firing real desktop notifications. -""" - -from __future__ import annotations - -import os -import subprocess -import time -from pathlib import Path - -import pytest - -SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-watch" - - -def _spawn(watched_dir: Path, log_path: Path, env: dict) -> subprocess.Popen: - return subprocess.Popen( - [ - str(SCRIPT), - "--projects-glob", str(watched_dir) + "/", - "--log", str(log_path), - "--no-notify", - "--quiet", - ], - stdout=subprocess.DEVNULL, - stderr=subprocess.PIPE, - env=env, - ) - - -def _wait_for_log_lines(log_path: Path, expected: int, timeout: float = 5.0) -> list[str]: - deadline = time.time() + timeout - while time.time() < deadline: - if log_path.exists(): - lines = [ln for ln in log_path.read_text().splitlines() if ln] - if len(lines) >= expected: - return lines - time.sleep(0.1) - if log_path.exists(): - return [ln for ln in log_path.read_text().splitlines() if ln] - return [] - - -@pytest.fixture -def isolated_env(tmp_path, monkeypatch): - fake_home = tmp_path / "home" - fake_home.mkdir() - monkeypatch.setenv("HOME", str(fake_home)) - return fake_home - - -def test_watch_help(isolated_env): - result = subprocess.run( - [str(SCRIPT), "--help"], - capture_output=True, text=True, - env={**os.environ, "HOME": str(isolated_env)}, - ) - assert result.returncode == 0 - assert "Usage:" in result.stdout - - -def test_watch_empty_glob_exits_nonzero(isolated_env): - """Glob resolving to zero dirs should exit non-zero with a clear message.""" - result = subprocess.run( - 
[str(SCRIPT), "--projects-glob", "/nonexistent/path/*/foo/", "--no-notify", "--quiet"], - capture_output=True, text=True, - env={**os.environ, "HOME": str(isolated_env)}, - timeout=3, - ) - assert result.returncode != 0 - assert "0 directories" in result.stderr - - -def test_watch_logs_org_file_create(isolated_env, tmp_path): - watched = tmp_path / "watched" - watched.mkdir() - log = tmp_path / "watch.log" - proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)}) - try: - # Give inotifywait a moment to attach. - time.sleep(0.3) - (watched / "test-msg.org").write_text("hello") - lines = _wait_for_log_lines(log, expected=1, timeout=3.0) - assert len(lines) >= 1 - assert "test-msg.org" in lines[-1] - finally: - proc.terminate() - proc.wait(timeout=2) - - -def test_watch_filters_tmp_files(isolated_env, tmp_path): - """Files starting with .tmp. must NOT trigger log entries.""" - watched = tmp_path / "watched" - watched.mkdir() - log = tmp_path / "watch.log" - proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)}) - try: - time.sleep(0.3) - (watched / ".tmp.staging-file.org").write_text("hello") - # Wait briefly to confirm nothing logs. - time.sleep(0.5) - if log.exists(): - content = log.read_text() - assert ".tmp.staging-file" not in content - # Then drop a real file to confirm watcher is alive. 
- (watched / "real.org").write_text("real") - lines = _wait_for_log_lines(log, expected=1, timeout=3.0) - assert any("real.org" in ln for ln in lines) - finally: - proc.terminate() - proc.wait(timeout=2) - - -def test_watch_filters_asc_sidecars(isolated_env, tmp_path): - """Only .org events fire; .asc sidecars are silent.""" - watched = tmp_path / "watched" - watched.mkdir() - log = tmp_path / "watch.log" - proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)}) - try: - time.sleep(0.3) - (watched / "msg.org.asc").write_text("sig") - time.sleep(0.5) - if log.exists(): - assert "msg.org.asc" not in log.read_text() - # .org event still works. - (watched / "msg.org").write_text("body") - lines = _wait_for_log_lines(log, expected=1, timeout=3.0) - assert any(ln.endswith("msg.org") for ln in lines) - finally: - proc.terminate() - proc.wait(timeout=2) - - -def test_watch_halt_suppresses_but_logs(isolated_env, tmp_path): - """When HALT is set, watcher logs the event with (suppressed by HALT) marker.""" - halt = isolated_env / ".config" / "cross-agent-comms" / "HALT" - halt.parent.mkdir(parents=True) - halt.write_text("halted") - watched = tmp_path / "watched" - watched.mkdir() - log = tmp_path / "watch.log" - proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)}) - try: - time.sleep(0.3) - (watched / "halted-event.org").write_text("body") - lines = _wait_for_log_lines(log, expected=1, timeout=3.0) - assert len(lines) >= 1 - assert "suppressed by HALT" in lines[-1] - finally: - proc.terminate() - proc.wait(timeout=2) |
