aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-discover230
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-discover.md155
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-halt134
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-halt.md134
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-recv250
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-recv.md218
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-resume145
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-resume.md117
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-send356
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-send.md199
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-status185
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-status.md139
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-watch106
-rw-r--r--.ai/scripts/cross-agent-comms/cross-agent-watch.md130
-rw-r--r--.ai/scripts/tests/test_cross_agent_discover.py204
-rw-r--r--.ai/scripts/tests/test_cross_agent_halt.py204
-rw-r--r--.ai/scripts/tests/test_cross_agent_recv.py176
-rw-r--r--.ai/scripts/tests/test_cross_agent_send.py210
-rw-r--r--.ai/scripts/tests/test_cross_agent_status.py165
-rw-r--r--.ai/scripts/tests/test_cross_agent_watch.py155
-rw-r--r--.ai/sessions/2026-06-16-23-37-cross-agent-comms-removal-and-batch-specs.org194
-rw-r--r--.ai/workflows/INDEX.org1
-rw-r--r--.ai/workflows/cross-agent-comms.org334
-rw-r--r--.ai/workflows/helper-mode.org2
-rw-r--r--.ai/workflows/startup.org17
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover230
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover.md155
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt134
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt.md134
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv250
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv.md218
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume145
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume.md117
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-send356
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send.md199
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-status185
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status.md139
-rwxr-xr-xclaude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch106
-rw-r--r--claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch.md130
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_discover.py204
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_halt.py204
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_recv.py176
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_send.py210
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_status.py165
-rw-r--r--claude-templates/.ai/scripts/tests/test_cross_agent_watch.py155
-rw-r--r--claude-templates/.ai/workflows/INDEX.org1
-rw-r--r--claude-templates/.ai/workflows/cross-agent-comms.org334
-rw-r--r--claude-templates/.ai/workflows/helper-mode.org2
-rw-r--r--claude-templates/.ai/workflows/startup.org17
-rw-r--r--docs/design/2026-05-28-generic-agent-runtime-spec.org4
50 files changed, 214 insertions, 7916 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-discover b/.ai/scripts/cross-agent-comms/cross-agent-discover
deleted file mode 100755
index 152cf27..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-discover
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/usr/bin/env python3
-"""Enumerate cross-agent destinations: local projects + tailnet peers.
-
-See cross-agent-discover.md. Local: scan ~/projects/*/.ai/. Peers: read
-peers.toml, SSH-probe each for reachability. --enumerate-remote optionally
-runs `ls -d ~/projects/*/.ai/` over SSH to list remote projects.
-
-Cache results for 5 min at ~/.cache/cross-agent-comms/discovery.json so
-repeated invocations don't re-probe.
-
-HALT: prints a banner; otherwise continues.
-"""
-
-from __future__ import annotations
-
-import argparse
-import datetime as _dt
-import json
-import os
-import subprocess
-import sys
-import time
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-HALT_FILE = CONFIG_DIR / "HALT"
-CACHE_DIR = Path.home() / ".cache" / "cross-agent-comms"
-CACHE_FILE = CACHE_DIR / "discovery.json"
-CACHE_TTL_SECONDS = 300
-
-EXIT_OK = 0
-EXIT_GENERAL = 1
-EXIT_PEERS_TOML = 1
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def render_banner_if_halt() -> None:
- if not HALT_FILE.exists():
- return
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- reason = "(HALT file unreadable; treated as halted)"
- print("⚠ HALT ACTIVE — cross-agent comms paused")
- if reason:
- print(f" reason: {reason}")
- print()
-
-
-def enumerate_local_projects() -> list[str]:
- projects_dir = Path.home() / "projects"
- if not projects_dir.is_dir():
- return []
- found = []
- for child in sorted(projects_dir.iterdir()):
- if child.is_dir() and (child / ".ai").is_dir():
- found.append(child.name)
- return found
-
-
-def load_peers() -> dict:
- if not PEERS_TOML.exists():
- return {"peers": {}}
- try:
- return tomllib.loads(PEERS_TOML.read_text())
- except (tomllib.TOMLDecodeError, OSError) as e:
- err(f"cannot parse peers.toml: {e}")
- sys.exit(EXIT_PEERS_TOML)
-
-
-def probe_peer_reachability(host: str, ssh_user: str | None) -> tuple[bool, str | None]:
- """Run a short SSH probe with BatchMode=yes (no interactive prompt)."""
- target = f"{ssh_user}@{host}" if ssh_user else host
- try:
- result = subprocess.run(
- ["ssh", "-o", "ConnectTimeout=2", "-o", "BatchMode=yes", target, "true"],
- capture_output=True,
- text=True,
- timeout=5,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return False, "ssh probe failed"
- if result.returncode == 0:
- return True, None
- return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
-
-
-def enumerate_remote_projects(host: str, ssh_user: str | None) -> list[str] | None:
- target = f"{ssh_user}@{host}" if ssh_user else host
- try:
- result = subprocess.run(
- [
- "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target,
- "ls -d ~/projects/*/.ai/ 2>/dev/null",
- ],
- capture_output=True,
- text=True,
- timeout=10,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return None
- if result.returncode != 0:
- return None
- projects = []
- for line in result.stdout.splitlines():
- # Each line looks like /home/<user>/projects/<name>/.ai/
- parts = line.rstrip("/").split("/")
- if len(parts) >= 2 and parts[-1] == ".ai":
- projects.append(parts[-2])
- return projects
-
-
-def read_cache() -> dict | None:
- if not CACHE_FILE.exists():
- return None
- try:
- age = time.time() - CACHE_FILE.stat().st_mtime
- if age > CACHE_TTL_SECONDS:
- return None
- return json.loads(CACHE_FILE.read_text())
- except (OSError, json.JSONDecodeError):
- return None
-
-
-def write_cache(payload: dict) -> None:
- CACHE_DIR.mkdir(parents=True, exist_ok=True)
- CACHE_FILE.write_text(json.dumps(payload, indent=2))
-
-
-def discover(peer_filter: str | None, enumerate_remote: bool) -> dict:
- local = enumerate_local_projects()
- peers_cfg = load_peers().get("peers", {})
-
- peers_out = []
- for name, cfg in sorted(peers_cfg.items()):
- if peer_filter and name != peer_filter:
- continue
- host = cfg.get("host", name)
- ssh_user = cfg.get("ssh_user")
- reachable, error = probe_peer_reachability(host, ssh_user)
- entry = {
- "name": name,
- "host": host,
- "reachable": reachable,
- }
- if not reachable:
- entry["error"] = error
- if enumerate_remote and reachable:
- entry["projects"] = enumerate_remote_projects(host, ssh_user) or []
- peers_out.append(entry)
-
- return {
- "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(),
- "halt_active": HALT_FILE.exists(),
- "local": local,
- "peers": peers_out,
- }
-
-
-def render_table(payload: dict, enumerate_remote: bool) -> None:
- local = payload.get("local", [])
- print(f"Local ({_local_hostname()}):")
- if local:
- wrapped = ", ".join(local)
- print(f" {wrapped} [{len(local)} project{'s' if len(local) != 1 else ''}]")
- else:
- print(" (no projects with .ai/ found)")
- print()
-
- peers = payload.get("peers", [])
- if not peers:
- print("Peers (from peers.toml):")
- print(" (no peers configured)")
- return
-
- print("Peers (from ~/.config/cross-agent-comms/peers.toml):")
- for p in peers:
- marker = "✓ reachable" if p.get("reachable") else f"✗ UNREACHABLE ({p.get('error', 'unknown')})"
- print(f" {p['name']:<16} {p['host']:<24} {marker}")
- if enumerate_remote and p.get("projects"):
- wrapped = ", ".join(p["projects"])
- print(f" projects: {wrapped}")
-
-
-def _local_hostname() -> str:
- import socket
- return socket.gethostname().split(".")[0]
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Discover cross-agent destinations.")
- parser.add_argument("--enumerate-remote", action="store_true",
- help="SSH into each peer and list ~/projects/*/.ai/")
- parser.add_argument("--no-cache", action="store_true", help="Skip cache; force fresh probe")
- parser.add_argument("--peer", help="Limit to a single peer name from peers.toml")
- parser.add_argument("--json", action="store_true", help="Machine-readable output")
- args = parser.parse_args()
-
- render_banner_if_halt()
-
- payload = None
- if not args.no_cache:
- cached = read_cache()
- if cached is not None:
- # Honor --peer filter on cached payload.
- if args.peer:
- cached["peers"] = [p for p in cached.get("peers", []) if p["name"] == args.peer]
- payload = cached
-
- if payload is None:
- payload = discover(args.peer, args.enumerate_remote)
- if not args.no_cache and not args.peer:
- # Only cache full (unfiltered) discoveries.
- write_cache(payload)
-
- if args.json:
- print(json.dumps(payload, indent=2))
- return EXIT_OK
-
- render_table(payload, args.enumerate_remote)
- return EXIT_OK
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-discover.md b/.ai/scripts/cross-agent-comms/cross-agent-discover.md
deleted file mode 100644
index 95134bb..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-discover.md
+++ /dev/null
@@ -1,155 +0,0 @@
-# cross-agent-discover
-
-**Purpose.** Enumerate available cross-agent destinations — local projects on
-this machine and remote projects on tailnet peers. Validates SSH reachability
-for cross-machine destinations before reporting them as usable.
-
-## Usage
-
-```
-cross-agent-discover [--enumerate-remote] [--no-cache] [--peer <name>]
-```
-
-No args required for the common case (local enumeration + peer reachability).
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--enumerate-remote` | off | SSH into each peer and list projects under `~/projects/*/.ai/`. Off by default because SSH adds latency; turn on when you want to see what's available on a remote machine you haven't fully configured. |
-| `--no-cache` | off | Skip the 5-minute cache; force fresh discovery. |
-| `--peer <name>` | (all) | Limit to a single peer from `peers.toml`. |
-| `--json` | off | Machine-readable output. |
-
-## Output
-
-### Default
-
-```
-$ cross-agent-discover
-Local (ratio):
- career, claude-templates, clipper, danneel, documents, elibrary,
- finances, health, homelab, jr-estate, kit, little-elisper,
- philosophy, website [14 projects]
-
-Peers (from ~/.config/cross-agent-comms/peers.toml):
- velox.local reachable (last seen 2 sec ago)
- bastion.local UNREACHABLE (ssh exit 255: connection refused)
-```
-
-### With `--enumerate-remote`
-
-```
-$ cross-agent-discover --enumerate-remote
-Local (ratio):
- ... (as above)
-
-velox.local (reachable):
- career, homelab [2 projects]
-```
-
-## Configuration
-
-Reads `~/.config/cross-agent-comms/peers.toml`:
-
-```toml
-# Each peer is a remote machine reachable via SSH (typically over Tailscale).
-
-[peers.velox]
-host = "velox.local"
-ssh_user = "cjennings"
-
-[peers.bastion]
-host = "bastion.local"
-ssh_user = "cjennings"
-```
-
-Peers entries describe machines, NOT projects. Projects are enumerated
-on-demand under `~/projects/*/.ai/` either locally or via SSH.
-
-## Cache
-
-Successful discovery results are cached at
-`~/.cache/cross-agent-comms/discovery.json` for 5 minutes. Repeated invocations
-within the window read from cache.
-
-`--no-cache` forces a fresh probe. Useful when adding a new peer or after a
-network change.
-
-## SSH reachability check
-
-For each peer, runs:
-
-```
-ssh -o ConnectTimeout=2 -o BatchMode=yes <user>@<host> true
-```
-
-`BatchMode=yes` prevents interactive password prompts — peers that don't have
-key-based auth set up are reported as UNREACHABLE.
-
-If `--enumerate-remote` is set, on success runs:
-
-```
-ssh <user>@<host> 'ls -d ~/projects/*/.ai/ 2>/dev/null'
-```
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| Peer reported UNREACHABLE | Tailscale not connected, SSH key not authorized, host firewalled | `tailscale status`; `ssh -v <peer>` to debug. |
-| Local list is empty | Glob misresolved, or `~/projects/` doesn't exist | Check `ls -d ~/projects/*/.ai/`. |
-| `--enumerate-remote` slow | Cold cache, slow tailnet, many peers | First run is slow, subsequent runs hit cache. Use `--peer <name>` to scope. |
-| Peer unexpectedly missing from output | Not in `peers.toml`, or `peers.toml` malformed | `cat ~/.config/cross-agent-comms/peers.toml` and validate. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a
-prominent banner before normal output:
-
-```
-$ cross-agent-discover
-⚠ HALT ACTIVE — cross-agent comms paused
- Reason: <reason from HALT file body, if any>
- Resume with: cross-agent-resume
-
-(enumeration continues normally — HALT does not suppress visibility)
-
-Local (ratio):
- career, claude-templates, ...
-
-Peers:
- velox.local reachable
-```
-
-Discover is read-only. Like `cross-agent-status`, it always runs so the user
-keeps visibility into what destinations exist regardless of halt state. The
-banner makes the halt state impossible to miss.
-
-If the HALT file exists but is unreadable, print a warning banner and
-continue.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Common: see what's available
-cross-agent-discover
-
-# Force fresh probe after network change
-cross-agent-discover --no-cache
-
-# What's on velox specifically
-cross-agent-discover --peer velox --enumerate-remote
-
-# Pipe to grep
-cross-agent-discover --json | jq '.peers[] | select(.reachable)'
-```
-
-## See also
-
-- `cross-agent-send` — uses `peers.toml` for routing destinations.
-- `cross-agent-status` — local pending messages.
-- `cross-agent-comms.org` — protocol spec, `* Limitations` section
- explains the cross-machine model.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt b/.ai/scripts/cross-agent-comms/cross-agent-halt
deleted file mode 100755
index df25115..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-halt
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-"""Failsafe halt for cross-agent comms.
-
-See cross-agent-halt.md. Touches ~/.config/cross-agent-comms/HALT and stops
-the cross-agent-watch systemd user service. With --tailnet, propagates the
-HALT file to every peer in peers.toml via SSH; reports per-peer status with
-non-zero exit on partial halt.
-
-Does NOT pkill in-flight scripts — they detect HALT on next iteration and
-stop themselves.
-"""
-
-from __future__ import annotations
-
-import argparse
-import subprocess
-import sys
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-
-EXIT_OK = 0
-EXIT_PARTIAL = 1
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def write_halt_file(reason: str) -> None:
- CONFIG_DIR.mkdir(parents=True, exist_ok=True)
- HALT_FILE.write_text((reason + "\n") if reason else "")
-
-
-def stop_watcher_service() -> None:
- """Best-effort stop of the systemd watcher service. Failures are logged but not fatal."""
- try:
- subprocess.run(
- ["systemctl", "--user", "stop", "cross-agent-watch.path"],
- capture_output=True, text=True, timeout=5,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- # Watcher service may not be installed — fine.
- pass
-
-
-def load_peers() -> dict:
- if not PEERS_TOML.exists():
- return {}
- try:
- return tomllib.loads(PEERS_TOML.read_text())
- except (tomllib.TOMLDecodeError, OSError) as e:
- err(f"cannot parse peers.toml: {e}")
- return {}
-
-
-def ssh_touch_halt(host: str, ssh_user: str | None, reason: str) -> tuple[bool, str]:
- target = f"{ssh_user}@{host}" if ssh_user else host
- # Build the remote command. Quote the reason carefully.
- remote_cmd = (
- f"mkdir -p ~/.config/cross-agent-comms && "
- f"printf %s {_sh_quote(reason)} > ~/.config/cross-agent-comms/HALT"
- )
- try:
- result = subprocess.run(
- ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd],
- capture_output=True, text=True, timeout=10,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return False, "ssh unavailable or timed out"
- if result.returncode == 0:
- return True, "HALT file written"
- return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
-
-
-def _sh_quote(s: str) -> str:
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Halt all cross-agent comms on this machine (and optionally tailnet).")
- parser.add_argument("reason", nargs="?", default="", help="Optional human-readable reason")
- parser.add_argument("--tailnet", action="store_true",
- help="Propagate HALT to every peer in peers.toml")
- args = parser.parse_args()
-
- # Local halt.
- write_halt_file(args.reason)
- stop_watcher_service()
- print("Halting locally ✓ (HALT file written)")
-
- if not args.tailnet:
- print()
- print(f"Halt active. Remove {HALT_FILE} or run cross-agent-resume to clear.")
- print("Agent polling will stop within ~5 min (one cadence cycle).")
- return EXIT_OK
-
- peers = load_peers().get("peers", {})
- if not peers:
- print()
- print("No peers configured in peers.toml — local-only halt complete.")
- return EXIT_OK
-
- print()
- successes = 1 # local already counted
- failures = []
- for name, cfg in sorted(peers.items()):
- host = cfg.get("host", name)
- ssh_user = cfg.get("ssh_user")
- ok, detail = ssh_touch_halt(host, ssh_user, args.reason)
- marker = "✓" if ok else "✗"
- print(f"Halting {host:<28} {marker} ({detail})")
- if ok:
- successes += 1
- else:
- failures.append(f"{name} ({host}): {detail}")
-
- print()
- total = len(peers) + 1
- if failures:
- print(f"PARTIAL HALT: {successes}/{total} machines halted.")
- for f in failures:
- print(f" - {f}")
- print("Resolve the failures or manually halt each machine.")
- return EXIT_PARTIAL
- print(f"Halt active across {total} machine(s).")
- return EXIT_OK
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt.md b/.ai/scripts/cross-agent-comms/cross-agent-halt.md
deleted file mode 100644
index b817fbc..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-halt.md
+++ /dev/null
@@ -1,134 +0,0 @@
-# cross-agent-halt
-
-**Purpose.** Failsafe stop for all cross-agent activity on the local machine
-(or, with `--tailnet`, across all configured peers). Creates the HALT file
-that every component in the protocol checks; within one polling cadence
-(~5 min) all polling, sending, watching, and receiving stops.
-
-This is the user's emergency brake. Use when something is misbehaving and
-visiting individual sessions is too slow.
-
-## Usage
-
-```
-cross-agent-halt [reason] [--tailnet] [--no-stop-watcher]
-```
-
-### Positional argument
-
-| Position | Meaning | Example |
-|---|---|---|
-| 1 | Optional human-readable reason for the halt. Written into the HALT file's body. Helps future-you remember why you stopped things. | `"investigating runaway poll loop, 2026-04-27"` |
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--tailnet` | local only | Propagate halt to every peer in `peers.toml` via SSH over Tailscale. |
-| `--no-stop-watcher` | (stops watcher) | Skip stopping the `cross-agent-watch.path` systemd unit. Useful if the watcher is intentionally separate from comms (rare). |
-
-## Behavior
-
-### Local halt (default)
-
-1. Write the HALT file: `~/.config/cross-agent-comms/HALT`. If a `[reason]` was
- passed, write it as the file's body. Otherwise the file is empty (existence
- alone triggers halt).
-2. Stop the watcher service: `systemctl --user stop cross-agent-watch.path`
- (and the corresponding `.service` if running).
-3. Print a summary:
- ```
- ✓ HALT file written: ~/.config/cross-agent-comms/HALT
- ✓ Watcher service stopped (cross-agent-watch.path)
- - In-flight sends will complete their current rsync step (~seconds), then
- stop. New sends are blocked.
- - Active agent polling sessions stop within one cadence (~5 min).
- - Use `cross-agent-resume` to clear HALT.
- Per-session polling does NOT auto-resume — you re-engage each session by
- telling its agent to resume polling.
- ```
-4. Exit 0.
-
-### Cross-tailnet halt (`--tailnet`)
-
-1. Apply local halt steps 1-2 first.
-2. Read `peers.toml` for the list of remote machines.
-3. For each peer, SSH and write the HALT file:
- ```
- ssh <user>@<host> "echo '<reason>' > ~/.config/cross-agent-comms/HALT && \
- systemctl --user stop cross-agent-watch.path"
- ```
-4. Track per-peer success/failure. Print results:
- ```
- Halting velox.local ✓ (HALT file written)
- Halting bastion.local ✗ (ssh exit 255: no route to host)
- Halting locally ✓ (HALT file written)
-
- PARTIAL HALT: 2/3 machines halted. bastion.local needs manual halt.
- ```
-5. Exit 0 if all peers halted; exit 1 if any peer failed (so scripts can
- detect partial halt). The local halt always succeeds — even on `--tailnet`,
- if remote peers fail, local is still halted.
-
-## What "halt active" means for each component
-
-| Component | Behavior under HALT |
-|---|---|
-| `cross-agent-send` | Refuses to send. Exits 5 with "halt active; remove ~/.config/cross-agent-comms/HALT to resume." Checks HALT at start AND between each retry/rsync step, so an in-flight send completes its current step then stops. |
-| `cross-agent-recv` | Refuses to verify or dedup. Exits 5 with same message. Inbound files are **left in place** — not moved, not rejected — so resume picks them up cleanly via cold-start. |
-| `cross-agent-watch` | Continues running but suppresses notifications. Logs each event with `(suppressed by HALT)` so the operator can see what would have fired. |
-| `cross-agent-status` | Prints prominent `⚠ HALT ACTIVE` banner before normal output. Continues to enumerate (read-only). |
-| `cross-agent-discover` | Same banner. Continues (read-only). |
-| Agent polling loops | Check HALT on every wake. If set: write a final `progress` note to any active conversation ("HALT fired locally; pausing"), surface "(HALT active; cross-agent comms paused)" in every user response, and stop rescheduling. Polling decays naturally within one cadence. |
-| Conversation initiator | Refuses to write sequence 1 of any new conversation. Surfaces refusal to user. |
-| Startup workflow (Phase A) | Checks HALT at session boot. If set, surfaces immediately and skips cross-agent inbox checks. |
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `~/.config/cross-agent-comms/HALT` already exists | Halt was already active | OK — running halt again refreshes the reason text. Safe. |
-| `systemctl --user stop` fails | Watcher service not installed, or systemd not available | The HALT file is still written — components that check HALT will still stop. The systemctl failure surfaces as a non-fatal warning. |
-| `--tailnet` halts some peers but not others | One or more peers unreachable | Exit 1 with per-peer status. Manually halt the unreachable peers (visit each machine, `touch ~/.config/cross-agent-comms/HALT`), or fix the network and re-run. |
-| Permission denied writing the HALT file | `~/.config/cross-agent-comms/` doesn't exist or is owned by another user | `mkdir -p ~/.config/cross-agent-comms/`; check ownership. |
-
-## What halt does NOT do
-
-- Does not kill running Claude sessions. Polling stops within ~5 min, but the
- session itself stays alive and can be re-engaged after resume.
-- Does not delete pending messages. Inbound files in `inbox/from-agents/`
- remain; they get processed when polling resumes.
-- Does not abort in-flight rsync push mid-byte. Atomic-write semantics
- guarantee in-flight messages either complete cleanly or leave only `.tmp.*`
- files (which receivers ignore).
-
-## Examples
-
-```bash
-# Quick halt with no reason
-cross-agent-halt
-
-# Halt with a memo
-cross-agent-halt "runaway poll loop in homelab session, debugging"
-
-# Halt all tailnet peers + local
-cross-agent-halt --tailnet "shutting down for system update"
-
-# Halt protocol comms but leave the watcher service running
-cross-agent-halt --no-stop-watcher
-```
-
-## Recovery
-
-Always pair with `cross-agent-resume` when the situation is resolved:
-
-```bash
-cross-agent-resume # local
-cross-agent-resume --tailnet # all peers
-```
-
-## See also
-
-- `cross-agent-resume` — counterpart that clears HALT.
-- `cross-agent-status` — see HALT state at a glance.
-- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-recv b/.ai/scripts/cross-agent-comms/cross-agent-recv
deleted file mode 100755
index b67533a..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-recv
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/usr/bin/env python3
-"""Cross-agent message receiver.
-
-See cross-agent-recv.md for the full contract. Reads one message file and
-emits a structured decision the agent acts on:
-
- process | dedup | query | reject
-
-Decision exit codes:
- 0 = process 1 = dedup 2 = query 3 = reject
-
-When HALT is set, the script refuses to verify or dedup and leaves the
-inbound file in place — resume picks it up via cold-start.
-"""
-
-from __future__ import annotations
-
-import argparse
-import hashlib
-import json
-import re
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-EXPECTED_PROTOCOL_VERSION = "5"
-
-REQUIRED_FRONTMATTER = ["TITLE", "CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"]
-VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"}
-
-DEC_PROCESS = "process"
-DEC_DEDUP = "dedup"
-DEC_QUERY = "query"
-DEC_REJECT = "reject"
-
-EXIT_FOR_DECISION = {
- DEC_PROCESS: 0,
- DEC_DEDUP: 1,
- DEC_QUERY: 2,
- DEC_REJECT: 3,
-}
-
-EXIT_HALT = 5
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def check_halt() -> None:
- if HALT_FILE.exists():
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- err("halt active (HALT file present but unreadable; treated as halted)")
- sys.exit(EXIT_HALT)
- msg = "halt active; leaving inbound message in place (resume will pick up)"
- if reason:
- msg = f"{msg}: {reason}"
- err(msg)
- sys.exit(EXIT_HALT)
-
-
-def parse_frontmatter(path: Path) -> dict[str, str]:
- try:
- text = path.read_text()
- except OSError as e:
- return {"_parse_error": f"cannot read: {e}"}
- fm: dict[str, str] = {}
- for line in text.splitlines():
- line = line.rstrip()
- if not line:
- if fm:
- break
- continue
- m = re.match(r"#\+([A-Z_]+):\s*(.*)", line)
- if m:
- fm[m.group(1)] = m.group(2).strip()
- elif fm:
- break
- return fm
-
-
-def emit_decision(
- decision: str,
- reason: str | None,
- fm: dict[str, str],
- sha256: str | None,
- args: argparse.Namespace,
-) -> int:
- payload = {
- "decision": decision,
- "reason": reason,
- "message_type": fm.get("MESSAGE_TYPE"),
- "conversation_id": fm.get("CONVERSATION_ID"),
- "sequence": fm.get("SEQUENCE"),
- "timestamp": fm.get("TIMESTAMP"),
- "sha256": sha256,
- }
- if args.json:
- print(json.dumps(payload, indent=None if args.compact_json else 2))
- else:
- print(f"decision: {decision}")
- if reason:
- print(f"reason: {reason}")
- for k in ("message_type", "conversation_id", "sequence", "timestamp"):
- v = payload[k]
- if v is not None:
- print(f"{k}: {v}")
- if sha256:
- print(f"sha256: {sha256}")
- return EXIT_FOR_DECISION[decision]
-
-
-def gpg_verify(message_path: Path, sig_path: Path) -> tuple[bool, str]:
- try:
- result = subprocess.run(
- ["gpg", "--verify", str(sig_path), str(message_path)],
- capture_output=True,
- text=True,
- )
- except FileNotFoundError:
- return False, "gpg not installed"
- if result.returncode == 0:
- return True, ""
- return False, result.stderr.strip().splitlines()[-1] if result.stderr.strip() else f"exit {result.returncode}"
-
-
-def sha256_of(path: Path) -> str:
- h = hashlib.sha256()
- with path.open("rb") as f:
- for chunk in iter(lambda: f.read(65536), b""):
- h.update(chunk)
- return h.hexdigest()
-
-
-def find_dedup_match(message_path: Path, fm: dict[str, str], my_hash: str) -> tuple[str, str | None]:
- """Scan the message's directory for same-CONVERSATION_ID/SEQUENCE files.
-
- Returns (decision, reason) — decision is DEC_DEDUP for an exact-hash match,
- or DEC_PROCESS when no match or hash differs (sequence collision is OK).
- """
- parent = message_path.parent
- conv_id = fm["CONVERSATION_ID"]
- sequence = fm["SEQUENCE"]
- for sibling in parent.iterdir():
- if sibling == message_path or not sibling.is_file() or sibling.suffix != ".org":
- continue
- sib_fm = parse_frontmatter(sibling)
- if sib_fm.get("CONVERSATION_ID") != conv_id or sib_fm.get("SEQUENCE") != sequence:
- continue
- # Same conv-id + same sequence — check hash.
- if sha256_of(sibling) == my_hash:
- return DEC_DEDUP, f"identical retry of {sibling.name}"
- return DEC_PROCESS, None
-
-
-def check_requires_tools(fm: dict[str, str]) -> tuple[bool, list[str]]:
- """REQUIRES_TOOLS is a comma-separated list of tool names.
-
- For v5, "tool available" is a heuristic: an executable on PATH whose name
- matches the tool slug. MCP availability is currently out of scope (no
- portable way to query it from a CLI).
- """
- tools_field = fm.get("REQUIRES_TOOLS")
- if not tools_field:
- return True, []
- tools = [t.strip() for t in tools_field.split(",") if t.strip()]
- missing = [t for t in tools if shutil.which(t) is None]
- return len(missing) == 0, missing
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Receive and decide on a cross-agent message.")
- parser.add_argument("message_file", type=Path)
- parser.add_argument("--no-verify", action="store_true", help="Skip GPG verification (testing only)")
- parser.add_argument("--no-dedup", action="store_true", help="Skip SHA-256 dedup against existing files")
- parser.add_argument("--protocol-version", default=EXPECTED_PROTOCOL_VERSION,
- help="Override expected protocol version (default: 5)")
- parser.add_argument("--json", action="store_true", help="Emit JSON output")
- parser.add_argument("--compact-json", action="store_true", help="Compact JSON (no indent)")
- args = parser.parse_args()
-
- check_halt()
-
- if not args.message_file.is_file():
- err(f"message file not found: {args.message_file}")
- return EXIT_FOR_DECISION[DEC_REJECT]
-
- fm = parse_frontmatter(args.message_file)
- if "_parse_error" in fm:
- return emit_decision(DEC_REJECT, fm["_parse_error"], {}, None, args)
-
- # Step 1: frontmatter sanity-check.
- missing = [k for k in REQUIRED_FRONTMATTER if k not in fm]
- if missing:
- return emit_decision(
- DEC_REJECT, f"frontmatter missing required fields: {', '.join(missing)}", fm, None, args
- )
- if fm["MESSAGE_TYPE"] not in VALID_MESSAGE_TYPES:
- return emit_decision(
- DEC_REJECT, f"invalid MESSAGE_TYPE: {fm['MESSAGE_TYPE']!r}", fm, None, args
- )
-
- # Step 2: PROTOCOL_VERSION check.
- if fm["PROTOCOL_VERSION"] != args.protocol_version:
- return emit_decision(
- DEC_QUERY,
- f"PROTOCOL_VERSION mismatch: expected {args.protocol_version}, got {fm['PROTOCOL_VERSION']}",
- fm,
- None,
- args,
- )
-
- # Step 3: GPG verify.
- if not args.no_verify:
- sig_path = args.message_file.with_suffix(args.message_file.suffix + ".asc")
- if not sig_path.is_file():
- return emit_decision(DEC_REJECT, f"signature file missing: {sig_path.name}", fm, None, args)
- ok, gpg_err = gpg_verify(args.message_file, sig_path)
- if not ok:
- return emit_decision(DEC_REJECT, f"gpg verify failed: {gpg_err}", fm, None, args)
-
- # Step 4: SHA-256 dedup.
- my_hash = sha256_of(args.message_file)
- if not args.no_dedup:
- decision, reason = find_dedup_match(args.message_file, fm, my_hash)
- if decision == DEC_DEDUP:
- return emit_decision(DEC_DEDUP, reason, fm, my_hash, args)
-
- # Step 5: REQUIRES_TOOLS check.
- ok, missing_tools = check_requires_tools(fm)
- if not ok:
- return emit_decision(
- DEC_QUERY,
- f"required tools unavailable: {', '.join(missing_tools)}",
- fm,
- my_hash,
- args,
- )
-
- # Step 6: process.
- return emit_decision(DEC_PROCESS, None, fm, my_hash, args)
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-recv.md b/.ai/scripts/cross-agent-comms/cross-agent-recv.md
deleted file mode 100644
index 247a27a..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-recv.md
+++ /dev/null
@@ -1,218 +0,0 @@
-# cross-agent-recv
-
-**Purpose.** The canonical receiver-side processor. Reads a single incoming
-message file and reports a structured decision the agent acts on:
-process / dedup / query / reject.
-
-The script handles only mechanical checks (frontmatter, signature, dedup,
-version, tools). Substance-level decisions like `pushback` ("I disagree with
-this request") happen one layer up — after the agent reads the message body
-the script returns as `process`-able.
-
-This is the read-side counterpart to `cross-agent-send`. Together they are the
-two halves of the per-message contract. The agent's polling loop calls
-`cross-agent-recv` on every new file in `inbox/from-agents/` and dispatches on
-the decision.
-
-Without this script, every receiver implementation re-invents GPG verify +
-frontmatter sanity-check + SHA-256 dedup. With it, behavior is consistent
-across projects.
-
-## Usage
-
-```
-cross-agent-recv <message-file>
-```
-
-Single positional argument: a `.org` file in `inbox/from-agents/`. The matching
-`.asc` signature file must be present alongside it.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--no-verify` | (verify on) | Skip GPG verification. Testing only. |
-| `--no-dedup` | (dedup on) | Skip SHA-256 dedup against existing files. Testing only. |
-| `--protocol-version <N>` | 5 | Override the expected protocol version. Useful for testing forward-compatibility checks. |
-| `--json` | off | Output decision as JSON for easier parsing by the agent. |
-
-## Behavior
-
-Runs the receiver checks in order. First failure determines the decision.
-
-### Step 1 — Frontmatter sanity-check
-
-Parse the message's org-mode frontmatter. Required fields:
-
-- `#+TITLE`
-- `#+CONVERSATION_ID`
-- `#+MESSAGE_TYPE` (must be one of: `request`, `progress`, `query`, `pushback`,
- `complete`, `release`, `escalate`)
-- `#+SEQUENCE` (integer)
-- `#+TIMESTAMP` (ISO 8601 with explicit offset)
-- `#+PROTOCOL_VERSION` (must match the expected version; default 5)
-
-Any required field missing, malformed, or the protocol version mismatched →
-decision = `reject` (frontmatter) or `query` (version mismatch — see below).
-
-### Step 2 — Protocol-version check
-
-If `PROTOCOL_VERSION` doesn't match the expected:
-
-- Decision = `query`. Action: receiver should write a `query` reply asking the
- sender to upgrade to the expected protocol version.
-
-### Step 3 — Signature verification
-
-Look for `<message-file>.asc` alongside the `.org`. If missing or `gpg
---verify` fails:
-
-- Decision = `reject` (signature). Surface to user; do not act.
-
-The `.asc` file MUST be present when the `.org` is — `cross-agent-send`
-guarantees this with its strict ordering (`.asc` lands first). If the `.asc`
-is missing despite the `.org` being present, the sender violated atomic-write
-ordering or the file was tampered with in transit.
-
-### Step 4 — SHA-256 dedup
-
-Compute SHA-256 of the message file. Scan the same directory for existing
-files matching `CONVERSATION_ID + SEQUENCE`:
-
-- No match → decision = `process` (new message, dispatch by type).
-- Match with **identical** SHA-256 → decision = `dedup` (silent retry; do not
- reprocess).
-- Match with **different** SHA-256 → decision = `process` (sequence collision
- with non-identical content; both are legitimate, ordered by `#+TIMESTAMP`).
-
-### Step 5 — REQUIRES_TOOLS optional check
-
-If the message has a `#+REQUIRES_TOOLS` field, verify each named tool/MCP is
-available in the receiver's environment.
-
-- All available → `process`.
-- One or more missing → decision = `query`. The agent should write a `query`
- reply naming the missing tools, asking the sender to reframe the request to
- avoid them.
-
-### Step 6 — Dispatch decision
-
-If all checks pass, decision = `process` with the parsed `MESSAGE_TYPE` so the
-agent's main loop knows which handler to invoke.
-
-## Output
-
-### Default (human-readable)
-
-```
-$ cross-agent-recv inbox/from-agents/20260427T091015Z-from-homelab-prep-fixup.org
-decision: process
-message_type: request
-conversation_id: prep-fixup
-sequence: 6
-sha256: a1b2c3d4...
-```
-
-### `--json`
-
-```json
-{
- "decision": "process",
- "reason": null,
- "message_type": "request",
- "conversation_id": "prep-fixup",
- "sequence": 6,
- "timestamp": "2026-04-27T04:11:42-05:00",
- "sha256": "a1b2c3d4..."
-}
-```
-
-For decisions other than `process`, `reason` carries a human-readable
-explanation:
-
-```json
-{
- "decision": "query",
- "reason": "PROTOCOL_VERSION mismatch: expected 5, got 4",
- "conversation_id": "prep-fixup",
- "sequence": 6
-}
-```
-
-## Decision exit codes
-
-| Decision | Exit code | Agent action |
-|---|---|---|
-| `process` | 0 | Dispatch to the message-type handler |
-| `dedup` | 1 | Silent — do nothing further |
-| `query` | 2 | Write a `query` reply (see `reason` for what to ask) |
-| `reject` | 3 | Surface to user; do not auto-reply |
-
-The agent reads stdout/JSON to learn the decision; it can also key off exit
-code for simpler bash-style dispatching.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `decision: reject (frontmatter)` | Required field missing or malformed | Open the message; fix or surface to user. The sender should not have produced this file. |
-| `decision: reject (signature)` | `.asc` missing, GPG verify failed, or signer unknown | Check that `.asc` exists alongside `.org`. If yes, run `gpg --verify <msg>.asc <msg>` manually for diagnostic output. |
-| `decision: query (PROTOCOL_VERSION)` | Sender on older/newer protocol | Reply with a `query` asking sender to upgrade. Both sides should align before continuing. |
-| `decision: query (REQUIRES_TOOLS)` | Receiver lacks one of the named tools | Reply with a `query` naming the missing tools; sender should reframe to avoid. |
-| `decision: dedup` | Already-processed identical retry | No action. The script handled it correctly. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at the start of every invocation. If
-HALT exists, exits with code 5 ("halt active; remove
-~/.config/cross-agent-comms/HALT to resume") without verifying, deduping, or
-returning a decision.
-
-**The inbound file is left in place** — not moved, not rejected, not
-deduped. When HALT clears and polling resumes, the file gets picked up via
-the normal cold-start handling (whichever surfaces first: watcher
-notification, startup workflow check, or the next agent poll). Reversibility
-is preserved.
-
-If the HALT file exists but is unreadable, fail-closed — treat as if HALT is
-set.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Basic invocation in an agent's polling loop
-for msg in inbox/from-agents/*.org; do
- decision=$(cross-agent-recv --json "$msg")
- case "$(echo "$decision" | jq -r '.decision')" in
- process) handle_message "$msg" ;;
- dedup) ;; # silent
- query) write_query_reply "$msg" "$decision" ;;
- reject) surface_to_user "$msg" "$decision" ;;
- esac
-done
-
-# Test signature verification only
-cross-agent-recv --no-dedup inbox/from-agents/test-msg.org
-
-# Test against a future protocol version
-cross-agent-recv --protocol-version 6 inbox/from-agents/future-msg.org
-```
-
-## Performance
-
-The script is fast (single SHA-256 compute, single GPG verify, frontmatter
-parse). For typical messages (single-digit KB), runs in well under 100ms.
-Dedup-scan is O(N) over files in the directory; if a project's
-`inbox/from-agents/` accumulates hundreds of files, archive released
-conversations to keep the scan fast.
-
-## See also
-
-- `cross-agent-send` — counterpart writer.
-- `cross-agent-watch` — fires when a new message arrives; agent then calls
- `cross-agent-recv` to process it.
-- `cross-agent-status` — pending-message snapshot (uses similar
- released-vs-unreleased logic, but doesn't process individual messages).
-- `cross-agent-comms.org` — protocol spec, the "what" the script implements.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume b/.ai/scripts/cross-agent-comms/cross-agent-resume
deleted file mode 100755
index 1fb83bc..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-resume
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python3
-"""Resume cross-agent comms after a halt.
-
-See cross-agent-resume.md. Removes ~/.config/cross-agent-comms/HALT and
-restarts the cross-agent-watch systemd user service. With --tailnet,
-propagates the removal to every peer in peers.toml via SSH; reports
-per-peer status with non-zero exit on partial resume.
-
-Per the asymmetry rule: clearing HALT does NOT auto-resume agent polling.
-Each session must explicitly re-engage.
-"""
-
-from __future__ import annotations
-
-import argparse
-import subprocess
-import sys
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-
-EXIT_OK = 0
-EXIT_PARTIAL = 1
-
-
-def err(msg: str) -> None:
-    print(msg, file=sys.stderr)
-
-
-def remove_halt_file() -> bool:
-    """Returns True if HALT was removed, False if it didn't exist."""
-    if HALT_FILE.exists():
-        try:
-            HALT_FILE.unlink()
-            return True
-        except OSError as e:
-            err(f"could not remove HALT: {e}")
-            return False
-    return False
-
-
-def start_watcher_service() -> None:
-    """Best-effort start of the systemd watcher path unit."""
-    try:
-        subprocess.run(
-            ["systemctl", "--user", "start", "cross-agent-watch.path"],
-            capture_output=True, text=True, timeout=5,
-        )
-    except (FileNotFoundError, subprocess.TimeoutExpired):
-        pass
-
-
-def load_peers() -> dict:
-    if not PEERS_TOML.exists():
-        return {}
-    try:
-        return tomllib.loads(PEERS_TOML.read_text())
-    except (tomllib.TOMLDecodeError, OSError) as e:
-        err(f"cannot parse peers.toml: {e}")
-        return {}
-
-
-def ssh_remove_halt(host: str, ssh_user: str | None) -> tuple[bool, str]:
-    target = f"{ssh_user}@{host}" if ssh_user else host
-    remote_cmd = "rm -f ~/.config/cross-agent-comms/HALT"
-    try:
-        result = subprocess.run(
-            ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd],
-            capture_output=True, text=True, timeout=10,
-        )
-    except (FileNotFoundError, subprocess.TimeoutExpired):
-        return False, "ssh unavailable or timed out"
-    if result.returncode == 0:
-        return True, "HALT cleared"
-    return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
-
-
-def print_re_engage_instructions() -> None:
-    print()
-    print("Halt cleared. Watcher restarted.")
-    print()
-    print("Agent polling does NOT auto-resume — per the failsafe asymmetry rule,")
-    print("agents stay paused until you explicitly re-engage each session.")
-    print("Open the relevant Claude session and tell the agent to resume polling")
-    print("for its conversation.")
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description="Resume cross-agent comms after a halt.")
-    parser.add_argument("--tailnet", action="store_true",
-                        help="Propagate HALT removal to every peer in peers.toml")
-    args = parser.parse_args()
-
-    removed = remove_halt_file()
-    start_watcher_service()
-    if removed:
-        print("Resuming locally ✓ (HALT cleared)")
-    else:
-        print("Resuming locally ✓ (no HALT was active)")
-
-    if not args.tailnet:
-        print_re_engage_instructions()
-        return EXIT_OK
-
-    peers = load_peers().get("peers", {})
-    if not peers:
-        print()
-        print("No peers configured in peers.toml — local-only resume complete.")
-        print_re_engage_instructions()
-        return EXIT_OK
-
-    print()
-    successes = 1
-    failures = []
-    for name, cfg in sorted(peers.items()):
-        host = cfg.get("host", name)
-        ssh_user = cfg.get("ssh_user")
-        ok, detail = ssh_remove_halt(host, ssh_user)
-        marker = "✓" if ok else "✗"
-        print(f"Resuming {host:<27} {marker} ({detail})")
-        if ok:
-            successes += 1
-        else:
-            failures.append(f"{name} ({host}): {detail}")
-
-    print()
-    total = len(peers) + 1
-    if failures:
-        print(f"PARTIAL RESUME: {successes}/{total} machines cleared.")
-        for f in failures:
-            print(f" - {f}")
-        print("Resolve the failures or manually clear HALT on each machine.")
-        print_re_engage_instructions()
-        return EXIT_PARTIAL
-
-    print(f"Resume complete across {total} machine(s).")
-    print_re_engage_instructions()
-    return EXIT_OK
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume.md b/.ai/scripts/cross-agent-comms/cross-agent-resume.md
deleted file mode 100644
index 8aa8357..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-resume.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# cross-agent-resume
-
-**Purpose.** Clear the HALT file and restart the watcher service. Counterpart
-to `cross-agent-halt`. Resuming agent polling is **explicit per-session** —
-this script doesn't auto-revive halted polling loops; you tell each session
-to re-engage.
-
-## Usage
-
-```
-cross-agent-resume [--tailnet]
-```
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--tailnet` | local only | Clear HALT on every peer in `peers.toml` via SSH over Tailscale. |
-
-## Behavior
-
-### Local resume (default)
-
-1. Remove the HALT file: `rm -f ~/.config/cross-agent-comms/HALT`. (Use `-f`
- so a missing file isn't an error — running resume when not halted is safe.)
-2. Restart the watcher service: `systemctl --user start cross-agent-watch.path`.
-3. Print a summary:
- ```
- ✓ HALT file removed
- ✓ Watcher service started (cross-agent-watch.path)
- - cross-agent-send and cross-agent-recv will accept new operations.
- - Inbound messages held during halt will be picked up by the watcher.
- - Agent polling does NOT auto-resume. To re-engage polling in a paused
- session, open that Claude session and tell the agent to resume.
- ```
-4. Exit 0.
-
-### Cross-tailnet resume (`--tailnet`)
-
-1. Apply local resume steps 1-2 first.
-2. Read `peers.toml` for the list of remote machines.
-3. For each peer, SSH:
- ```
- ssh <user>@<host> "rm -f ~/.config/cross-agent-comms/HALT && \
- systemctl --user start cross-agent-watch.path"
- ```
-4. Track per-peer success/failure:
- ```
- Resuming velox.local ✓ (HALT cleared, watcher started)
- Resuming bastion.local ✗ (ssh exit 255: no route to host)
- Resuming locally ✓
-
- PARTIAL RESUME: 2/3 machines resumed. bastion.local still halted.
- ```
-5. Exit 0 if all peers resumed; exit 1 on any failure.
-
-## Why agent polling doesn't auto-resume
-
-Two reasons the asymmetry is deliberate:
-
-1. *Auto-resume could silently invert intentional kills.* If you halted
- because a session was misbehaving, removing HALT shouldn't quietly revive
- that session's polling. You re-engage explicitly so you're aware of which
- sessions came back online.
-
-2. *You may want to inspect before resuming.* After a halt, you might want to
- read pending messages, fix configuration, or kill a particular Claude
- session entirely. Per-session resume forces that pause.
-
-## Re-engaging polling in a Claude session
-
-After `cross-agent-resume`, open the relevant Claude session and say something
-like:
-
-```
-HALT is cleared; resume polling.
-```
-
-The agent will check the HALT file (now absent), re-create its polling
-schedule, and continue the in-flight conversation from wherever it left off.
-The conversation file is intact; the receiver will pick up any new messages
-that arrived during the halt window.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| HALT file doesn't exist | Already resumed (or never halted) | OK — `-f` makes this a no-op. |
-| `systemctl --user start` fails | Watcher service not installed | Install per `cross-agent-watch.md`'s systemd recipe. |
-| `--tailnet` resumes some peers but not others | Same as halt: peer unreachable | Per-peer status reported; resolve manually for unreachable peers. |
-| Permission denied removing HALT file | File owned by another user | Check ownership; HALT files should be owned by the running user. |
-
-## Examples
-
-```bash
-# Local resume after a halt
-cross-agent-resume
-
-# Resume all tailnet peers + local
-cross-agent-resume --tailnet
-```
-
-## Recovery flow
-
-After a halt:
-
-1. Investigate whatever caused the halt (runaway loop, bad config, etc.).
-2. Fix the underlying issue.
-3. Run `cross-agent-resume`.
-4. Open each Claude session that was polling and tell its agent to re-engage.
-5. Confirm operation with `cross-agent-status`.
-
-## See also
-
-- `cross-agent-halt` — counterpart that creates the HALT file.
-- `cross-agent-status` — verify HALT cleared and see pending messages.
-- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-send b/.ai/scripts/cross-agent-comms/cross-agent-send
deleted file mode 100755
index 68c010a..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-send
+++ /dev/null
@@ -1,356 +0,0 @@
-#!/usr/bin/env python3
-"""Cross-agent message sender.
-
-See cross-agent-send.md for the full contract. Briefly:
-
-- Destination as <machine>.<project>; resolved via peers.toml.
-- Same-machine: cp to receiver's inbox/from-agents/ with atomic rename.
-- Cross-machine: rsync over SSH (typically Tailscale) with retry+backoff.
-- GPG-signs by default; .asc renames before .org so receivers never see
- a .org without its sibling signature.
-- Generates the canonical filename; user's input filename is ignored.
-- Honors the HALT file: refuses to send and exits with code 5 when set.
-"""
-
-from __future__ import annotations
-
-import argparse
-import datetime as _dt
-import json
-import os
-import re
-import shutil
-import socket
-import subprocess
-import sys
-import tempfile
-import time
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-HALT_FILE = CONFIG_DIR / "HALT"
-STATE_DIR = Path.home() / ".local" / "state" / "cross-agent-comms"
-FAILED_SENDS_DIR = STATE_DIR / "failed-sends"
-
-EXIT_OK = 0
-EXIT_GENERAL = 1
-EXIT_DEST_NOT_FOUND = 2
-EXIT_CROSS_MACHINE_FAILED = 3
-EXIT_FRONTMATTER = 4
-EXIT_HALT = 5
-
-REQUIRED_FRONTMATTER = ["CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"]
-VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"}
-
-
-def err(msg: str) -> None:
-    print(msg, file=sys.stderr)
-
-
-def check_halt() -> None:
-    """Exit with code 5 if HALT file exists."""
-    if HALT_FILE.exists():
-        try:
-            reason = HALT_FILE.read_text().strip()
-        except OSError:
-            # Fail-closed on unreadable HALT.
-            err("halt active (HALT file present but unreadable; treated as halted)")
-            err(f"remove {HALT_FILE} to resume")
-            sys.exit(EXIT_HALT)
-        msg = "halt active"
-        if reason:
-            msg += f": {reason}"
-        err(msg)
-        err(f"remove {HALT_FILE} to resume")
-        sys.exit(EXIT_HALT)
-
-
-def parse_frontmatter(path: Path) -> dict[str, str]:
-    """Extract org-mode #+KEY: value frontmatter from the top of the file."""
-    try:
-        text = path.read_text()
-    except OSError as e:
-        err(f"cannot read message file: {e}")
-        sys.exit(EXIT_GENERAL)
-
-    frontmatter: dict[str, str] = {}
-    for line in text.splitlines():
-        line = line.rstrip()
-        if not line:
-            # Blank line ends the frontmatter block.
-            if frontmatter:
-                break
-            continue
-        m = re.match(r"#\+([A-Z_]+):\s*(.*)", line)
-        if m:
-            frontmatter[m.group(1)] = m.group(2).strip()
-        else:
-            # First non-frontmatter line ends parsing.
-            if frontmatter:
-                break
-    return frontmatter
-
-
-def validate_frontmatter(fm: dict[str, str]) -> None:
-    missing = [k for k in REQUIRED_FRONTMATTER if k not in fm]
-    if missing:
-        err(f"frontmatter missing required fields: {', '.join(missing)}")
-        sys.exit(EXIT_FRONTMATTER)
-    if fm["MESSAGE_TYPE"] not in VALID_MESSAGE_TYPES:
-        err(f"invalid MESSAGE_TYPE: {fm['MESSAGE_TYPE']!r}; expected one of {sorted(VALID_MESSAGE_TYPES)}")
-        sys.exit(EXIT_FRONTMATTER)
-    try:
-        int(fm["SEQUENCE"])
-    except ValueError:
-        err(f"SEQUENCE must be an integer; got {fm['SEQUENCE']!r}")
-        sys.exit(EXIT_FRONTMATTER)
-
-
-def load_peers() -> dict:
-    if not PEERS_TOML.exists():
-        return {}
-    try:
-        return tomllib.loads(PEERS_TOML.read_text())
-    except (tomllib.TOMLDecodeError, OSError) as e:
-        err(f"cannot read {PEERS_TOML}: {e}")
-        sys.exit(EXIT_GENERAL)
-
-
-def resolve_destination(dest: str, peers: dict) -> tuple[str, str, str | None, str | None]:
-    """Resolve <machine>.<project> to (machine, project, host, ssh_user).
-
-    host is None for same-machine destinations.
-    """
-    if "." not in dest:
-        err(f"destination must be <machine>.<project>; got {dest!r}")
-        sys.exit(EXIT_DEST_NOT_FOUND)
-    machine, project = dest.split(".", 1)
-
-    local_hostname = socket.gethostname().split(".")[0]
-    is_local = machine == local_hostname or machine == "local"
-
-    host = None
-    ssh_user = None
-    if not is_local:
-        peer_cfg = peers.get("peers", {}).get(machine)
-        if peer_cfg is None:
-            available = list(peers.get("peers", {}).keys())
-            err(f"destination not found in peers.toml; available peers: {available or '(none)'}")
-            sys.exit(EXIT_DEST_NOT_FOUND)
-        host = peer_cfg.get("host", machine)
-        ssh_user = peer_cfg.get("ssh_user", os.environ.get("USER"))
-
-    return machine, project, host, ssh_user
-
-
-def resolve_inbox_path(project: str, peers: dict) -> str:
-    """Inbox path on the receiver. Defaults to ~/projects/<project>/inbox/from-agents."""
-    proj_cfg = peers.get("projects", {}).get(project)
-    if proj_cfg and "inbox_path" in proj_cfg:
-        return os.path.expanduser(proj_cfg["inbox_path"])
-    return f"~/projects/{project}/inbox/from-agents"
-
-
-def derive_sender_project() -> str:
-    """Walk up from CWD looking for ~/projects/<name>/.
-
-    Returns the project name if found; falls back to the basename of CWD.
-    """
-    cwd = Path.cwd().resolve()
-    projects_root = (Path.home() / "projects").resolve()
-    try:
-        rel = cwd.relative_to(projects_root)
-        return rel.parts[0]
-    except ValueError:
-        return cwd.name
-
-
-def generate_canonical_filename(sender: str, conv_id: str) -> str:
-    """YYYYMMDDTHHMMSSZ-from-<sender>-<conv-id>.org"""
-    now = _dt.datetime.now(_dt.timezone.utc)
-    timestamp = now.strftime("%Y%m%dT%H%M%SZ")
-    return f"{timestamp}-from-{sender}-{conv_id}.org"
-
-
-def sign(message_path: Path, sig_path: Path, key: str | None) -> None:
-    """gpg --detach-sign --armor --output <sig> [--local-user <key>] <message>"""
-    cmd = ["gpg", "--detach-sign", "--armor", "--yes", "--output", str(sig_path)]
-    if key:
-        cmd.extend(["--local-user", key])
-    cmd.append(str(message_path))
-    try:
-        result = subprocess.run(cmd, capture_output=True, text=True)
-    except FileNotFoundError:
-        err("gpg not found; install gnupg or use --no-sign for testing")
-        sys.exit(EXIT_GENERAL)
-    if result.returncode != 0:
-        err(f"signing failed: {result.stderr.strip()}")
-        sys.exit(EXIT_GENERAL)
-
-
-def same_machine_deliver(message_path: Path, sig_path: Path | None, target_dir: Path, canonical_name: str) -> None:
-    """Atomic-write delivery: stage .asc, mv to final, then stage .org, mv to final."""
-    target_dir.mkdir(parents=True, exist_ok=True)
-    final_msg = target_dir / canonical_name
-    final_sig = target_dir / f"{canonical_name}.asc"
-
-    if sig_path is not None:
-        # Stage .asc first, mv to final, THEN stage .org and mv to final.
-        with tempfile.NamedTemporaryFile(
-            mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.asc.", delete=False
-        ) as tmp:
-            tmp.write(sig_path.read_bytes())
-            tmp_sig_path = Path(tmp.name)
-        os.replace(tmp_sig_path, final_sig)
-
-    # Re-check HALT between .asc and .org per the layered-checks rule.
-    check_halt()
-
-    with tempfile.NamedTemporaryFile(
-        mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.", delete=False
-    ) as tmp:
-        tmp.write(message_path.read_bytes())
-        tmp_msg_path = Path(tmp.name)
-    os.replace(tmp_msg_path, final_msg)
-
-
-def cross_machine_deliver(
-    message_path: Path,
-    sig_path: Path | None,
-    canonical_name: str,
-    host: str,
-    ssh_user: str,
-    inbox_path: str,
-    retries: int,
-) -> bool:
-    """rsync push the .asc first (if signed), re-check HALT, then push the .org.
-
-    Returns True on success, False on persistent failure (after retries).
-    """
-    # Stage local copies with the canonical name so rsync sets the right
-    # destination filename.
-    with tempfile.TemporaryDirectory(prefix="cross-agent-send-") as staging:
-        staging_dir = Path(staging)
-        local_msg = staging_dir / canonical_name
-        local_msg.write_bytes(message_path.read_bytes())
-        local_sig = None
-        if sig_path is not None:
-            local_sig = staging_dir / f"{canonical_name}.asc"
-            local_sig.write_bytes(sig_path.read_bytes())
-
-        backoffs = [5, 30, 120]
-        # Step 1: push .asc first if signed.
-        if local_sig is not None:
-            if not _rsync_with_retries(local_sig, host, ssh_user, inbox_path, retries, backoffs):
-                return False
-
-        # Re-check HALT between .asc and .org per the layered-checks rule.
-        check_halt()
-
-        # Step 2: push .org.
-        if not _rsync_with_retries(local_msg, host, ssh_user, inbox_path, retries, backoffs):
-            return False
-
-    return True
-
-
-def _rsync_with_retries(
-    src: Path, host: str, ssh_user: str, inbox_path: str, retries: int, backoffs: list[int]
-) -> bool:
-    target = f"{ssh_user}@{host}:{inbox_path}/"
-    last_err = ""
-    for attempt in range(retries + 1):
-        if attempt > 0:
-            check_halt()
-            wait = backoffs[min(attempt - 1, len(backoffs) - 1)]
-            err(f"rsync attempt {attempt} failed: {last_err}; retrying in {wait}s")
-            time.sleep(wait)
-        try:
-            result = subprocess.run(
-                ["rsync", "-a", str(src), target],
-                capture_output=True,
-                text=True,
-            )
-        except FileNotFoundError:
-            err("rsync not found; install rsync")
-            return False
-        if result.returncode == 0:
-            return True
-        last_err = result.stderr.strip() or f"exit {result.returncode}"
-    err(f"rsync failed after {retries + 1} attempts: {last_err}")
-    return False
-
-
-def write_failed_send_marker(dest: str, message_path: Path, error: str, retry_log: list[str]) -> None:
-    FAILED_SENDS_DIR.mkdir(parents=True, exist_ok=True)
-    timestamp = _dt.datetime.now(_dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
-    safe_basename = re.sub(r"[^A-Za-z0-9._-]", "_", message_path.name)
-    marker = FAILED_SENDS_DIR / f"{timestamp}-{dest.replace('.', '-')}-{safe_basename}.json"
-    marker.write_text(json.dumps(
-        {
-            "timestamp": timestamp,
-            "destination": dest,
-            "message_path": str(message_path),
-            "error": error,
-            "retry_log": retry_log,
-        },
-        indent=2,
-    ))
-    err(f"marker written: {marker}")
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description="Send a cross-agent message.")
-    parser.add_argument("destination", help="Destination as <machine>.<project>")
-    parser.add_argument("message_file", type=Path, help="Path to the message body file")
-    parser.add_argument("--no-sign", action="store_true", help="Skip GPG signing (testing only)")
-    parser.add_argument("--retries", type=int, default=3, help="Retry count for cross-machine sends")
-    parser.add_argument("--key", help="GPG key id to sign with (default: user's primary)")
-    args = parser.parse_args()
-
-    check_halt()
-
-    if not args.message_file.is_file():
-        err(f"message file not found: {args.message_file}")
-        return EXIT_GENERAL
-
-    fm = parse_frontmatter(args.message_file)
-    validate_frontmatter(fm)
-
-    peers = load_peers()
-    machine, project, host, ssh_user = resolve_destination(args.destination, peers)
-    inbox_path = resolve_inbox_path(project, peers)
-
-    sender = derive_sender_project()
-    canonical_name = generate_canonical_filename(sender, fm["CONVERSATION_ID"])
-
-    sig_tmp = None
-    if not args.no_sign:
-        sig_tmp = args.message_file.with_suffix(args.message_file.suffix + ".asc.tmp")
-        sign(args.message_file, sig_tmp, args.key)
-
-    try:
-        if host is None:
-            # Same-machine delivery.
-            target_dir = Path(os.path.expanduser(inbox_path))
-            same_machine_deliver(args.message_file, sig_tmp, target_dir, canonical_name)
-            print(f"sent: {target_dir}/{canonical_name}")
-            return EXIT_OK
-        else:
-            ok = cross_machine_deliver(
-                args.message_file, sig_tmp, canonical_name, host, ssh_user, inbox_path, args.retries
-            )
-            if ok:
-                print(f"sent: {ssh_user}@{host}:{inbox_path}/{canonical_name}")
-                return EXIT_OK
-            write_failed_send_marker(args.destination, args.message_file, "rsync failed after retries", [])
-            return EXIT_CROSS_MACHINE_FAILED
-    finally:
-        if sig_tmp is not None and sig_tmp.exists():
-            sig_tmp.unlink()
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-send.md b/.ai/scripts/cross-agent-comms/cross-agent-send.md
deleted file mode 100644
index 29bfb24..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-send.md
+++ /dev/null
@@ -1,199 +0,0 @@
-# cross-agent-send
-
-**Purpose.** Send a cross-agent message file to a specific destination. Handles
-peer-config lookup, GPG signing, atomic write (same-machine) or rsync push
-(cross-machine), retry-with-backoff, and failure surfacing.
-
-This is the canonical writer. The protocol spec defers all writer mechanics to
-this script.
-
-## Usage
-
-```
-cross-agent-send <destination> <message-file> [--no-sign] [--retries N]
-```
-
-### Positional arguments
-
-| Position | Meaning | Example |
-|---|---|---|
-| 1 | Destination as `<machine>.<project>` | `homelab.career`, `velox.career` |
-| 2 | Message file (already-formatted `.org`) | `/tmp/my-message.org` |
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--no-sign` | (signing on) | Skip GPG signing. Use only for testing; receivers reject unsigned messages by default. |
-| `--retries N` | 3 | Override retry count for cross-machine sends. |
-| `--key <key-id>` | (user's primary key) | GPG key to sign with. Resolution order: `--key` flag, `GPG_USER` env, `git config user.signingkey`, then the first secret key in the keyring. |
-
-## Behavior
-
-### Filename generation (script-controlled)
-
-The script generates the canonical destination filename from the message's
-frontmatter and sender context. The user's input filename is ignored — pass any
-path, the script names the destination correctly:
-
-```
-<UTC-now>T<HHMMSS>Z-from-<sender-slug>-<short-conv-id>.org
-```
-
-`<sender-slug>` comes from the sender machine's project name (config or
-hostname-based). `<short-conv-id>` is read from the message's
-`#+CONVERSATION_ID` frontmatter field. UTC timestamp is generated at send time.
-
-The script also performs the **sender-side max-seen scan** before writing: it
-reads the receiver's `from-agents/` directory, finds the highest existing
-sequence in this conversation across both sender prefixes, and (best-effort)
-suggests `max(seen) + 1` for the next sequence. The user/agent is responsible
-for setting `#+SEQUENCE` in the message body; the script only advises.
-
-### Same-machine destinations
-
-Resolved when the destination's machine matches the current hostname (or is
-not in `peers.toml` as a remote). Steps:
-
-1. Parse frontmatter; extract `CONVERSATION_ID` and `TIMESTAMP`. Validate per
- the *Validation before send* section below.
-2. Generate canonical filename per *Filename generation* above.
-3. Sign: `gpg --detach-sign --armor --output <canonical>.asc --local-user <key> <input>`.
-4. Compute target: read `peers.toml` for the project's `inbox_path`. If
- missing, fall back to `~/projects/<project>/inbox/from-agents/`.
-5. **Atomic write with strict ordering** (signature must precede message):
- - Stage `.asc`: write to `<target>/.tmp.XXXXXX-<canonical>.asc`,
- then `mv` to `<target>/<canonical>.asc`.
- - **Then** stage `.org`: write to `<target>/.tmp.XXXXXX-<canonical>`,
- then `mv` to `<target>/<canonical>`.
- - Receivers only act on `.org` files; staging the `.asc` first guarantees
- the signature is present when the receiver opens the message. Out-of-order
- would race: receiver could read the `.org` before the `.asc` lands and
- fail GPG verify even though the sender did everything right.
-6. Exit 0 on success. Exit non-zero if any step fails.
-
-### Cross-machine destinations
-
-Steps:
-
-1. Parse + generate canonical filename, as same-machine steps 1-2.
-2. Sign locally to `<input>.asc` (or a tmp staging file).
-3. rsync push **with the same .asc-first ordering**:
- - `rsync -a <input>.asc <ssh-user>@<host>:<inbox_path>/<canonical>.asc`
- - **Then** `rsync -a <input> <ssh-user>@<host>:<inbox_path>/<canonical>`
- rsync writes to a hidden temp file then renames atomically by default
- (`--inplace` would defeat this; do not pass it).
-4. Retry on failure: 5s, 30s, 120s backoff, then surface error.
-5. On persistent failure: write a marker file to
- `~/.local/state/cross-agent-comms/failed-sends/<timestamp>-<dest>-<canonical>.json`
- containing the destination, message path, error, and retry log. Exit non-zero.
-
-### Validation before send
-
-- Destination resolves via `peers.toml` (or local fallback). If neither, exit
- immediately with `destination not found in peers.toml; available: <list>`.
-- Message file must be readable, non-empty, and have valid org-mode frontmatter
- with **all** of the following required fields:
- - `#+TITLE`
- - `#+CONVERSATION_ID`
- - `#+MESSAGE_TYPE`
- - `#+SEQUENCE`
- - `#+TIMESTAMP`
- - `#+PROTOCOL_VERSION` (must equal `5` for v5)
-
- If any required field is missing or malformed, exit immediately with a parse
- error naming the offending field.
-
-- Optional fields the script recognizes and passes through (no special
- handling beyond preservation):
- - `#+REQUIRES_TOOLS` — comma-separated tool/MCP slugs the receiver needs.
- - `#+RELEASE_STATUS` — valid only on `MESSAGE_TYPE: release`. Values per
- spec: `complete`, `cancelled`, `withdrawn-after-pushback`,
- `abandoned-after-escalation`.
- - `#+WORKFLOW_VERSION` — sender's version of the cross-agent-comms workflow
- file. Currently advisory; receiver may warn on mismatch but does not block.
-
-## Configuration
-
-Reads `~/.config/cross-agent-comms/peers.toml` for peer routing:
-
-```toml
-[peers.velox]
-host = "velox.local"
-ssh_user = "cjennings"
-
-# Optional: per-project inbox-path overrides for non-default layouts.
-[projects.work]
-inbox_path = "~/projects/work/inbox/from-agents"
-
-[projects.homelab]
-inbox_path = "~/projects/homelab/inbox/from-agents"
-```
-
-If a project entry is omitted, defaults to `~/projects/<project>/inbox/from-agents`.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `destination not found in peers.toml` | Misspelled destination, or peer not configured | Run `cross-agent-discover` to see available destinations. |
-| `signing failed: no secret key` | GPG key missing or not in keyring | `gpg --list-secret-keys` to confirm. Override with `--key <id>`. |
-| `signing failed: pinentry timed out` | Headless session, GUI pinentry unavailable | Confirm `pinentry-program` in `gpg-agent.conf` matches available pinentry. Per protocols.org, GUI pinentry works from Claude Code. |
-| `rsync exit 255` | SSH unreachable | `cross-agent-discover --peer <name>` to confirm reachability. |
-| `rsync exit 23` | Permission denied at destination | Check destination directory perms (`chmod 700`) and ownership. |
-| Marker file written to `failed-sends/` | Persistent cross-machine failure | Inspect the marker's `error` field. After fixing, retry: `cross-agent-send <dest> <msg>` (the marker is for visibility; it does not auto-retry). |
-| Receiver complains "unsigned message" | `--no-sign` was used in production | Don't use `--no-sign` outside testing. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at the start of every send AND
-between the `.asc` and `.org` rsync calls AND between each retry iteration.
-On HALT exists, exits with code 5 ("halt active; remove
-~/.config/cross-agent-comms/HALT to resume") without writing or pushing
-further.
-
-Worst case: one in-flight send completes its current rsync step within a few
-seconds before halt kicks in for the next step. New sends are blocked
-immediately. No `pkill` needed — the per-iteration check stops things
-naturally.
-
-If the HALT file exists but is unreadable (permissions wrong), fail-closed —
-treat as if HALT is set. Safer than fail-open.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Same-machine send
-cross-agent-send homelab.career /tmp/my-message.org
-
-# Cross-machine send via Tailscale
-cross-agent-send velox.career /tmp/my-message.org
-
-# Test send without signing (receiver will reject)
-cross-agent-send homelab.career /tmp/test.org --no-sign
-
-# Override retry count for a flaky link
-cross-agent-send velox.career /tmp/my-message.org --retries 10
-
-# After a delivery failure, inspect the marker
-cat ~/.local/state/cross-agent-comms/failed-sends/*.json | jq .
-```
-
-## Exit codes
-
-| Code | Meaning |
-|---|---|
-| 0 | Sent successfully. |
-| 1 | General error (parse failure, signing failure, etc.). |
-| 2 | Destination not found in peers.toml. |
-| 3 | Cross-machine delivery failed after retries. Marker file written. |
-| 4 | Frontmatter validation failed. |
-
-## See also
-
-- `cross-agent-discover` — validate destinations before sending.
-- `cross-agent-watch` — receiver-side notification.
-- `cross-agent-status` — see what's queued.
-- `cross-agent-comms.org` — protocol spec, the "what" the script implements.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-status b/.ai/scripts/cross-agent-comms/cross-agent-status
deleted file mode 100755
index 4eee75b..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-status
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python3
-"""Point-in-time snapshot of pending cross-agent messages across local projects.
-
-See cross-agent-status.md. Pending = messages in inbox/from-agents/ whose
-CONVERSATION_ID has no MESSAGE_TYPE: release at a later #+TIMESTAMP.
-
-HALT: prints a prominent banner before normal output, but continues to enumerate.
-"""
-
-from __future__ import annotations
-
-import argparse
-import glob
-import json
-import os
-import re
-import sys
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-DEFAULT_GLOB = str(Path.home() / "projects" / "*" / "inbox" / "from-agents") + "/"
-
-
-def parse_frontmatter(path: Path) -> dict[str, str]:
- try:
- text = path.read_text()
- except OSError:
- return {}
- fm: dict[str, str] = {}
- for line in text.splitlines():
- line = line.rstrip()
- if not line:
- if fm:
- break
- continue
- m = re.match(r"#\+([A-Z_]+):\s*(.*)", line)
- if m:
- fm[m.group(1)] = m.group(2).strip()
- elif fm:
- break
- return fm
-
-
-def project_name_from_path(path: str) -> str:
- """Walk up from path to find ~/projects/<name>/..."""
- home = str(Path.home())
- parts = Path(path).parts
- for i, part in enumerate(parts):
- if part == "projects" and i + 1 < len(parts) and str(Path(*parts[: i + 1])) == os.path.join(home, "projects"):
- return parts[i + 1]
- # Fallback: dir three levels up from the .org file (project/inbox/from-agents/file.org)
- return Path(path).parent.parent.parent.name
-
-
-def scan_project(inbox_dir: Path) -> tuple[int, str | None, int | None]:
- """Return (pending_count, most_recent_filename_or_None, most_recent_age_seconds_or_None)."""
- if not inbox_dir.is_dir():
- return 0, None, None
-
- # Group .org files by CONVERSATION_ID, also collect release timestamps per conv.
- org_files = sorted(inbox_dir.glob("*.org"))
- if not org_files:
- return 0, None, None
-
- by_conv: dict[str, list[tuple[str, str, Path]]] = {} # conv_id -> [(timestamp, msg_type, path)]
- for f in org_files:
- fm = parse_frontmatter(f)
- conv = fm.get("CONVERSATION_ID")
- ts = fm.get("TIMESTAMP")
- mt = fm.get("MESSAGE_TYPE")
- if not conv or not ts or not mt:
- # Malformed file: count as pending under conv "_unparseable".
- by_conv.setdefault("_unparseable", []).append(("", "request", f))
- continue
- by_conv.setdefault(conv, []).append((ts, mt, f))
-
- pending_files: list[Path] = []
- for conv, entries in by_conv.items():
- entries.sort(key=lambda e: e[0])
- # Find the latest release timestamp.
- release_ts = None
- for ts, mt, _f in entries:
- if mt == "release" and (release_ts is None or ts > release_ts):
- release_ts = ts
- for ts, mt, f in entries:
- if mt == "release":
- continue
- if release_ts is not None and ts <= release_ts:
- continue
- pending_files.append(f)
-
- if not pending_files:
- return 0, None, None
-
- # Most-recent by mtime (proxy for arrival order).
- most_recent = max(pending_files, key=lambda p: p.stat().st_mtime)
- import time
- age = int(time.time() - most_recent.stat().st_mtime)
- return len(pending_files), most_recent.name, age
-
-
-def fmt_age(seconds: int | None) -> str:
- if seconds is None:
- return "—"
- if seconds < 60:
- return f"{seconds}s ago"
- if seconds < 3600:
- return f"{seconds // 60} min ago"
- if seconds < 86400:
- return f"{seconds // 3600} hr ago"
- return f"{seconds // 86400} day(s) ago"
-
-
-def render_banner_if_halt() -> None:
- if not HALT_FILE.exists():
- return
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- reason = "(HALT file unreadable; treated as halted)"
- print("⚠ HALT ACTIVE — cross-agent comms paused")
- if reason:
- print(f" reason: {reason}")
- print(f" clear: rm {HALT_FILE} (or: cross-agent-resume)")
- print()
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Snapshot of pending cross-agent messages across local projects.")
- parser.add_argument("--json", action="store_true", help="Emit JSON output")
- parser.add_argument("--projects-glob", default=DEFAULT_GLOB,
- help=f"Glob for project from-agents dirs (default: {DEFAULT_GLOB})")
- args = parser.parse_args()
-
- render_banner_if_halt()
-
- matched = sorted(glob.glob(args.projects_glob))
- rows = []
- for path in matched:
- inbox = Path(path)
- if not inbox.is_dir():
- continue
- proj = project_name_from_path(path)
- count, most_recent, age = scan_project(inbox)
- rows.append({
- "name": proj,
- "pending_count": count,
- "most_recent": (
- {"filename": most_recent, "age_seconds": age}
- if most_recent else None
- ),
- })
-
- # Sort: pending-first, then alphabetical by name.
- rows.sort(key=lambda r: (-r["pending_count"], r["name"]))
-
- if args.json:
- import datetime as _dt
- payload = {
- "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(),
- "halt_active": HALT_FILE.exists(),
- "projects": rows,
- }
- print(json.dumps(payload, indent=2))
- return 0
-
- if not rows:
- print("No projects with inbox/from-agents/ found — 0 pending.")
- return 0
-
- # Human-readable table.
- name_w = max(len("project"), max(len(r["name"]) for r in rows))
- print(f"{'project':<{name_w}} pending most-recent")
- for r in rows:
- most_recent_str = "—"
- if r["most_recent"]:
- most_recent_str = f"{r['most_recent']['filename']} ({fmt_age(r['most_recent']['age_seconds'])})"
- print(f"{r['name']:<{name_w}} {r['pending_count']:<7} {most_recent_str}")
-
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-status.md b/.ai/scripts/cross-agent-comms/cross-agent-status.md
deleted file mode 100644
index 070330c..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-status.md
+++ /dev/null
@@ -1,139 +0,0 @@
-# cross-agent-status
-
-**Purpose.** Point-in-time snapshot of pending cross-agent messages across
-every project on this machine. Run from any terminal. No daemon required.
-
-This is the user-pull layer of the cold-start story — `cross-agent-watch`
-pushes notifications, `cross-agent-status` lets the user query.
-
-## Usage
-
-```
-cross-agent-status [--json] [--projects-glob <glob>]
-```
-
-No args required.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--json` | off (table) | Output as JSON for scripting. |
-| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to scan. |
-
-## Output
-
-### Default (table)
-
-```
-$ cross-agent-status
-project pending most-recent
-career 0 —
-claude-templates 0 —
-clipper 0 —
-homelab 1 20260427T085611Z-from-career-question.org (3 min ago)
-finances 0 —
-... (other 9 projects)
-```
-
-Sort: pending-first, then alphabetical.
-
-### `--json`
-
-```json
-{
- "scanned_at": "2026-04-27T04:13:00-05:00",
- "projects": [
- {
- "name": "homelab",
- "pending_count": 1,
- "most_recent": {
- "filename": "20260427T085611Z-from-career-question.org",
- "age_seconds": 180
- }
- },
- ...
- ]
-}
-```
-
-## Pending semantics
-
-A message is "pending" if it sits in `inbox/from-agents/` AND no
-`MESSAGE_TYPE: release` exists for the same `CONVERSATION_ID` after it.
-
-Concretely:
-
-1. Scan each project's `inbox/from-agents/` for `.org` files.
-2. Group by `CONVERSATION_ID` from frontmatter.
-3. For each conversation, find the highest-`#+TIMESTAMP` message with
- `MESSAGE_TYPE: release`.
-4. Messages with `#+TIMESTAMP` after that release (or in conversations with no
- release) count as pending.
-
-Files without parseable frontmatter are counted as pending and noted in the
-output (single warning row per project).
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| Project missing from output | Project's `.ai/` directory exists but `inbox/from-agents/` does not | Created lazily on first cross-agent message; `mkdir -p` to surface in output. |
-| All projects show "0 pending" but you know one has messages | Glob misresolved, OR all messages are post-release | `cross-agent-status --projects-glob` with explicit path to confirm. |
-| Warning row "N files unparseable in <project>" | Message file has invalid frontmatter | Open the file, fix or move out. |
-
-## Performance
-
-Scans every `.org` file in every watched directory. For Craig's setup (14
-projects, single-digit messages each), runs in <100ms. If a project
-accumulates hundreds of post-release messages, archive them per the persistence
-guidance in the protocol spec.
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a
-prominent banner before normal output:
-
-```
-$ cross-agent-status
-⚠ HALT ACTIVE — cross-agent comms paused
- Reason: investigating runaway poll loop, 2026-04-27
- HALT file: ~/.config/cross-agent-comms/HALT
- Resume with: cross-agent-resume
-
-(snapshot continues normally — HALT does not suppress visibility)
-
-project pending most-recent
-career 0 —
-homelab 1 20260427T085611Z-from-career-question.org (3 min ago)
-...
-```
-
-Status is read-only, so it always runs. The banner ensures the user can't
-miss that halt is active when checking inbox state. Reason text comes from
-the HALT file's body; if empty, omit the reason line.
-
-If the HALT file exists but is unreadable, print a warning banner ("HALT
-file present but unreadable; treat as halted") and continue with normal
-output.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Snapshot
-cross-agent-status
-
-# JSON for piping
-cross-agent-status --json | jq '.projects[] | select(.pending_count > 0)'
-
-# Single-project query
-cross-agent-status --projects-glob ~/projects/work/inbox/from-agents/
-```
-
-## See also
-
-- `cross-agent-watch` — push notifications on new arrivals.
-- `cross-agent-discover` — enumerate available agents (cross-machine).
-- `cross-agent-comms.org` — protocol spec.
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-watch b/.ai/scripts/cross-agent-comms/cross-agent-watch
deleted file mode 100755
index f50ba26..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-watch
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env bash
-# cross-agent-watch — desktop-notify on new cross-agent messages.
-#
-# See cross-agent-watch.md. Watches every ~/projects/*/inbox/from-agents/ by
-# default. inotifywait fires create + moved_to events; .tmp.* files are
-# filtered out. HALT suppresses notifications but the watcher keeps running
-# and logs each event with "(suppressed by HALT)".
-
-set -uo pipefail
-
-# Defaults.
-PROJECTS_GLOB="${HOME}/projects/*/inbox/from-agents/"
-LOG_FILE="${HOME}/.local/state/cross-agent-comms/watch.log"
-HALT_FILE="${HOME}/.config/cross-agent-comms/HALT"
-QUIET=0
-NO_NOTIFY=0
-
-# Arg parsing.
-while [[ $# -gt 0 ]]; do
- case "$1" in
- --projects-glob)
- PROJECTS_GLOB="$2"; shift 2 ;;
- --log)
- LOG_FILE="$2"; shift 2 ;;
- --quiet)
- QUIET=1; shift ;;
- --no-notify)
- NO_NOTIFY=1; shift ;;
- -h|--help)
- cat <<EOF
-Usage: cross-agent-watch [--projects-glob GLOB] [--log PATH] [--quiet] [--no-notify]
-
-Watches inbox/from-agents/ directories for new cross-agent messages and fires
-desktop notifications. See cross-agent-watch.md for details.
-EOF
- exit 0 ;;
- *)
- echo "unknown flag: $1" >&2; exit 1 ;;
- esac
-done
-
-# Resolve glob to a concrete list of directories.
-# shellcheck disable=SC2086
-DIRS=( $PROJECTS_GLOB )
-# Filter out non-existent paths (glob may include literal pattern when no match).
-EXISTING=()
-for d in "${DIRS[@]}"; do
- if [[ -d "$d" ]]; then
- EXISTING+=( "$d" )
- fi
-done
-
-if [[ ${#EXISTING[@]} -eq 0 ]]; then
- echo "cross-agent-watch: glob resolved 0 directories: $PROJECTS_GLOB" >&2
- exit 1
-fi
-
-# Ensure log dir exists.
-mkdir -p "$(dirname "$LOG_FILE")"
-
-[[ $QUIET -eq 0 ]] && echo "cross-agent-watch: watching ${#EXISTING[@]} dir(s); log: $LOG_FILE"
-
-# Helper: project name from path like /home/.../projects/<name>/inbox/from-agents/...
-project_name() {
- local path="$1"
- # Match ~/projects/<name>/...
- if [[ "$path" =~ ${HOME}/projects/([^/]+)/ ]]; then
- echo "${BASH_REMATCH[1]}"
- else
- basename "$(dirname "$(dirname "$path")")"
- fi
-}
-
-# Main loop. inotifywait emits one line per event in the format
-# "<full-path>" because we passed --format '%w%f'.
-inotifywait -m -e create,moved_to --format '%w%f' "${EXISTING[@]}" 2>/dev/null \
- | while IFS= read -r path; do
- filename="$(basename "$path")"
-
- # Filter .tmp.* staging files.
- case "$filename" in
- .tmp.*) continue ;;
- esac
-
- # Filter .asc sidecars — they land first per the atomic-write ordering;
- # the .org event will fire after.
- case "$filename" in
- *.asc) continue ;;
- esac
-
- proj="$(project_name "$path")"
- iso="$(date -u "+%Y-%m-%dT%H:%M:%SZ")"
-
- if [[ -e "$HALT_FILE" ]]; then
- printf '%s\t%s\t%s\t(suppressed by HALT)\n' "$iso" "$proj" "$filename" >> "$LOG_FILE"
- [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename (suppressed by HALT)"
- continue
- fi
-
- printf '%s\t%s\t%s\n' "$iso" "$proj" "$filename" >> "$LOG_FILE"
- [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename"
-
- if [[ $NO_NOTIFY -eq 0 ]]; then
- notify info "Cross-agent message" "${proj}: ${filename}" --persist 2>/dev/null || true
- fi
- done
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-watch.md b/.ai/scripts/cross-agent-comms/cross-agent-watch.md
deleted file mode 100644
index 04e8005..0000000
--- a/.ai/scripts/cross-agent-comms/cross-agent-watch.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# cross-agent-watch
-
-**Purpose.** Long-running watcher that fires desktop notifications when new
-cross-agent messages land in any project's `inbox/from-agents/` directory.
-This is the primary cold-start mechanism: messages get noticed even when no
-Claude session is active.
-
-## Usage
-
-```
-cross-agent-watch [--projects-glob <glob>] [--log <path>]
-```
-
-No args required. Defaults:
-
-- Watches `~/projects/*/inbox/from-agents/` (matches every project with the
- cross-agent-comms convention).
-- Logs each event to `~/.local/state/cross-agent-comms/watch.log`.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to watch. Useful for testing on a single project. |
-| `--log <path>` | `~/.local/state/cross-agent-comms/watch.log` | Override log location. Set to `/dev/null` to disable logging. |
-| `--quiet` | off | Suppress stdout output. Notifications still fire. |
-| `--no-notify` | off | Skip `notify` calls. Useful for testing the watcher loop without spamming notifications. |
-
-## Behavior
-
-1. Resolves the projects-glob to a concrete list of directories at startup.
- New projects added to `~/projects/` after startup are NOT picked up — restart
- the watcher to re-resolve.
-2. Runs `inotifywait -m -e create,moved_to --format '%w%f'` against each
- watched directory.
-3. For each event, calls
- `notify info "Cross-agent message" "<project>: <filename>" --persist`. The
- `--persist` flag keeps the page on screen until dismissed, so an inbound
- message that arrives while Craig is away from the desk isn't missed.
-4. Appends an event line to the log:
- `<ISO-8601-timestamp>\t<project>\t<filename>`.
-
-## Event filtering
-
-- Watches `create` AND `moved_to` events. The `moved_to` part is critical for
- the atomic-write convention (`mktemp` + `mv` produces a `moved_to`, not a
- `create`).
-- Files starting with `.tmp.` are ignored — they're staging files from
- in-progress writes that should never produce a notification.
-
-## Installation
-
-### Option A — tmux pane (personal, easy)
-
-Run in a tmux pane that survives session disconnects:
-
-```
-tmux new -d -s cross-agent-watch 'cross-agent-watch'
-```
-
-### Option B — systemd user service (production)
-
-Provided files:
-
-- `~/.config/systemd/user/cross-agent-watch.service`
-- `~/.config/systemd/user/cross-agent-watch.path`
-
-Enable with:
-
-```
-systemctl --user enable --now cross-agent-watch.path
-```
-
-The path unit triggers the service unit on filesystem changes; the service
-unit re-execs `cross-agent-watch` if it dies. Survives reboot.
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| No notifications fire on new files | inotifywait not running, or glob resolved to zero dirs | Check `cross-agent-watch --projects-glob ... --quiet` exits non-zero immediately. Log shows `"resolved 0 directories"`. |
-| Notifications fire on `.tmp.` files | Filter regression | Verify `inotifywait` events show the `.tmp.` files; if so check this script's filter logic. |
-| Some files missed under rapid bursts | inotify queue overflow | Increase `fs.inotify.max_queued_events` sysctl. Default 16384 is usually fine. |
-| Permission denied on a watched dir | Directory perms wrong | `chmod 700 <dir>` and confirm owner. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` on each iteration (each inotifywait
-event fired). If HALT exists, the watcher continues running but **suppresses
-the `notify` call**. The event is still logged, with `(suppressed by HALT)`
-appended:
-
-```
-2026-04-27T04:42:00-05:00 career 20260427T094200Z-from-homelab-test.org (suppressed by HALT)
-```
-
-Logged-but-suppressed events are useful for the operator to see what would
-have fired during the halt window — helpful for diagnosing whatever caused
-the halt.
-
-When HALT clears, suppression stops; subsequent events fire normally. Backlog
-events that arrived during halt are NOT replayed — they get picked up via
-cold-start handling (status CLI, agent startup check, or the next agent
-poll once polling resumes).
-
-If the HALT file exists but is unreadable, fail-closed (suppress) — safer
-than fail-open.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Watch all projects, log everything, fire notifications
-cross-agent-watch
-
-# Test against a single project, no notifications, verbose
-cross-agent-watch \
- --projects-glob "$HOME/projects/work/inbox/from-agents/" \
- --no-notify
-
-# Production-style: quiet stdout, log only
-cross-agent-watch --quiet
-```
-
-## See also
-
-- `cross-agent-status` — point-in-time snapshot of pending messages.
-- `cross-agent-send` — counterpart writer.
-- `cross-agent-comms.org` — protocol spec.
diff --git a/.ai/scripts/tests/test_cross_agent_discover.py b/.ai/scripts/tests/test_cross_agent_discover.py
deleted file mode 100644
index f0d2bb7..0000000
--- a/.ai/scripts/tests/test_cross_agent_discover.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""Tests for cross-agent-discover (TDD: tests written before implementation)."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-discover"
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def fake_home(tmp_path, monkeypatch):
- home = tmp_path / "home"
- home.mkdir()
- monkeypatch.setenv("HOME", str(home))
- return home
-
-
-def _make_project(home: Path, name: str) -> Path:
- proj = home / "projects" / name
- (proj / ".ai").mkdir(parents=True)
- return proj
-
-
-def _write_peers_toml(home: Path, content: str) -> Path:
- cfg = home / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True, exist_ok=True)
- peers = cfg / "peers.toml"
- peers.write_text(content)
- return peers
-
-
-def test_discover_help(fake_home):
- result = _run(["--help"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "discover" in result.stdout.lower() or "enumerate" in result.stdout.lower()
-
-
-def test_discover_local_only_no_projects(fake_home):
- """Empty home → reports zero local projects, zero peers."""
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- # No crash; mentions local somehow.
- assert "local" in result.stdout.lower() or "0 project" in result.stdout.lower()
-
-
-def test_discover_lists_local_projects(fake_home):
- _make_project(fake_home, "homelab")
- _make_project(fake_home, "career")
- _make_project(fake_home, "claude-templates")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "homelab" in result.stdout
- assert "career" in result.stdout
- assert "claude-templates" in result.stdout
-
-
-def test_discover_excludes_dirs_without_ai_subdir(fake_home):
- """Directories under ~/projects/ that lack .ai/ are NOT projects."""
- _make_project(fake_home, "real-project")
- (fake_home / "projects" / "not-a-project").mkdir(parents=True)
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "real-project" in result.stdout
- assert "not-a-project" not in result.stdout
-
-
-def test_discover_no_peers_toml_just_local(fake_home):
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- # No peers section since no toml.
- assert "homelab" in result.stdout
-
-
-def test_discover_lists_peers_from_toml(fake_home):
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
-
- [peers.bastion]
- host = "bastion.local"
- ssh_user = "cjennings"
- """))
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "velox" in result.stdout
- assert "bastion" in result.stdout
-
-
-def test_discover_malformed_peers_toml_errors_clearly(fake_home):
- _write_peers_toml(fake_home, "not valid toml at all = = =")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode != 0
- assert "peers.toml" in result.stderr or "TOML" in result.stderr or "parse" in result.stderr.lower()
-
-
-def test_discover_json_output_schema(fake_home):
- _make_project(fake_home, "homelab")
- _make_project(fake_home, "career")
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- """))
- result = _run(["--json", "--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert "local" in payload
- assert "peers" in payload
- assert isinstance(payload["local"], list)
- assert isinstance(payload["peers"], list)
- assert "homelab" in payload["local"]
- assert "career" in payload["local"]
- velox = next((p for p in payload["peers"] if p["name"] == "velox"), None)
- assert velox is not None
- # Reachability is a key — value depends on actual SSH state.
- assert "reachable" in velox
-
-
-def test_discover_peer_scope(fake_home):
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
-
- [peers.bastion]
- host = "bastion.local"
- """))
- result = _run(["--peer", "velox", "--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- peer_names = [p["name"] for p in payload["peers"]]
- assert "velox" in peer_names
- assert "bastion" not in peer_names
-
-
-def test_discover_unreachable_peer_marked(fake_home):
- """A peer with a definitely-unreachable host gets reachable=False."""
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.bogus]
- host = "definitely-not-a-real-host.invalid"
- ssh_user = "nobody"
- """))
- result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)}, )
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- bogus = next((p for p in payload["peers"] if p["name"] == "bogus"), None)
- assert bogus is not None
- assert bogus["reachable"] is False
-
-
-def test_discover_cache_hit_within_window(fake_home):
- """Second invocation within 5 min reads cache (skip the SSH probe)."""
- _make_project(fake_home, "homelab")
- # First call populates cache.
- result1 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result1.returncode == 0
- cache = fake_home / ".cache" / "cross-agent-comms" / "discovery.json"
- assert cache.exists()
- # Tamper with the cache to a marker only the cache path can produce.
- payload = json.loads(cache.read_text())
- payload["_test_marker"] = True
- cache.write_text(json.dumps(payload))
- # Second call (no --no-cache) should return the tampered payload.
- result2 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result2.returncode == 0
- payload2 = json.loads(result2.stdout)
- assert payload2.get("_test_marker") is True
-
-
-def test_discover_no_cache_flag_bypasses(fake_home):
- """--no-cache ignores even a fresh cache."""
- _make_project(fake_home, "homelab")
- cache_dir = fake_home / ".cache" / "cross-agent-comms"
- cache_dir.mkdir(parents=True)
- cache_dir.joinpath("discovery.json").write_text(json.dumps({
- "_test_marker": True, "local": [], "peers": []
- }))
- result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- # Cache marker should NOT appear in fresh result.
- assert payload.get("_test_marker") is None or payload.get("_test_marker") is False
- assert "homelab" in payload["local"]
-
-
-def test_discover_halt_shows_banner(fake_home):
- halt = fake_home / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted")
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0 # discover continues to print under HALT
- assert "HALT" in result.stdout
diff --git a/.ai/scripts/tests/test_cross_agent_halt.py b/.ai/scripts/tests/test_cross_agent_halt.py
deleted file mode 100644
index f8bf0b3..0000000
--- a/.ai/scripts/tests/test_cross_agent_halt.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""Tests for cross-agent-halt and cross-agent-resume (TDD)."""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-HALT_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-halt"
-RESUME_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-resume"
-
-
-def _run(script: Path, args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(script), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- """Isolated HOME + a fake systemctl that records calls without acting."""
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- fake_bin = tmp_path / "bin"
- fake_bin.mkdir()
- # Fake systemctl: no-op, exit 0.
- fake_systemctl = fake_bin / "systemctl"
- fake_systemctl.write_text("#!/usr/bin/env bash\nexit 0\n")
- fake_systemctl.chmod(0o755)
- # Fake ssh: succeed only for known-good host.
- fake_ssh = fake_bin / "ssh"
- fake_ssh.write_text(textwrap.dedent("""\
- #!/usr/bin/env bash
- # Find the destination arg (skip flags).
- target=""
- for arg in "$@"; do
- case "$arg" in
- -*|*=*) ;;
- *@*|localhost|*.local|*.invalid) target="$arg"; break ;;
- *) target="$arg"; break ;;
- esac
- done
- case "$target" in
- *invalid*|*unreachable*) exit 255 ;;
- *) exit 0 ;;
- esac
- """))
- fake_ssh.chmod(0o755)
-
- monkeypatch.setenv("HOME", str(fake_home))
- # Prepend our fake bin so systemctl + ssh are intercepted, but keep real /bin etc.
- monkeypatch.setenv("PATH", f"{fake_bin}:{os.environ.get('PATH', '')}")
- return fake_home
-
-
-# ---- cross-agent-halt ----
-
-
-def test_halt_help(isolated_env):
- result = _run(HALT_SCRIPT, ["--help"], env={**os.environ, "HOME": str(isolated_env),
- "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "halt" in result.stdout.lower()
-
-
-def test_halt_creates_halt_file(isolated_env):
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- assert not halt_file.exists()
- result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env),
- "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert halt_file.exists()
-
-
-def test_halt_with_reason_writes_body(isolated_env):
- result = _run(HALT_SCRIPT, ["pausing for incident review"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- assert halt_file.exists()
- assert "pausing for incident review" in halt_file.read_text()
-
-
-def test_halt_idempotent(isolated_env):
- """Running halt twice doesn't error."""
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- r1 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert r1.returncode == 0
- assert halt_file.exists()
- r2 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert r2.returncode == 0
- assert halt_file.exists()
-
-
-def test_halt_does_not_pkill(isolated_env):
- """Per design: halt does NOT call pkill. Verify by checking no pkill process gets launched."""
- # Replace pkill in PATH with something that fails loudly so we'd see if halt invoked it.
- fake_bin = isolated_env.parent / "bin"
- pkill = fake_bin / "pkill"
- pkill.write_text("#!/usr/bin/env bash\necho 'PKILL CALLED' >&2\nexit 99\n")
- pkill.chmod(0o755)
- result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "PKILL CALLED" not in result.stderr
-
-
-def test_halt_tailnet_reports_per_peer(isolated_env):
- """--tailnet iterates peers.toml and reports per-peer status."""
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
-
- [peers.bogus]
- host = "definitely-unreachable.invalid"
- ssh_user = "cjennings"
- """))
- result = _run(HALT_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- # Partial halt → exit 1.
- assert result.returncode == 1
- assert "velox" in result.stdout
- assert "bogus" in result.stdout
- # ✓ marker for velox, ✗ for bogus.
- assert "✓" in result.stdout
- assert "✗" in result.stdout
- assert "PARTIAL" in result.stdout or "partial" in result.stdout.lower()
-
-
-def test_halt_tailnet_all_reachable_exits_zero(isolated_env):
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
- """))
- result = _run(HALT_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "velox" in result.stdout
-
-
-# ---- cross-agent-resume ----
-
-
-def test_resume_help(isolated_env):
- result = _run(RESUME_SCRIPT, ["--help"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "resume" in result.stdout.lower()
-
-
-def test_resume_removes_halt_file(isolated_env):
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt_file.parent.mkdir(parents=True)
- halt_file.write_text("halted")
- assert halt_file.exists()
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert not halt_file.exists()
-
-
-def test_resume_when_no_halt_active_succeeds(isolated_env):
- """No HALT to clear is not an error."""
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
-
-
-def test_resume_prints_per_session_instructions(isolated_env):
- """Resume must surface that polling does NOT auto-resume."""
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt_file.parent.mkdir(parents=True)
- halt_file.write_text("halted")
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- out = result.stdout.lower()
- assert "polling" in out
- assert "auto" in out or "explicit" in out or "session" in out
-
-
-def test_resume_tailnet_partial_failure_exit_1(isolated_env):
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
-
- [peers.bogus]
- host = "unreachable-host.invalid"
- """))
- halt_file = cfg / "HALT"
- halt_file.write_text("halted")
- result = _run(RESUME_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 1
- assert "velox" in result.stdout
- assert "bogus" in result.stdout
diff --git a/.ai/scripts/tests/test_cross_agent_recv.py b/.ai/scripts/tests/test_cross_agent_recv.py
deleted file mode 100644
index 27c53a5..0000000
--- a/.ai/scripts/tests/test_cross_agent_recv.py
+++ /dev/null
@@ -1,176 +0,0 @@
-"""Tests for cross-agent-recv."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-recv"
-
-
-def _make_message(path: Path, *, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request",
- proto_version: str = "5", title: str = "Test", requires_tools: str | None = None,
- body: str = "Body.\n") -> Path:
- fm_lines = [
- f"#+TITLE: {title}",
- f"#+CONVERSATION_ID: {conv_id}",
- f"#+MESSAGE_TYPE: {msg_type}",
- f"#+SEQUENCE: {seq}",
- "#+TIMESTAMP: 2026-04-27T05:00:00-05:00",
- f"#+PROTOCOL_VERSION: {proto_version}",
- ]
- if requires_tools:
- fm_lines.append(f"#+REQUIRES_TOOLS: {requires_tools}")
- path.write_text("\n".join(fm_lines) + "\n\n" + body)
- return path
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- return fake_home
-
-
-def test_recv_help(isolated_env):
- result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "Receive and decide" in result.stdout
-
-
-def test_recv_missing_file_rejects(isolated_env, tmp_path):
- result = _run([str(tmp_path / "nope.org")], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3 # reject
-
-
-def test_recv_malformed_frontmatter_rejects(isolated_env, tmp_path):
- bad = tmp_path / "bad.org"
- bad.write_text("not org-mode at all\n")
- result = _run([str(bad), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "decision: reject" in result.stdout
-
-
-def test_recv_missing_required_field_rejects(isolated_env, tmp_path):
- msg = tmp_path / "msg.org"
- # Missing PROTOCOL_VERSION among others.
- msg.write_text("#+TITLE: x\n#+CONVERSATION_ID: c\n\nBody.\n")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "missing required" in result.stdout
-
-
-def test_recv_protocol_version_mismatch_query(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org", proto_version="4")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 2 # query
- assert "PROTOCOL_VERSION mismatch" in result.stdout
-
-
-def test_recv_invalid_message_type_rejects(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org", msg_type="banana")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "invalid MESSAGE_TYPE" in result.stdout
-
-
-def test_recv_missing_signature_rejects(isolated_env, tmp_path):
- """When verify is on, a missing .asc sibling rejects."""
- msg = _make_message(tmp_path / "msg.org")
- # No .asc sidecar.
- result = _run([str(msg)], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "signature file missing" in result.stdout
-
-
-def test_recv_valid_processes(isolated_env, tmp_path):
- """A valid message with --no-verify and no dedup match → process."""
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0 # process
- assert "decision: process" in result.stdout
- assert "sha256:" in result.stdout
-
-
-def test_recv_dedup_against_identical_existing(isolated_env, tmp_path):
- """Same content + same SEQUENCE in same dir → dedup."""
- inbox = tmp_path / "inbox"
- inbox.mkdir()
- first = _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5)
- # Second message with same content — name differs (canonical-style would have different timestamp).
- second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5)
- # Bodies must be byte-identical for hash equality.
- second.write_bytes(first.read_bytes())
- result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 1 # dedup
- assert "decision: dedup" in result.stdout
-
-
-def test_recv_collision_with_different_content_processes(isolated_env, tmp_path):
- """Same SEQUENCE + same CONVERSATION_ID but different content → process both."""
- inbox = tmp_path / "inbox"
- inbox.mkdir()
- _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5, body="First body.\n")
- second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5, body="Different body.\n")
- result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0 # process
- assert "decision: process" in result.stdout
-
-
-def test_recv_requires_tools_missing_query(isolated_env, tmp_path):
- """REQUIRES_TOOLS naming a definitely-missing binary → query."""
- msg = _make_message(tmp_path / "msg.org", requires_tools="definitely-not-installed-xyzzy-9000")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 2 # query
- assert "required tools unavailable" in result.stdout
-
-
-def test_recv_requires_tools_present_processes(isolated_env, tmp_path):
- """REQUIRES_TOOLS naming a real binary → process."""
- msg = _make_message(tmp_path / "msg.org", requires_tools="ls,cat")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "decision: process" in result.stdout
-
-
-def test_recv_json_output(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify", "--json"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert payload["decision"] == "process"
- assert payload["message_type"] == "request"
- assert payload["conversation_id"] == "test-conv"
-
-
-def test_recv_halt_blocks(isolated_env, tmp_path):
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted\n")
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 5
- assert "halt active" in result.stderr.lower()
-
-
-def test_recv_halt_leaves_message_in_place(isolated_env, tmp_path):
- """Per spec: under HALT, recv must NOT move/dedup/reject — leave file in place."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted\n")
- msg = _make_message(tmp_path / "msg.org")
- pre_content = msg.read_text()
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 5
- # File still exists with same content.
- assert msg.exists()
- assert msg.read_text() == pre_content
diff --git a/.ai/scripts/tests/test_cross_agent_send.py b/.ai/scripts/tests/test_cross_agent_send.py
deleted file mode 100644
index f716e95..0000000
--- a/.ai/scripts/tests/test_cross_agent_send.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""Tests for cross-agent-send.
-
-Subprocess-based: treat the script as a black-box CLI and assert on its
-exit codes, stdout, and the files it produces.
-"""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-send"
-
-
-def _make_message(tmp_path: Path, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request",
- proto_version: str = "5") -> Path:
- msg = tmp_path / "msg.org"
- msg.write_text(textwrap.dedent(f"""\
- #+TITLE: Test message
- #+CONVERSATION_ID: {conv_id}
- #+MESSAGE_TYPE: {msg_type}
- #+SEQUENCE: {seq}
- #+TIMESTAMP: 2026-04-27T05:00:00-05:00
- #+PROTOCOL_VERSION: {proto_version}
-
- Body.
- """))
- return msg
-
-
-def _run(args: list[str], env: dict | None = None, cwd: Path | None = None) -> subprocess.CompletedProcess:
- return subprocess.run(
- [str(SCRIPT), *args],
- capture_output=True,
- text=True,
- env=env,
- cwd=cwd,
- )
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- """Redirect HOME so peers.toml, HALT, marker files are scoped to the test."""
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- # Pre-create projects/ so derive_sender_project has somewhere to look.
- (fake_home / "projects" / "homelab").mkdir(parents=True)
- return fake_home
-
-
-def test_send_help(isolated_env):
- """--help works without side effects."""
- result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "Send a cross-agent message" in result.stdout
-
-
-def test_send_missing_message_file(isolated_env):
- """Nonexistent message file returns general error."""
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(isolated_env / "nonexistent.org")],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 1
- assert "not found" in result.stderr.lower()
-
-
-def test_send_invalid_destination_format(isolated_env, tmp_path):
- """Destination without . returns dest-not-found exit code."""
- msg = _make_message(tmp_path)
- result = _run(
- ["bogus", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 2
- assert "<machine>.<project>" in result.stderr or "destination" in result.stderr.lower()
-
-
-def test_send_dest_not_in_peers(isolated_env, tmp_path):
- """Cross-machine destination with no peers.toml entry exits 2."""
- msg = _make_message(tmp_path)
- result = _run(
- ["unknownmachine.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 2
- assert "not found in peers" in result.stderr
-
-
-def test_send_frontmatter_missing_required(isolated_env, tmp_path):
- """Message missing required fields exits 4."""
- bad = tmp_path / "bad.org"
- bad.write_text("#+TITLE: nope\n\nBody.\n")
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(bad)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 4
- assert "missing required fields" in result.stderr
-
-
-def test_send_invalid_message_type(isolated_env, tmp_path):
- """Unknown MESSAGE_TYPE exits 4."""
- msg = _make_message(tmp_path, msg_type="frobnicate")
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 4
- assert "MESSAGE_TYPE" in result.stderr
-
-
-def test_send_halt_blocks(isolated_env, tmp_path):
- """When HALT exists, send refuses with exit 5."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("test halt\n")
- msg = _make_message(tmp_path)
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 5
- assert "halt active" in result.stderr.lower()
-
-
-def test_send_same_machine_no_sign_delivers(isolated_env, tmp_path):
- """Same-machine delivery with --no-sign produces a canonically named file."""
- msg = _make_message(tmp_path, conv_id="my-conv")
- import socket
- machine = socket.gethostname().split(".")[0]
- # Sender is derived from CWD walking up to ~/projects/<name>/
- cwd = isolated_env / "projects" / "homelab"
- result = _run(
- [f"{machine}.homelab", str(msg), "--no-sign"],
- env={**os.environ, "HOME": str(isolated_env)},
- cwd=cwd,
- )
- assert result.returncode == 0, f"stderr={result.stderr}"
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- files = list(inbox.glob("*-from-homelab-my-conv.org"))
- assert len(files) == 1
- # No sig file with --no-sign.
- assert not list(inbox.glob("*.asc"))
- # Canonical filename pattern.
- assert files[0].name.startswith("2026") and files[0].name.endswith("-from-homelab-my-conv.org")
-
-
-def test_send_same_machine_signed_writes_asc(isolated_env, tmp_path):
- """Signed delivery writes both .org and .asc."""
- msg = _make_message(tmp_path, conv_id="signed-conv")
- import socket
- machine = socket.gethostname().split(".")[0]
- cwd = isolated_env / "projects" / "homelab"
- # Use the real GPG keyring (not isolating GPG — Craig's existing keys are fine for tests).
- real_env = {**os.environ, "HOME": str(isolated_env), "GNUPGHOME": str(Path.home() / ".gnupg")}
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env=real_env,
- cwd=cwd,
- )
- if result.returncode != 0:
- pytest.skip(f"GPG signing unavailable in this environment: {result.stderr}")
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- org_files = list(inbox.glob("*-from-homelab-signed-conv.org"))
- asc_files = list(inbox.glob("*-from-homelab-signed-conv.org.asc"))
- assert len(org_files) == 1
- assert len(asc_files) == 1
-
-
-def test_send_filename_ignores_input_basename(isolated_env, tmp_path):
- """User's input filename is ignored; canonical filename is generated."""
- weird = tmp_path / "weird-user-name.org"
- weird.write_text(textwrap.dedent("""\
- #+TITLE: Title
- #+CONVERSATION_ID: ignored-input
- #+MESSAGE_TYPE: request
- #+SEQUENCE: 1
- #+TIMESTAMP: 2026-04-27T05:00:00-05:00
- #+PROTOCOL_VERSION: 5
-
- Body.
- """))
- import socket
- machine = socket.gethostname().split(".")[0]
- cwd = isolated_env / "projects" / "homelab"
- result = _run(
- [f"{machine}.homelab", str(weird), "--no-sign"],
- env={**os.environ, "HOME": str(isolated_env)},
- cwd=cwd,
- )
- assert result.returncode == 0
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- # No file named after the user's input.
- assert not (inbox / "weird-user-name.org").exists()
- # Canonical naming used.
- assert list(inbox.glob("*-from-homelab-ignored-input.org"))
diff --git a/.ai/scripts/tests/test_cross_agent_status.py b/.ai/scripts/tests/test_cross_agent_status.py
deleted file mode 100644
index bb5b8ba..0000000
--- a/.ai/scripts/tests/test_cross_agent_status.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""Tests for cross-agent-status (TDD: tests written before implementation)."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-status"
-
-
-def _make_msg(path: Path, *, conv_id: str, seq: int, msg_type: str = "request",
- proto_version: str = "5", timestamp: str = "2026-04-27T05:00:00-05:00") -> Path:
- path.parent.mkdir(parents=True, exist_ok=True)
- path.write_text(textwrap.dedent(f"""\
- #+TITLE: T
- #+CONVERSATION_ID: {conv_id}
- #+MESSAGE_TYPE: {msg_type}
- #+SEQUENCE: {seq}
- #+TIMESTAMP: {timestamp}
- #+PROTOCOL_VERSION: {proto_version}
-
- Body.
- """))
- return path
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def fake_projects(tmp_path, monkeypatch):
- """Create a fake ~/projects/<name>/inbox/from-agents/ tree under tmp_path."""
- home = tmp_path / "home"
- home.mkdir()
- monkeypatch.setenv("HOME", str(home))
- return home
-
-
-def test_status_help(fake_projects):
- result = _run(["--help"], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- assert "snapshot" in result.stdout.lower() or "pending" in result.stdout.lower()
-
-
-def test_status_no_projects_clean_output(fake_projects):
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # Empty machine prints either header-only table or "no projects" — accept either.
- # No crash, no pending claims.
- assert "pending" in result.stdout.lower() or result.stdout.strip() == ""
-
-
-def test_status_one_pending_shows_up(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-fixup.org", conv_id="fixup", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- assert "homelab" in result.stdout
- assert "1" in result.stdout # pending count
- assert "20260427T100000Z-from-career-fixup.org" in result.stdout
-
-
-def test_status_released_conversation_zero_pending(fake_projects):
- """A conversation with a release message in it counts as 0 pending."""
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-done.org", conv_id="done", seq=1)
- _make_msg(inbox / "20260427T100100Z-from-homelab-done.org", conv_id="done", seq=2, msg_type="release")
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # Check the homelab row shows 0 pending.
- lines = [ln for ln in result.stdout.splitlines() if "homelab" in ln]
- # At least one homelab line should show 0 pending or "—".
- assert any("0" in ln or "—" in ln for ln in lines)
-
-
-def test_status_partial_release(fake_projects):
- """Conversation with release + a later message → that later message counts as pending."""
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-x.org", conv_id="x", seq=1,
- timestamp="2026-04-27T05:00:00-05:00")
- _make_msg(inbox / "20260427T100100Z-from-homelab-x.org", conv_id="x", seq=2, msg_type="release",
- timestamp="2026-04-27T05:01:00-05:00")
- # New message AFTER release: starts a fresh thread that's pending.
- _make_msg(inbox / "20260427T200000Z-from-career-x.org", conv_id="x", seq=3,
- timestamp="2026-04-27T15:00:00-05:00")
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln)
- assert "1" in homelab_line # the post-release message is pending
-
-
-def test_status_multiple_projects(fake_projects):
- inbox_a = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- inbox_b = fake_projects / "projects" / "career" / "inbox" / "from-agents"
- _make_msg(inbox_a / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1)
- _make_msg(inbox_b / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1)
- _make_msg(inbox_b / "20260427T100200Z-from-x-c.org", conv_id="c", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # career has 2 pending, homelab has 1.
- career_line = next(ln for ln in result.stdout.splitlines() if "career" in ln)
- homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln)
- assert "2" in career_line
- assert "1" in homelab_line
-
-
-def test_status_json_output(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-test.org", conv_id="test", seq=1)
- result = _run(["--json"], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert "projects" in payload
- assert isinstance(payload["projects"], list)
- homelab = next((p for p in payload["projects"] if p["name"] == "homelab"), None)
- assert homelab is not None
- assert homelab["pending_count"] == 1
-
-
-def test_status_sort_pending_first(fake_projects):
- """Projects with pending messages sort before projects with 0."""
- (fake_projects / "projects" / "alpha" / "inbox" / "from-agents").mkdir(parents=True)
- inbox_zeta = fake_projects / "projects" / "zeta" / "inbox" / "from-agents"
- _make_msg(inbox_zeta / "20260427T100000Z-from-x-z.org", conv_id="z", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- lines = result.stdout.splitlines()
- zeta_idx = next(i for i, ln in enumerate(lines) if "zeta" in ln)
- alpha_idx = next(i for i, ln in enumerate(lines) if "alpha" in ln)
- assert zeta_idx < alpha_idx, "pending project should sort before zero-pending project"
-
-
-def test_status_halt_shows_banner(fake_projects):
- halt = fake_projects / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted for test")
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-x-x.org", conv_id="x", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0 # status continues to print under HALT
- assert "HALT" in result.stdout
- # Banner should mention the reason.
- assert "halted for test" in result.stdout
-
-
-def test_status_projects_glob_override(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1)
- other_inbox = fake_projects / "projects" / "career" / "inbox" / "from-agents"
- _make_msg(other_inbox / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1)
- # Glob limits to homelab only.
- result = _run(
- ["--projects-glob", str(fake_projects / "projects" / "homelab" / "inbox" / "from-agents") + "/"],
- env={**os.environ, "HOME": str(fake_projects)},
- )
- assert result.returncode == 0
- assert "homelab" in result.stdout
- # career not in scope.
- assert "career" not in result.stdout
diff --git a/.ai/scripts/tests/test_cross_agent_watch.py b/.ai/scripts/tests/test_cross_agent_watch.py
deleted file mode 100644
index 417cc19..0000000
--- a/.ai/scripts/tests/test_cross_agent_watch.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""Tests for cross-agent-watch.
-
-Black-box: spawn the script, drop files into a watched dir, read the log.
-Tests use --no-notify to avoid firing real desktop notifications.
-"""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import time
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-watch"
-
-
-def _spawn(watched_dir: Path, log_path: Path, env: dict) -> subprocess.Popen:
- return subprocess.Popen(
- [
- str(SCRIPT),
- "--projects-glob", str(watched_dir) + "/",
- "--log", str(log_path),
- "--no-notify",
- "--quiet",
- ],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- env=env,
- )
-
-
-def _wait_for_log_lines(log_path: Path, expected: int, timeout: float = 5.0) -> list[str]:
- deadline = time.time() + timeout
- while time.time() < deadline:
- if log_path.exists():
- lines = [ln for ln in log_path.read_text().splitlines() if ln]
- if len(lines) >= expected:
- return lines
- time.sleep(0.1)
- if log_path.exists():
- return [ln for ln in log_path.read_text().splitlines() if ln]
- return []
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- return fake_home
-
-
-def test_watch_help(isolated_env):
- result = subprocess.run(
- [str(SCRIPT), "--help"],
- capture_output=True, text=True,
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 0
- assert "Usage:" in result.stdout
-
-
-def test_watch_empty_glob_exits_nonzero(isolated_env):
- """Glob resolving to zero dirs should exit non-zero with a clear message."""
- result = subprocess.run(
- [str(SCRIPT), "--projects-glob", "/nonexistent/path/*/foo/", "--no-notify", "--quiet"],
- capture_output=True, text=True,
- env={**os.environ, "HOME": str(isolated_env)},
- timeout=3,
- )
- assert result.returncode != 0
- assert "0 directories" in result.stderr
-
-
-def test_watch_logs_org_file_create(isolated_env, tmp_path):
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- # Give inotifywait a moment to attach.
- time.sleep(0.3)
- (watched / "test-msg.org").write_text("hello")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert len(lines) >= 1
- assert "test-msg.org" in lines[-1]
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_filters_tmp_files(isolated_env, tmp_path):
- """Files starting with .tmp. must NOT trigger log entries."""
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / ".tmp.staging-file.org").write_text("hello")
- # Wait briefly to confirm nothing logs.
- time.sleep(0.5)
- if log.exists():
- content = log.read_text()
- assert ".tmp.staging-file" not in content
- # Then drop a real file to confirm watcher is alive.
- (watched / "real.org").write_text("real")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert any("real.org" in ln for ln in lines)
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_filters_asc_sidecars(isolated_env, tmp_path):
- """Only .org events fire; .asc sidecars are silent."""
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / "msg.org.asc").write_text("sig")
- time.sleep(0.5)
- if log.exists():
- assert "msg.org.asc" not in log.read_text()
- # .org event still works.
- (watched / "msg.org").write_text("body")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert any(ln.endswith("msg.org") for ln in lines)
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_halt_suppresses_but_logs(isolated_env, tmp_path):
- """When HALT is set, watcher logs the event with (suppressed by HALT) marker."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted")
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / "halted-event.org").write_text("body")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert len(lines) >= 1
- assert "suppressed by HALT" in lines[-1]
- finally:
- proc.terminate()
- proc.wait(timeout=2)
diff --git a/.ai/sessions/2026-06-16-23-37-cross-agent-comms-removal-and-batch-specs.org b/.ai/sessions/2026-06-16-23-37-cross-agent-comms-removal-and-batch-specs.org
new file mode 100644
index 0000000..1857e58
--- /dev/null
+++ b/.ai/sessions/2026-06-16-23-37-cross-agent-comms-removal-and-batch-specs.org
@@ -0,0 +1,194 @@
+#+TITLE: Session Context
+#+DATE: 2026-06-15
+
+* Summary
+
+** Active Goal
+
+A long overnight session: process inbound, then run an autonomous 30-min work loop, ending with two directed tasks — write specs from Craig's cj instructions, and remove the unused cross-agent-comms subsystem.
+
+** Decisions
+
+- Reconciled the Phase E (inbox-zero) and "fix speedrun" proposals into ONE autonomous-batch-execution spec: a dedicated =work-the-backlog.org= holds the loop, inbox-zero stays routing-only, "fix speedrun" is a thin preset. (Craig's "your call".)
+- Demoted create-documentation + research-writer to [#D] (designed, unbuilt, awaiting a trigger).
+- Shared-asset change proposals park as [#B] VERIFYs in no-approvals mode, never self-apply (Phase E, dotfiles-discovery, archsetup ai-launcher).
+- cross-agent-comms is removable: an unused parallel system; inbox-send is the live handoff mechanism (kept). Removed it; repointed helper-mode escalation to inbox-send.
+- Wrap-up-routing implementation deferred: it moves tasks across projects' todo.org files (data-loss-adjacent), needs a focused /start-work, not a tail-end rush.
+
+** Data Collected / Findings
+
+- The 30-min loop ran ~28 idle cycles (cycles 1-30); most inbound was =emacs:=-prefixed roam items (foreign, left for .emacs.d). No rulesets task was ever tagged :next: / :solo:+:quick:, so the loop never had eligible work.
+- Candidate quick-wins surfaced: shellcheck warnings across 6 scripts (2 latent-bug suspects in bin/ai — SC2088 tilde-in-quotes, SC1083 literal brace), the parked dotfiles one-liner, token-rotation helper.
+- cross-agent-comms footprint removed: 7 scripts + 7 READMEs (canonical + mirror), cross-agent-comms.org workflow, INDEX entry, 6 test files, 3 startup.org wirings + 2 summary mentions, helper-mode escalation ref, 7 legacy ~/.local/bin symlinks. Verified: no residual refs, sync-check clean, make test green.
+
+** Files Modified
+
+- Removed: claude-templates/.ai/scripts/cross-agent-comms/ + mirror, the 6 test_cross_agent_*.py, cross-agent-comms.org (+ mirror).
+- Edited (canonical + sync'd mirror): startup.org (3 wirings + 2 summary lines, renumbered Phase A), INDEX.org (dropped entry), helper-mode.org (escalation → inbox-send).
+- docs/design/: 2026-06-16-autonomous-batch-execution-spec.org (new), 2026-06-16-encourage-kb-contribution-spec.org (new), generic-agent-runtime-spec.org (removal status note), 2026-06-15-fix-speedrun-workflow-proposal.org (filed source).
+- todo.org: many task-state edits (specs filed with review VERIFYs, audit reconciliation, demotions, parked VERIFYs, Craig's reorder).
+- working/: inbox-zero-phase-e/, ai-dotfiles-discovery/ (staged parked changes).
+- ~/.dotfiles: bootstrapped .ai/ in gitignore mode (install-ai).
+
+** Next Steps
+
+- Three parked VERIFYs await Craig: dotfiles-discovery (one line), autonomous-batch spec (6 decisions), KB-contribution spec (5 decisions).
+- Wrap-up-routing implementation — the next focused /start-work build (data-loss checkpoint).
+- Quick-win backlog to tag :solo:+:quick: when wanted: the shellcheck cleanup (with the 2 bin/ai latent-bug suspects flagged for eyes).
+
+KB: promoted 0 / consulted no
+
+* Session Log
+
+** Startup + inbox pass (2026-06-15 23:29 CDT)
+
+Ran startup clean: no crashed anchor, templates synced, no reminders/pending decisions, roam inbox 0, no cross-agent messages. Staleness nudge: 1 top-level task unreviewed >7 days. Two DOING items remain open (wrap-up routing spec ready for spec-review; helper-instance Phase 1.5 awaiting go to build).
+
+Processed 3 inbox items:
+- .emacs.d "fix speedrun" reusable-workflow proposal — ran the Skeptical Review with the cross-project battery. Passes the value gate but carries 4 unresolved design questions (new-workflow-vs-preset, page firing point + mechanism amid the page-signal removal, auto-pull vs explicit list, guardrails against design/data-loss work). Craig chose option 1: file as a task, build via spec-create later. Preserved the proposal at docs/design/2026-06-15-fix-speedrun-workflow-proposal.org and filed a [#C] :feature:spec: task carrying the skeptical-review open questions.
+- Two pearl FYIs (gitignore-tooling applied; memory sweep Phase 1.5 done) — acks of handoffs we sent, nothing asked. Deleted.
+
+Updated :LAST_INBOX_PROCESS: marker to 2026-06-15. Inbox empty.
+
+** Autonomous 30-min loop started (2026-06-15 23:44 CDT)
+
+Craig set up an autonomous loop: every 30 min, inbox-zero (project inbox + roam global), then find a task tagged :next: OR both :solo: and :quick:, evaluate it, write a spec if useful, implement it in no-approvals mode until the commit is pushed, then inbox-zero again. Runs until told to stop.
+
+Committed + pushed the inbox pass (4fe184e → e7be0de..4fe184e on origin/main).
+
+Cycle 1 (23:44): both inboxes already at zero. No eligible task — nothing tagged :next:, nothing tagged both :solo:+:quick: (no open task carries :solo: at all). One task has :quick: alone (Token-rotation helper, line 1031) but not :solo:, so it doesn't qualify. No-op cycle. Scheduled next wake in 30 min.
+
+Guardrails honored each cycle: shared-asset/convention change proposals never self-apply even in no-approvals (defer-and-stage as [#B] VERIFY per process-inbox Skeptical Review); refuse to speedrun tasks needing design decisions or carrying data-loss risk without a checkpoint (the fix-speedrun guardrail).
+
+** Full task audit (2026-06-15, Craig-requested, interrupts the loop)
+
+Ran task-audit.org over all 11 open tasks. Fanned out 3 read-only Explore subagents over batches; reconciled against session summaries + git + repo state; applied edits serially.
+
+STALE → updated autonomously:
+- Helper-instance (line 46): bumped LAST_REVIEWED 2026-06-12 → 2026-06-15 (this morning's session shipped agent-roster f8bdf30 + helper-mode.org 0b681dc; the 2026-06-15 progress note already captures the shipped-vs-wiring split accurately, so only the stamp lagged).
+- Memories-sync (line 87): rewrote the stale preamble (it claimed phases + validation both "remaining"; Phases 0-4 are all shipped) and bumped LAST_REVIEWED → 2026-06-15. Implementation complete; manual-testing child + other-machine roam.git clone (archsetup handoff) remain before DONE.
+
+CURRENT (left as-is): wrap-up routing (line 37, still awaiting spec-review), morning-ops (182), c4-rename (885), token-rotation (1031, :solo: deliberately withheld), spec-storage (1084) + fix-speedrun (1104) both filed today.
+
+NEEDS-USER (flagged, awaiting Craig): create-documentation [#C] vs [#D]; research-writer [#C] vs [#D]; generic-agent-runtime (1058) overlap/dependency with helper-instance (46).
+
+Phase E: stamped :LAST_AUDIT: 2026-06-15 in notes.org. Phase F (task-review chain) pending the NEEDS-USER adjudication.
+
+Craig adjudicated the 3 NEEDS-USER flags (chose accept-all): demoted create-documentation (line 191) and research-writer (line 828) [#C]→[#D]; kept generic-agent-runtime separate from helper-instance (open dependency question stays the recorded blocker). Phase F completed: re-reviewed all remaining open tasks (reconciled CURRENT in the audit) and bumped their LAST_REVIEWED to 2026-06-15. No kills. Committed + pushed the audit (ab9f79a → 4fe184e..ab9f79a on origin/main).
+
+** install-ai on ~/.dotfiles (2026-06-16, Craig-requested, cross-project)
+
+Craig asked to install-ai ~/.dotfiles in gitignore mode and page him. Ran scripts/install-ai.sh --gitignore $HOME/.dotfiles: created .ai/ (protocols, workflows, scripts, notes.org seeded project=.dotfiles) + inbox/, and appended .ai/ .claude/ CLAUDE.md AGENTS.md to ~/.dotfiles/.gitignore. Verified .ai/ is git-ignored (git check-ignore IGNORED; absent from git status). (.claude shows not-ignored via check-ignore only because the dir doesn't exist yet — directory-scoped pattern; will ignore once created.) Left the .gitignore change uncommitted — ~/.dotfiles had 11 unrelated modified WIP files, so the commit decision is Craig's. Paged him via notify success --persist (exit 0).
+
+** Loop cycle 2 (2026-06-16 00:15 CDT)
+
+Inbox had real work this time: 2 .emacs.d handoffs + 1 roam item. No eligible task (no :next:, no :solo:+:quick:).
+
+- .emacs.d inbox-zero Phase E proposal (adds autonomous task execution to the synced inbox-zero.org): shared-asset change in a no-approvals session → defer-and-stage per the Skeptical Review gate. Staged the proposed file + diff + sender note under working/inbox-zero-phase-e/, filed a [#B] VERIFY (recommend spec it, not apply: assumes .emacs.d's commit waiver, hardcodes eligibility tags, undefined do-not-implement set + kill-switch, unresolved seam question; overlaps the fix-speedrun task — reconcile into one spec). Replied to .emacs.d that it's parked.
+- Roam item "rulesets: encourage building a knowledge base..." (:next:, rulesets-prefixed): claimed and filed as [#C] :feature: "Encourage org-roam KB contribution across workflows." Dropped the :next: tag — it touches 4 synced workflows and needs a best-practices curation decision, so it's a design task, not a loop auto-implement (guardrail). Removed from roam inbox, committed + pushed roam (af1e09f).
+- Rulesets commit 26bcae6 (todo.org VERIFY + task, working/ staged). Both inboxes verified empty. Rescheduled next wake +30 min.
+
+** /respond-to-cj-comments on todo.org (2026-06-16, Craig-invoked)
+
+cj-scan found 4 cj comments. Craig had also flipped Helper-instance and memories-sync to VERIFY in his buffer (his edits, folded into the same commit).
+
+- cj on Phase E VERIFY ("write a spec, file a verify subtask") + cj on fix-speedrun ("your call" on workflow-vs-preset + effectiveness measurement): reconciled into ONE autonomous-batch execution spec (docs/design/2026-06-16-autonomous-batch-execution-spec.org). Design call (mine, per "your call"): a dedicated work-the-backlog.org holds the loop; "fix speedrun" is a thin preset; inbox-zero stays routing-only. Spec also designs the effectiveness-measurement trial (per-task JSONL + org-roam synthesis articles). Phase E VERIFY folded to a dated entry; fix-speedrun got a dated answer + a *** VERIFY "Review the autonomous-batch execution spec".
+- cj on KB-encouragement ("write a spec, file a verify subtask"): docs/design/2026-06-16-encourage-kb-contribution-spec.org (4 light workflow prompts + curated best-practices node, sources cited). Filed *** VERIFY "Review the KB-contribution spec".
+- cj on Wrap-up routing ("approved, take through spec-response → implementation"): LEFT IN PLACE / deferred. The build moves tasks between projects' todo.org files = data-loss-adjacent cross-project mutation; per the guardrail it needs a focused /start-work session with a checkpoint, not a tail-end rush after two specs. Craig's approval edit to wrapup-routing-spec.org is unstaged (belongs with that build).
+
+Two spec drafts came from parallel general-purpose subagents (held the design decisions myself; they drafted the docs). Commit 7467d1f pushed. Both review VERIFYs await Craig.
+
+Pending at task boundary: a new inbox handoff arrived mid-task — inbox/2026-06-16-0054-from-archsetup-ai-launcher-dotfiles-discovery.org — not yet processed (next loop cycle or a follow-up).
+
+** Loop cycle 3 (2026-06-16 00:59 CDT)
+
+Processed the archsetup handoff (add =~/.dotfiles= to bin/ai's build_candidates() discovery, completing the dotfiles bootstrap). Synced asset + no-approvals → defer-and-stage. Skeptical review confirmed correct (verified build_candidates only scans .emacs.d/code/projects; maybe_add_candidate guards on .ai/protocols.org; no bin/ai discovery test to also update). Staged the one-line diff under working/ai-dotfiles-discovery/, filed a [#B] VERIFY "Parked: add ~/.dotfiles to the ai launcher's project discovery", replied to archsetup (parked). Commit 4e2db8f pushed. No eligible task (no :next:, no :solo:+:quick:). Both inboxes verified empty. Rescheduled +30 min.
+
+** Loop cycle 4 (2026-06-16 01:32 CDT)
+
+No-op cycle. Both inboxes empty, no eligible task. Craig reordered todo.org in his Emacs buffer (moved the Phase E / wrap-up-routing / KB-contribution blocks down below the memories-sync task) — a pure reorder, byte-identical content, no new cj/approval/tag. That edit plus his earlier wrapup-routing-spec.org approval annotation are uncommitted; left untouched (his in-progress WIP, not the loop's to commit). No commit this cycle. Rescheduled +30 min.
+
+** Loop cycle 5 (2026-06-16 02:04 CDT)
+
+No-op (3rd idle cycle). Inboxes empty, no eligible task, no new cj comments (the wrap-up-routing one is still the only one, still deferred). Craig's todo.org reorder + wrapup-routing-spec.org approval still uncommitted, still left alone. No commit. Rescheduled +30 min.
+
+** Loop cycle 6 (2026-06-16 02:35 CDT)
+
+No-op (4th idle cycle). State unchanged from cycle 5: inboxes empty, no eligible task, one deferred cj (wrap-up routing), Craig's WIP still uncommitted. No commit. Rescheduled +30 min.
+
+** Loop cycle 7 (2026-06-16 03:06 CDT)
+
+No-op (5th idle cycle). State frozen since cycle 4: inboxes empty, no eligible task, wrap-up-routing cj still deferred, Craig's reorder + spec approval still uncommitted. No commit. Rescheduled +30 min.
+
+** Loop cycle 8 (2026-06-16 03:37 CDT)
+No-op (6th idle cycle). State unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 9 (2026-06-16 04:08 CDT)
+No-op (7th idle cycle). State unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 10 (2026-06-16 04:39 CDT)
+No-op (8th idle cycle). State unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 11 (2026-06-16 05:10 CDT)
+No-op for rulesets (9th idle cycle). A new roam inbox item appeared but it's =emacs:=-prefixed (difftastic "d" diff option for the buffer-differs prompt, :next:) — foreign to rulesets, left untouched for .emacs.d's session to claim per the ownership rule. No rulesets-owned roam items, no project inbox, no eligible todo.org task. No commit. Rescheduled +30 min.
+
+** Loop cycle 12 (2026-06-16 05:41 CDT)
+No-op (10th idle cycle). The foreign =emacs:= roam item from cycle 11 is gone (claimed by .emacs.d). Roam + project inboxes empty, no eligible task. No commit. Rescheduled +30 min.
+
+** Loop cycle 13 (2026-06-16 06:12 CDT)
+No-op for rulesets (11th idle cycle). Another =emacs:=-prefixed roam item appeared (studio view-assignment dropdown next/prev buttons, :next:) — foreign, left for .emacs.d. No rulesets-owned items, no project inbox, no eligible task. No commit. Rescheduled +30 min.
+
+** Loop cycle 14 (2026-06-16 06:43 CDT)
+No-op (12th idle cycle). Cycle 13's foreign =emacs:= roam item cleared (.emacs.d claimed it). All inboxes empty, no eligible task. No commit. Rescheduled +30 min.
+
+** Loop cycle 15 (2026-06-16 07:14 CDT)
+No-op (13th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 16 (2026-06-16 07:45 CDT)
+No-op (14th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 17 (2026-06-16 08:16 CDT)
+No-op (15th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 18 (2026-06-16 08:47 CDT)
+No-op for rulesets (16th idle cycle). Two new =emacs:=-prefixed roam items (theme-studio palette-closed default, assignment-view dropdown improvements, both :next:) — foreign, left for .emacs.d. No rulesets-owned items, no project inbox, no eligible task. No commit. Rescheduled +30 min.
+
+** Loop cycle 19 (2026-06-16 09:18 CDT)
+No-op (17th idle cycle). Cycle 18's foreign =emacs:= roam items cleared (.emacs.d claimed them). All inboxes empty, no eligible task. No commit. Rescheduled +30 min.
+
+** Loop cycle 20 (2026-06-16 09:49 CDT)
+No-op (18th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 21 (2026-06-16 10:20 CDT)
+No-op (19th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 22 (2026-06-16 10:51 CDT)
+No-op (20th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 23 (2026-06-16 11:22 CDT)
+No-op (21st idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 24 (2026-06-16 11:53 CDT)
+No-op (22nd idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 25 (2026-06-16 12:24 CDT)
+No-op (23rd idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 26 (2026-06-16 12:55 CDT)
+No-op (24th idle cycle). All inboxes empty, no eligible task. Craig edited .claude/settings.json (removed the pinned =model: opus= line) — his config WIP, left uncommitted with his other edits. No commit. Earlier this turn surfaced candidate work for Craig (shellcheck warnings across 6 scripts incl. 2 latent-bug-suspect ones in bin/ai; the parked dotfiles one-liner; token-rotation helper; spec-storage decision) — awaiting his pick on what to tag/fix. Rescheduled +30 min.
+
+** Loop cycle 27 (2026-06-16 13:27 CDT)
+No-op (25th idle cycle). All inboxes empty, no eligible task, state unchanged (Craig's settings.json + reorder WIP still uncommitted). No commit. Rescheduled +30 min.
+
+** Loop cycle 28 (2026-06-16 13:58 CDT)
+No-op (26th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 29 (2026-06-16 14:29 CDT)
+No-op (27th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop cycle 30 (2026-06-16 14:59 CDT)
+No-op (28th idle cycle). All inboxes empty, no eligible task, state unchanged. No commit. Rescheduled +30 min.
+
+** Loop STOPPED + remove cross-agent-comms (2026-06-16 ~15:05 CDT)
+
+Craig stopped the 30-min loop (the queued 15:31 wakeup fired once after; not continued). Then directed removing the unused cross-agent-comms subsystem (option 2: remove now, repoint helper-mode/spec refs to inbox-send). Distinct from inbox-send (the live handoff mechanism, kept). Removing: 7 scripts + 7 READMEs, cross-agent-comms.org workflow, INDEX entry, 6 test files, 3 startup.org wirings (Phase A step 6, Phase B step 4, Phase C bullet) + 2 summary mentions, helper-mode.org escalation ref (repoint to inbox-send / report-to-Craig), 7 legacy ~/.local/bin/cross-agent-* symlinks. Editing canonical (claude-templates/.ai/) then sync-check --fix to mirror. generic-agent-runtime-spec.org gets a removal note (not a full rewrite of its 9 historical refs).
diff --git a/.ai/workflows/INDEX.org b/.ai/workflows/INDEX.org
index 42119b4..a45807e 100644
--- a/.ai/workflows/INDEX.org
+++ b/.ai/workflows/INDEX.org
@@ -107,7 +107,6 @@ This index must list every =.org= file in =.ai/workflows/= except this one and e
- Triggers: "session harvest", "harvest the sessions", "let's run the session-harvest workflow", "monthly harvest", "mine the sessions"
- =no-approvals.org= — drop the interaction-level approval gates for a pre-agreed batch while keeping engineering-discipline gates (=/review-code=, =/voice personal=, tests, session-log updates, subagent reviews, destructive-action consent). Mode stays on until Craig turns it off, a real question arises, the queue empties, or the conversation switches topics.
- Triggers: "no-approvals mode", "no approvals", "no-approval", "no need for approval gates", "stop asking, just keep going", "I'll check back in when you're done or stuck", "do all =<selector>= with no-approval"
-- =cross-agent-comms.org= — protocol for cross-project agent coordination via =inbox/from-agents/= (file-based IPC, GPG-signed, supports cross-machine over Tailscale). Auto: when =cross-agent-watch= detects a new inbound message, or when an agent decides to initiate a cross-project conversation. Operational scripts (=cross-agent-send=, =-recv=, =-watch=, =-status=, =-discover=, =-halt=, =-resume=) and their READMEs live at =.ai/scripts/cross-agent-comms/=.
* Living Document
diff --git a/.ai/workflows/cross-agent-comms.org b/.ai/workflows/cross-agent-comms.org
deleted file mode 100644
index 430b4b0..0000000
--- a/.ai/workflows/cross-agent-comms.org
+++ /dev/null
@@ -1,334 +0,0 @@
-#+TITLE: Cross-Agent Communication Workflow (v5)
-#+AUTHOR: Craig Jennings & Claude (homelab + career sessions)
-#+DATE: 2026-04-27
-#+VERSION: 5
-
-* Status
-
-Draft. Iterating between the homelab and career sessions through a multi-round design discussion. Awaiting Craig's review for promotion to =~/code/rulesets/claude-templates/.ai/workflows/=.
-
-v5 changes from v4:
-- *Script absorption.* Seven operational scripts (=cross-agent-send=, =cross-agent-recv=, =cross-agent-watch=, =cross-agent-status=, =cross-agent-discover=, =cross-agent-halt=, =cross-agent-resume=) now own most implementation detail. Their READMEs are the operational source of truth. The spec stays declarative.
-- *Failsafe halt.* Layered HALT-file mechanism stops all cross-agent activity on a machine within ~5 min, without visiting individual sessions or restarting Claude Code. =cross-agent-halt= and =cross-agent-resume= are the convenience entry points; every other component checks the HALT file independently.
-- *Identity.* Messages are GPG-signed by sender and verified by receiver. Combined with POSIX permissions on =from-agents/= and Tailscale-level network auth, identity becomes a three-layer story.
-- *Atomic writes.* Writers MUST use temp-file + rename. =cross-agent-send= handles this; the spec just states the contract.
-- *Dedup.* Sequence-collision dedup is now binary SHA-256 equality, not a fuzzy ">90% match" threshold.
-- *Cold-start handling.* Layered: =cross-agent-watch= (push notifications via =inotifywait=) is the primary mechanism; startup-workflow check and user-direct-injection are coverage layers.
-- *Spec stays roughly the same length but does more protocol work.* Operational detail (rsync retry numbers, inotifywait recipes, peers.toml schema, GPG flags, dedup mechanics) moved to the script READMEs. The spec adds new protocol elements (identity layer, atomic-writes contract, SHA-256 dedup, =escalate= type, =RELEASE_STATUS= values, =REQUIRES_TOOLS= optional field) in the freed space. Total documentation surface (spec + seven READMEs ≈ 1000 lines) is larger than v4's 259 lines, but the spec and the READMEs serve different audiences — protocol-thinkers and CLI-users — and a reader of just the spec can comprehend the protocol without consulting any README.
-
-* When to use
-
-When two Claude sessions in different projects (same machine or different machines on the same Tailscale tailnet) need to coordinate on a shared task that one session can't complete alone — typically because one has tooling, context, or MCP access the other doesn't.
-
-Examples that fit:
-- Session A asks session B to apply a workflow patch in B's project, then verify it.
-- Session A runs a long task and needs session B to monitor results in B's domain.
-- Two sessions co-design a workflow.
-
-Examples that don't fit:
-- A simple file handoff that doesn't require iteration.
-- A task one session can do alone.
-- Cross-tailnet or cross-organization. The protocol is local-tailnet-scoped.
-
-* Protocol
-
-** File location
-
-Each project has =inbox/from-agents/= as its agent-comms mailbox. Create the directory if it doesn't exist; set permissions =chmod 700= and ownership to the user.
-
-- Sender writes to receiver's =inbox/from-agents/=.
-- Receiver polls (or watches) =inbox/from-agents/=, *not* the parent =inbox/=.
-- The parent =inbox/= stays reserved for human-triage items.
-- Out-of-band artifacts (PDFs, datasets) live at =inbox/from-agents/artifacts/=. Reference by relative path in the message body.
-
-The user does NOT write directly to =from-agents/=. To inject input into a running conversation, the user tells one of the agents in that agent's session; the agent writes the input as a normal message attributed to the user.
-
-** File naming
-
-=YYYYMMDDTHHMMSSZ-from-<sender>-<short-conv-id>.org=
-
-- Timestamp is UTC ISO 8601 compact. The trailing =Z= is mandatory.
-- =from-<sender>= prefix.
-- =<short-conv-id>= is a stable kebab-case slug across the back-and-forth. Reusable across time; ordering relies on filename timestamps.
-
-Frontmatter =#+TIMESTAMP= carries the same instant in local time with explicit offset. The two MUST refer to the same instant.
-
-The implementation (=cross-agent-send=) generates the canonical filename from the message's frontmatter (=CONVERSATION_ID=, current UTC time) and the sender's project context. Senders supply only the message body file; the script handles naming. Senders MUST NOT pre-name files in this format and pass them through; the script overwrites with its own canonical name to ensure consistency and enable the sender-side max-seen sequence-collision-reduction scan.
-
-GPG signatures live in a sibling file =YYYYMMDDTHHMMSSZ-from-<sender>-<short-conv-id>.org.asc=. Receivers verify before processing. See =* Writes are atomic= for the two-file delivery ordering rule.
-
-** Frontmatter
-
-Required:
-
-#+begin_example
-#+TITLE: <human-readable subject>
-#+CONVERSATION_ID: <stable across the thread>
-#+MESSAGE_TYPE: <see types below>
-#+SEQUENCE: <integer hint>
-#+TIMESTAMP: <ISO 8601 with explicit offset>
-#+PROTOCOL_VERSION: 5
-#+end_example
-
-Optional:
-
-#+begin_example
-#+REQUIRES_TOOLS: <comma-separated tool/MCP slugs, e.g. gmail-mcp, slack-mcp>
-#+RELEASE_STATUS: <see release-statuses; valid only on MESSAGE_TYPE: release>
-#+WORKFLOW_VERSION: <sender's version of cross-agent-comms.org; informational only in v5 — no enforcement>
-#+end_example
-
-Receiver sanity-checks frontmatter before acting. Missing or malformed frontmatter → surface to user, don't proceed. Mismatched =PROTOCOL_VERSION= → receiver writes a =query= asking the originator to upgrade.
-
-** Identity
-
-Messages are GPG-signed by the sender. Receivers verify the detached signature before processing the message body.
-
-The implementation (=cross-agent-send=) signs automatically with the sender's configured key (the user's primary GPG key by default; configurable via =--key= flag or environment). Receivers verify automatically against the keys in their GPG keyring.
-
-Identity is a three-layer story:
-
-1. *Tailscale layer.* Only tailnet members can reach the rsync-over-SSH endpoint at all.
-2. *POSIX layer.* =chmod 700= on =from-agents/= means only processes running as the directory's owner can write.
-3. *GPG layer.* Sender's signature on each message proves the message originated from a process holding the key.
-
-Three independent layers. Per-user GPG (using existing keys) gives a correctness check more than a security boundary — unsigned messages are almost certainly bugs, not attackers. That's still load-bearing.
-
-** Writes are atomic
-
-Writers MUST use a temp-file + rename pattern (=mktemp= + =mv= within the same filesystem) so receivers never see partial files. The implementation script (=cross-agent-send=) handles this.
-
-Receivers ignore =.tmp.*= files, processing only the final renamed name.
-
-*Two-file ordering.* When a message has a sibling GPG signature file (=.org.asc=), the writer MUST rename the =.asc= to its final name *before* renaming the =.org=. Two =mv= operations are not atomic together — without this ordering, a receiver could read the =.org= in the window between the two renames and fail GPG verify because the =.asc= hasn't landed yet. The rule: receiver only acts on =.org= files, and a =.org= without a corresponding =.asc= means the signature is genuinely missing (not still in flight).
-
-** Sequence numbering
-
-=#+SEQUENCE= is a *hint*, not a strict counter. Canonical order is =#+TIMESTAMP=. Sequences may collide under rapid back-and-forth (both sides write what they think is sequence N near-simultaneously). Treat collision as a normal protocol event.
-
-*Receiver-side dedup rule.* When a new file shares =CONVERSATION_ID= + =SEQUENCE= with an already-processed message, compare SHA-256 hashes. Identical hashes → silent dedup, treat as a retry. Different hashes → process both, ordered by =#+TIMESTAMP=.
-
-*Sender-side collision-reduction (best-effort).* Before picking sequence, scan the receiver's =from-agents/= for the highest existing sequence in this conversation across both sender prefixes. Use =max(seen) + 1=.
-
-** Message types
-
-- *request* — a side asks for work, input, or a decision. Sequence 1 is always =request=.
-- *progress* — work-in-progress checkpoint. "Here's where I am, no action needed from you, more coming." Originator's poll loop should NOT page the user on progress messages.
-- *query* — either side asks a clarifying question that blocks further work. Originator's poll loop SHOULD surface this immediately. Originator answers and work continues.
-- *pushback* — receiver formally disagrees with the request and has *not* started the work. Carries reasoning. Distinct from =query= because the originator's response path differs.
-- *complete* — receiver signals the requested work is done. Triggers verification.
-- *release* — terminal type. Originator writes after verifying =complete=. Carries =RELEASE_STATUS= to disambiguate the closure mode.
-- *escalate* — punts the conversation to the user for adjudication. Both sides pause polling on =escalate=; the user resolves.
-
-Reply expectation is implied by type: =request=, =query=, =pushback=, =escalate= expect a reply; =progress=, =complete=, =release= don't.
-
-** Conversation lifecycle
-
-A conversation is a directed loop between an originator (issued sequence 1) and a receiver:
-
-1. Originator writes =request= (sequence 1). Begins polling for replies.
-2. *Optional acknowledgment.* Receiver may write a =progress= at sequence 2 to acknowledge receipt and set expectations. Required if work will take >5 minutes (so the originator's poll loop doesn't waste wakes).
-3. *Optional echo-back.* For ambiguous or large requests, receiver writes a =progress= that restates work items and announces "starting now unless you push back within N minutes."
-4. Receiver works. May write =progress= updates. =query= mid-work if blocked. =pushback= if the request is wrong.
-5. Receiver writes =complete=. Begins polling for =release=.
-6. Originator reads, *verifies the deliverable directly*. For subjective deliverables, verification is the originator's editorial accept.
-7. If verified: =release= with =RELEASE_STATUS: complete=. If problems: new =request= (next sequence number).
-8. Receiver sees =release=, stops polling.
-
-The verification step is load-bearing. =complete= is a *claim*; =release= is *verification*.
-
-** Pushback path
-
-On receiving a =pushback=, the originator chooses:
-
-1. *Revise* — new =request= with adjusted scope.
-2. *Insist* — new =request= addressing the pushback's reasoning, standing by direction.
-3. *Withdraw* — =release= with =RELEASE_STATUS: withdrawn-after-pushback=.
-
-*Deadlock cap.* After two pushback-insist exchanges, the next message MUST be =MESSAGE_TYPE: escalate=. Both agents pause polling; the user resolves.
-
-** =RELEASE_STATUS= values
-
-| Status | Meaning |
-|---+---|
-| =complete= | Goal achieved, originator verified |
-| =cancelled= | Originator changed their mind mid-conversation |
-| =withdrawn-after-pushback= | Originator chose option 3 on receiver's =pushback= |
-| =abandoned-after-escalation= | User adjudicated and chose to close the conversation |
-| =abandoned-after-timeout= | Receiver auto-closed after originator never returned to verify |
-
-** Async fallback
-
-If the originator session ends between =request= and =complete=, the receiver's =complete= goes unverified. Receiver behavior:
-
-- Polls for =release= up to ~24 hours of cycles (implementation default).
-- After timeout, writes a final =progress= message ("treating as terminal-without-verification; originator never returned to release") and stops polling. Receiver does NOT write =release= itself — that would contradict the lifecycle rule that =release= is the originator's terminal action.
-- Next time the originator project starts, the unreleased =complete= is surfaced as a startup item. The user can issue a late =release= (with whichever =RELEASE_STATUS= fits) or open a fresh conversation to revisit. =RELEASE_STATUS: abandoned-after-timeout= is used at that point if the user wants to formally close the orphaned thread.
-
-** Escalation
-
-A side writes =escalate= when:
-- Pushback-insist deadlock cap reached.
-- Conversation has stalled (no productive movement in N exchanges).
-- A reply-expecting message has gone unanswered past timeout.
-
-Body summarizes both sides' positions in 60 seconds of reading. Both agents pause polling; the user resolves.
-
-* Implementation notes
-
-This section describes how to operate the protocol. Operational detail lives in the seven scripts' READMEs.
-
-** Recommended scripts
-
-| Script | Replaces user action | README |
-|---+---+---|
-| =cross-agent-send <dest> <msg>= | Filename generation, GPG sign, atomic write, peer lookup, rsync push, retry+backoff, failure surfacing — seven mechanical sender-side steps. Frontmatter and message body are still author-supplied. | =cross-agent-send.md= |
-| =cross-agent-recv <msg>= | Frontmatter sanity-check, =PROTOCOL_VERSION= verify, GPG verify, SHA-256 dedup, =REQUIRES_TOOLS= check — five mechanical receiver-side steps. Output is a structured decision (=process= / =dedup= / =query= / =reject=) the agent acts on. | =cross-agent-recv.md= |
-| =cross-agent-watch= | Manually checking inboxes; "did I get a message?" | =cross-agent-watch.md= |
-| =cross-agent-status= | Walking each project to count pending messages | =cross-agent-status.md= |
-| =cross-agent-discover= | Remembering project topology and reachability | =cross-agent-discover.md= |
-| =cross-agent-halt [reason] [--tailnet]= | Visiting each session to stop polling, restarting Claude Code, or hand-killing processes when comms go runaway. =--tailnet= propagates HALT to all peers. | =cross-agent-halt.md= |
-| =cross-agent-resume [--tailnet]= | Manually clearing the HALT state and restarting the watcher. Per-session polling does NOT auto-resume — the user re-engages each session explicitly. | =cross-agent-resume.md= |
-
-The scripts are tools the user runs from any terminal. They do not depend on agent context — =cross-agent-status= run from a fresh shell works.
-
-A reader can comprehend this protocol from this spec alone. Script READMEs add operational detail that makes the protocol practical to use, but understanding the protocol's semantics requires only this document.
-
-** Polling
-
-Default cadence: 270 seconds (≈4.5 min). Sits just under the 5-minute prompt-cache TTL.
-
-If a side needs to slow down (heads-down work, idle wait), it writes a =progress= message saying so in prose. The other side adapts. There are no named polling modes.
-
-After ~12 empty polls in a row, the poll loop surfaces the silence to the user.
-
-A future runtime with native filesystem-event support could replace polling for active sessions; =cross-agent-watch= already provides event-driven notifications outside active sessions.
-
-** User multi-tasking
-
-- *Deferral.* If the user's last message in the agent's session was less than 60 seconds ago AND a poll fires, queue the inbox check until either the user sends another message OR 5 minutes pass without further input.
-- *Surfacing.* On the next user-facing response: "While we were working on X, a cross-agent message landed from <project>. It's a =<type>= — want me to handle it now or after we finish?"
-- *Mid-question.* Answer the user first.
-- *Project switch.* If the user moves to the receiver project mid-conversation, the receiver agent surfaces the in-flight thread on first user prompt.
-- *Conversation state.* Always include in any response that mentions a cross-agent thread: "<conv-id> at sequence N, awaiting <event>."
-
-** Failure modes
-
-The seven scripts surface most failures with concrete error messages. Spec-level failure modes:
-
-- *Malformed frontmatter on a received file.* Surface to user; do not act.
-- *Mismatched =PROTOCOL_VERSION=.* Receiver writes =query= asking originator to upgrade.
-- *Missing or invalid GPG signature.* Receiver surfaces "unsigned/unverified message"; refuses to act.
-- *Sequence collision* with non-matching SHA-256. Process both, ordered by timestamp.
-- *Required tool unavailable.* Receiver checks =REQUIRES_TOOLS= during frontmatter-sanity-check (before any work begins). On a missing tool, receiver writes =query= asking the originator to reframe the request to avoid the unavailable tool. Originator may revise (new =request=) or withdraw (=release= with =RELEASE_STATUS: cancelled=). =query= is the right type rather than =pushback= because missing-tool is a capability gap, not disagreement.
-- *Runaway resource usage.* User invokes =cross-agent-halt= globally (or =cross-agent-halt --tailnet= for cross-machine). HALT file stops all components within one polling cycle (~5 min). See =* Halt mechanism= for the layered checks.
-- *User halts mid-conversation.* Both sides write a final =progress= note ("HALT fired; pausing"); polling stops within one cadence; conversations resume on explicit per-session re-engage after HALT clears.
-- *HALT file accidentally created* (typo, errant =touch=). =cross-agent-status= prominently flags HALT active; user clears with =cross-agent-resume=. Cost: no messages send during the typo window.
-- *HALT file unreadable* (perms wrong, partial write). Each component fails-closed (treats as halted) and reports "HALT file present but unreadable; treat as halted." Safer than fail-open.
-
-Operational failures (rsync push fails, watcher dies, peer unreachable) live in the script READMEs' failure-mode tables.
-
-* Halt mechanism
-
-A failsafe to stop all cross-agent activity on a machine without visiting individual sessions or restarting Claude Code. Designed for the runaway-polling case: an agent has spun up conversations with N other agents, polling is eating CPU, and the user needs to stop everything *now*.
-
-** The HALT file
-
-Path: =~/.config/cross-agent-comms/HALT=.
-
-Existence triggers halt across all components on the machine. The file's body may carry an optional human-readable reason (reviewed by the user later when deciding to resume).
-
-User commands:
-
-#+begin_example
-$ touch ~/.config/cross-agent-comms/HALT # halt
-$ rm ~/.config/cross-agent-comms/HALT # resume
-#+end_example
-
-Or via convenience scripts (=cross-agent-halt= / =cross-agent-resume=) that also handle the watcher service and cross-machine propagation.
-
-** Layered checks (the failsafe property)
-
-Every component MUST check the HALT file. The "any one component stops the system independently" property is what makes this failsafe — the system doesn't depend on a single point doing the right thing.
-
-| Component | Check timing | Behavior on HALT |
-|---+---+---|
-| =cross-agent-send= | At start of send + between =.asc= and =.org= rsync + between retry iterations | Refuse to start new send; complete current step then exit. Worst case: one in-flight send finishes within a few seconds. |
-| =cross-agent-recv= | Before any verify or dedup | Leave inbound message in place — do NOT dedup, reject, or move. Resume picks it up via cold-start handling. |
-| =cross-agent-watch= | At iteration start | Suppress notifications; log only. Continues running, no-op until HALT clears. |
-| =cross-agent-status= | At start | Print prominent "⚠ HALT ACTIVE" banner before normal output. Read-only, continues. |
-| =cross-agent-discover= | At start | Print HALT banner; continue read-only enumeration. |
-| Agent polling loop | First action on every wake | Write a final =progress= note to any active conversation ("HALT fired; pausing"), do NOT reschedule, surface "halt active" to user. Polling decays within one cadence (~5 min). |
-| Agent user-facing responses | Every response while HALT is set | Append "(HALT active; cross-agent comms paused)" to the response. On HALT clear, the next response says "(HALT cleared; cross-agent comms ready to resume — say so to re-engage polling)." Persistent, not just first-response — keeps awareness alive. |
-| Conversation initiator | Before writing sequence 1 of any new conversation | Refuse and surface to user. |
-| Startup workflow | Phase A on session start | If HALT exists, surface immediately and skip cross-agent inbox checks. |
-
-The agent polling-loop check is the load-bearing one for "stops eating CPU." Wake-ups already scheduled fire, but each wake on-HALT is a no-op + reschedule-prevention. Within one polling cadence (~5 min) all polling stops.
-
-*Fail-closed on unreadable HALT.* If the HALT file exists but is unreadable (wrong permissions, partial write), components MUST treat as halted. Safer than fail-open.
-
-** Resume asymmetry (deliberate)
-
-Halt is automatic everywhere. Resume requires explicit user intent per-session.
-
-When the user removes HALT (or runs =cross-agent-resume=), components stop refusing to act, but agent polling does NOT auto-resume. The user must open each session and tell that agent to resume polling for its conversations.
-
-The asymmetry exists because:
-
-1. Auto-resume could silently invert intentional kills. If the user halted because a session was misbehaving, removing HALT shouldn't quietly revive it.
-2. Per-session resume forces the user to look at each session and confirm the situation is resolved before re-engaging.
-
-** Cross-machine halt
-
-=cross-agent-halt --tailnet= iterates =peers.toml= and SSH-touches HALT on each peer. Same shape for resume.
-
-Reports per-peer status with non-zero exit on partial halt:
-
-#+begin_example
-$ cross-agent-halt --tailnet
-Halting velox.local ✓ (HALT file written)
-Halting bastion.local ✗ (ssh exit 255: no route to host)
-Halting locally ✓ (HALT file written)
-
-PARTIAL HALT: 2/3 machines halted. bastion.local needs manual halt.
-Exit 1.
-#+end_example
-
-Scripting can detect partial halt via the exit code. Same pattern for =--tailnet= on resume.
-
-* Limitations
-
-- *Local-tailnet only.* Filesystem IPC + rsync over SSH. Cross-tailnet or cross-organization is out of scope.
-- *Identity has three layers (Tailscale + POSIX + GPG)* but no message-content encryption. Confidentiality is not the goal; signing is correctness, not secrecy.
-- *Single-receiver per conversation.* Fan-out to multiple receivers requires manually orchestrating multiple parallel conversations.
-- *Polling is best-effort.* A wake may be delayed by an in-flight tool call until the runtime is idle. =cross-agent-watch= mitigates by offering event-driven notifications.
-- *Project-extension drift.* If two projects' =.ai/project-workflows/= modify shared workflow definitions in incompatible ways, cross-agent assumptions can diverge silently. The optional =#+WORKFLOW_VERSION= advisory field is informational only in v5 — no implementation reads or acts on it. A future version may add enforcement on mismatch (e.g. receiver writes =query= asking which side is stale). Today, alignment is verified manually before high-stakes conversations.
-
-* Persistence after release
-
-Conversation files persist by default. The conversation log is the audit trail.
-
-Manual archival is fine if the inbox grows unmanageable. Suggested cadence: once the conversation has been =release='d AND the work it produced has shipped, archive both projects' message files into =.ai/sessions/cross-agent/= as a flat directory — no per-conversation subdirectories. Rename each archived file to lead with the conversation-id so messages from the same conversation cluster on =ls=: =<conv-id>-<TIMESTAMP>-from-<sender>.org= (and the matching =.asc= sibling, if present). Inbox filenames lead with the timestamp because chronological arrival is what matters in =from-agents/=; archives invert that because grouping by conversation is what matters when reading history. Keep the =.asc= signatures alongside the =.org= files in archive — they're small and document the GPG verification chain.
-
-Old messages don't affect protocol behavior (=cross-agent-status='s pending semantics correctly ignore released messages) but the =from-agents/= directory grows indefinitely without manual archival. =cross-agent-status= performance degrades noticeably when a project's =from-agents/= exceeds a few hundred files. =cross-agent-init= (deferred to v6) would include an archival sub-command.
-
-* Open questions
-
-- *=cross-agent-init= and =cross-agent-compose= helper scripts.* =-init= would be one-command project bootstrap (creates =inbox/from-agents/= with =chmod 700=, installs the =cross-agent-watch= systemd path unit, validates peer config, runs a discovery probe). =-compose= would be interactive frontmatter authoring (prompts for required fields, produces a draft message file). Both deferred to v6. Current onboarding requires manual =mkdir= + systemd setup per =cross-agent-watch.md='s install recipe; current message authoring requires writing the file by hand or via a small in-agent template.
-- *Hard conversation timeout.* The async-fallback timeout is implementation-default ~24 hours. Right number depends on use case; tighten as patterns emerge.
-- *=paused= polling state.* Today there's no clean signal for "pause without ending." Add when first user complaint surfaces.
-- *Multi-LLM context.* If we ever bring in a non-Claude agent, the protocol's natural-language framing may need formalization.
-
-* Examples
-
-** =prep-fixup= conversation (2026-04-26 → 2026-04-27)
-
-Eleven exchanges between homelab and career produced the v4 spec by iterative critique-and-simplification. Three real-time sequence collisions during the conversation drove the sequence-as-hint rule that landed in v4 and persists in v5.
-
-Files at =~/projects/{homelab,career}/inbox/from-agents/= named =*-prep-fixup.org=. Worth re-reading when designing future cross-agent flows.
-
-** =comms-cold-start-discovery= conversation (2026-04-27)
-
-The follow-up that produced this v5 spec. Cold-start, watcher tooling, agent discovery, GPG identity, sha256 dedup, atomic writes, POSIX perms, script absorption, and process-vs-text simplification. Tonight's first cold-start in real time (career session went dormant after =prep-fixup= release; Craig's user-injection re-engaged it) is the worked demonstration of the v5 user-injection rule.
-
-Files at =~/projects/{homelab,career}/inbox/from-agents/= named =*-comms-cold-start-discovery.org=.
diff --git a/.ai/workflows/helper-mode.org b/.ai/workflows/helper-mode.org
index 8ead37b..cdec200 100644
--- a/.ai/workflows/helper-mode.org
+++ b/.ai/workflows/helper-mode.org
@@ -65,7 +65,7 @@ The git ban is concurrency-scoped. /Helper wrap-up/ below lifts it for exactly o
** Escalation
-Anything the contract blocks routes through the cross-agent message form (=machine.project.agent-id=), or just gets reported to Craig. The helper leaves its tree changes for the primary's next commit, or describes them in a targeted message.
+Anything the contract blocks gets reported to Craig, or — for a cross-project handoff — routed through =inbox-send= to the owning project's =inbox/=. The helper leaves its tree changes for the primary's next commit, or describes them in a note to Craig.
* Data-Integrity Rules
diff --git a/.ai/workflows/startup.org b/.ai/workflows/startup.org
index 59c9c54..fe7778f 100644
--- a/.ai/workflows/startup.org
+++ b/.ai/workflows/startup.org
@@ -10,8 +10,8 @@ The workflow is structured into four phases. *Phase A.0* is a sequential pre-fli
Quick contract — runs / produces:
- *Phase A.0* (sequential): refresh rulesets, then the project repo.
-- *Phase A* (parallel batch): timestamp, session-context check, guarded =.ai/= sync, recent sessions, inbox-status, cross-agent status, notes.org, staleness, language-bundle freshness.
-- *Phase B* (parallel batch): read the crash-recovery anchor if present, the recent session summaries, new inbox items, pending cross-agent messages.
+- *Phase A* (parallel batch): timestamp, session-context check, guarded =.ai/= sync, recent sessions, inbox-status, notes.org, staleness, language-bundle freshness.
+- *Phase B* (parallel batch): read the crash-recovery anchor if present, the recent session summaries, new inbox items.
- *Phase C* (interactive): surface findings, process the inbox, run project startup-extras, ask priorities.
* Execution
@@ -146,12 +146,11 @@ These calls have no dependencies on each other. Issue them all together in one m
4. =\ls -t .ai/sessions/ 2>/dev/null | head -5= — list 5 most recent session files. The backslash bypasses any =ls= alias in the user's profile. Without it, bare =ls -t= silently returns no output under =exa= (a common =ls= replacement) — which makes a sessions directory full of files look empty, and the agent then skips Phase B step 2.
5. =\ls -la inbox/ 2>/dev/null= — inventory the inbox. Same reason for the backslash escape, applied uniformly across the Phase A =ls= calls.
-6. =cross-agent-status 2>/dev/null || true= — snapshot of pending cross-agent messages across local projects. This is layer A of the cold-start design from =cross-agent-comms.org=: pending messages from other agents (delivered while no session was active here) get surfaced on session start. The =|| true= keeps Phase A from failing if =cross-agent-status= isn't installed yet — older projects without the script still boot cleanly. If HALT is active, =cross-agent-status= prints a banner; surface that prominently in Phase C.
-7. Read =.ai/notes.org= — Project-Specific Context, Active Reminders, Pending Decisions sections (skip About This File).
-8. Read =.ai/project-workflows/startup-extras.org= if it exists.
-9. =[ -f todo.org ] && .ai/scripts/task-review-staleness.sh todo.org 7 || true= — count top-level tasks overdue for review (the daily task-review habit's startup nudge). The =[ -f todo.org ]= guard skips projects without a root todo.org; =|| true= keeps Phase A from failing if the script isn't synced yet. Threshold 7 days is one review cycle of slack — softer than the wrap-up health check's 30-day alarm.
-10. =bash ~/code/rulesets/scripts/sync-language-bundle.sh "$PWD" 2>/dev/null || true= — language-bundle freshness for the current project. Fingerprint-detects which bundle (if any) the project has, auto-fixes drifted rulesets-owned files (=.claude/rules/*.md=, =.claude/hooks/*=, =githooks/*=), and surfaces drift in =settings.json= without writing it (a project may have customized it). =CLAUDE.md= is deliberately left untracked — it's seed-only in =install-lang= and project-owned afterward, mirroring how =diff-lang= skips it. Quiet when there's no bundle or everything's clean. Hardcodes the rulesets path because =languages/= is the canonical source and lives only there — the same absolute-path dependency the rsyncs already carry. =|| true= keeps Phase A from failing on older checkouts where the script isn't present yet. The =.ai/= rsyncs and this call write to disjoint paths (=.ai/= vs =.claude/=/=githooks/=), so the batch stays parallel-safe.
-11. =[ -f "$HOME/org/roam/inbox.org" ] && grep -cE '^\*\* ' "$HOME/org/roam/inbox.org" || true= — count items in the roam global inbox (=~/org/roam/inbox.org=), the inbox-zero startup nudge. Silent if the roam clone isn't on this machine. Phase C reads the file when the count is non-zero, splits total vs items related to this project, and surfaces the offer (see =inbox-zero.org=). Read-only; never files at startup.
+6. Read =.ai/notes.org= — Project-Specific Context, Active Reminders, Pending Decisions sections (skip About This File).
+7. Read =.ai/project-workflows/startup-extras.org= if it exists.
+8. =[ -f todo.org ] && .ai/scripts/task-review-staleness.sh todo.org 7 || true= — count top-level tasks overdue for review (the daily task-review habit's startup nudge). The =[ -f todo.org ]= guard skips projects without a root todo.org; =|| true= keeps Phase A from failing if the script isn't synced yet. Threshold 7 days is one review cycle of slack — softer than the wrap-up health check's 30-day alarm.
+9. =bash ~/code/rulesets/scripts/sync-language-bundle.sh "$PWD" 2>/dev/null || true= — language-bundle freshness for the current project. Fingerprint-detects which bundle (if any) the project has, auto-fixes drifted rulesets-owned files (=.claude/rules/*.md=, =.claude/hooks/*=, =githooks/*=), and surfaces drift in =settings.json= without writing it (a project may have customized it). =CLAUDE.md= is deliberately left untracked — it's seed-only in =install-lang= and project-owned afterward, mirroring how =diff-lang= skips it. Quiet when there's no bundle or everything's clean. Hardcodes the rulesets path because =languages/= is the canonical source and lives only there — the same absolute-path dependency the rsyncs already carry. =|| true= keeps Phase A from failing on older checkouts where the script isn't present yet. The =.ai/= rsyncs and this call write to disjoint paths (=.ai/= vs =.claude/=/=githooks/=), so the batch stays parallel-safe.
+10. =[ -f "$HOME/org/roam/inbox.org" ] && grep -cE '^\*\* ' "$HOME/org/roam/inbox.org" || true= — count items in the roam global inbox (=~/org/roam/inbox.org=), the inbox-zero startup nudge. Silent if the roam clone isn't on this machine. Phase C reads the file when the count is non-zero, splits total vs items related to this project, and surfaces the offer (see =inbox-zero.org=). Read-only; never files at startup.
Notes on the rsync commands:
- Trailing slashes on both source and destination matter — they tell rsync to sync /contents/ rather than nest a directory inside.
@@ -170,7 +169,6 @@ These calls depend on Phase A outputs, but are independent of each other. Issue
1. *Read =.ai/session-context.org= if Phase A reported it exists.* The file is the crash-recovery anchor — if it's there, the previous session was interrupted and the context lives only in this file.
2. *Read each of the 5 most recent session files* from Phase A's =\ls -t .ai/sessions/= output. Read just the =* Summary= section of each — not the full file. The Summary gives Active Goal / Decisions / Data Collected / Findings / Files Modified / Next Steps. That's enough to pick up where things left off. Drill into a specific =* Session Log= later only if you need the /why/ or sequence on something. *If Phase A's listing came back empty, sanity-check with =\ls -la .ai/sessions/= before treating empty as definitive — sessions/ should normally be populated, and an empty result usually means the listing got swallowed somewhere, not that the directory is genuinely empty.*
3. *Read each new inbox file* from Phase A's =\ls -la inbox/= output. For =.eml= files, defer to Phase C — those need the extract script (below) rather than a raw Read.
-4. *Process pending cross-agent messages.* For each project with a pending count >0 in Phase A's =cross-agent-status= output (typically the current project; cross-project pending is surfaced too but only acted on if the user asks), run =cross-agent-recv <message-file>= on the file path =cross-agent-status= named. The script returns a structured decision (=process= / =dedup= / =query= / =reject=) per the protocol. For =process=, read the message body to determine the action. For =query=, prepare a clarifying reply. For =reject=, surface to user with the reason. For =dedup=, no action — silent retry already handled. Surface all decisions in Phase C alongside other findings.
Rationale: Reads are independent and benign. Batching them means the whole session-history view + inbox view lands in one round-trip instead of one per file.
@@ -197,7 +195,6 @@ This phase touches the user and runs sequentially:
#+end_src
If it reports a count, surface one line: wrap-up's Step 4.0 will commit it as =chore: sync .ai tooling from templates=, or offer to commit it now. If silent, say nothing. This is the crashed-session counterpart to the wrap-up commit step (the primary fix). From the 2026-05-31 jr-estate + work handoffs.
- - *Surface pending cross-agent messages.* If =cross-agent-status= reported any pending messages, list them with their =cross-agent-recv= decision (process / query / reject) per file. For =process= messages in this project's inbox, propose handling now or after the current task. For pending in other projects, mention the count so the user knows to switch projects when ready. If HALT was active, surface that prominently — cross-agent activity is paused until =cross-agent-resume= clears it.
2. *Process inbox if non-empty.* Mandatory — don't ask, just delegate to [[file:process-inbox.org][process-inbox.org]]. That workflow owns the value gate (advances an existing TODO / improves the project / serves the mission), the per-source rejection flow (Craig / project handoff / script), the priority-scheme check before filing, and the =.eml= extraction path. Single source of truth for the discipline.
3. *Execute project-specific startup extras* (the contents of =.ai/project-workflows/startup-extras.org= read in Phase A). If the file didn't exist, skip.
4. *Ask about priorities.* "What would you like to work on, or is there something urgent you need?"
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover
deleted file mode 100755
index 152cf27..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/usr/bin/env python3
-"""Enumerate cross-agent destinations: local projects + tailnet peers.
-
-See cross-agent-discover.md. Local: scan ~/projects/*/.ai/. Peers: read
-peers.toml, SSH-probe each for reachability. --enumerate-remote optionally
-runs `ls -d ~/projects/*/.ai/` over SSH to list remote projects.
-
-Cache results for 5 min at ~/.cache/cross-agent-comms/discovery.json so
-repeated invocations don't re-probe.
-
-HALT: prints a banner; otherwise continues.
-"""
-
-from __future__ import annotations
-
-import argparse
-import datetime as _dt
-import json
-import os
-import subprocess
-import sys
-import time
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-HALT_FILE = CONFIG_DIR / "HALT"
-CACHE_DIR = Path.home() / ".cache" / "cross-agent-comms"
-CACHE_FILE = CACHE_DIR / "discovery.json"
-CACHE_TTL_SECONDS = 300
-
-EXIT_OK = 0
-EXIT_GENERAL = 1
-EXIT_PEERS_TOML = 1
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def render_banner_if_halt() -> None:
- if not HALT_FILE.exists():
- return
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- reason = "(HALT file unreadable; treated as halted)"
- print("⚠ HALT ACTIVE — cross-agent comms paused")
- if reason:
- print(f" reason: {reason}")
- print()
-
-
-def enumerate_local_projects() -> list[str]:
- projects_dir = Path.home() / "projects"
- if not projects_dir.is_dir():
- return []
- found = []
- for child in sorted(projects_dir.iterdir()):
- if child.is_dir() and (child / ".ai").is_dir():
- found.append(child.name)
- return found
-
-
-def load_peers() -> dict:
- if not PEERS_TOML.exists():
- return {"peers": {}}
- try:
- return tomllib.loads(PEERS_TOML.read_text())
- except (tomllib.TOMLDecodeError, OSError) as e:
- err(f"cannot parse peers.toml: {e}")
- sys.exit(EXIT_PEERS_TOML)
-
-
-def probe_peer_reachability(host: str, ssh_user: str | None) -> tuple[bool, str | None]:
- """Run a short SSH probe with BatchMode=yes (no interactive prompt)."""
- target = f"{ssh_user}@{host}" if ssh_user else host
- try:
- result = subprocess.run(
- ["ssh", "-o", "ConnectTimeout=2", "-o", "BatchMode=yes", target, "true"],
- capture_output=True,
- text=True,
- timeout=5,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return False, "ssh probe failed"
- if result.returncode == 0:
- return True, None
- return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
-
-
-def enumerate_remote_projects(host: str, ssh_user: str | None) -> list[str] | None:
- target = f"{ssh_user}@{host}" if ssh_user else host
- try:
- result = subprocess.run(
- [
- "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target,
- "ls -d ~/projects/*/.ai/ 2>/dev/null",
- ],
- capture_output=True,
- text=True,
- timeout=10,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return None
- if result.returncode != 0:
- return None
- projects = []
- for line in result.stdout.splitlines():
- # Each line looks like /home/<user>/projects/<name>/.ai/
- parts = line.rstrip("/").split("/")
- if len(parts) >= 2 and parts[-1] == ".ai":
- projects.append(parts[-2])
- return projects
-
-
-def read_cache() -> dict | None:
- if not CACHE_FILE.exists():
- return None
- try:
- age = time.time() - CACHE_FILE.stat().st_mtime
- if age > CACHE_TTL_SECONDS:
- return None
- return json.loads(CACHE_FILE.read_text())
- except (OSError, json.JSONDecodeError):
- return None
-
-
-def write_cache(payload: dict) -> None:
- CACHE_DIR.mkdir(parents=True, exist_ok=True)
- CACHE_FILE.write_text(json.dumps(payload, indent=2))
-
-
-def discover(peer_filter: str | None, enumerate_remote: bool) -> dict:
- local = enumerate_local_projects()
- peers_cfg = load_peers().get("peers", {})
-
- peers_out = []
- for name, cfg in sorted(peers_cfg.items()):
- if peer_filter and name != peer_filter:
- continue
- host = cfg.get("host", name)
- ssh_user = cfg.get("ssh_user")
- reachable, error = probe_peer_reachability(host, ssh_user)
- entry = {
- "name": name,
- "host": host,
- "reachable": reachable,
- }
- if not reachable:
- entry["error"] = error
- if enumerate_remote and reachable:
- entry["projects"] = enumerate_remote_projects(host, ssh_user) or []
- peers_out.append(entry)
-
- return {
- "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(),
- "halt_active": HALT_FILE.exists(),
- "local": local,
- "peers": peers_out,
- }
-
-
-def render_table(payload: dict, enumerate_remote: bool) -> None:
- local = payload.get("local", [])
- print(f"Local ({_local_hostname()}):")
- if local:
- wrapped = ", ".join(local)
- print(f" {wrapped} [{len(local)} project{'s' if len(local) != 1 else ''}]")
- else:
- print(" (no projects with .ai/ found)")
- print()
-
- peers = payload.get("peers", [])
- if not peers:
- print("Peers (from peers.toml):")
- print(" (no peers configured)")
- return
-
- print("Peers (from ~/.config/cross-agent-comms/peers.toml):")
- for p in peers:
- marker = "✓ reachable" if p.get("reachable") else f"✗ UNREACHABLE ({p.get('error', 'unknown')})"
- print(f" {p['name']:<16} {p['host']:<24} {marker}")
- if enumerate_remote and p.get("projects"):
- wrapped = ", ".join(p["projects"])
- print(f" projects: {wrapped}")
-
-
-def _local_hostname() -> str:
- import socket
- return socket.gethostname().split(".")[0]
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Discover cross-agent destinations.")
- parser.add_argument("--enumerate-remote", action="store_true",
- help="SSH into each peer and list ~/projects/*/.ai/")
- parser.add_argument("--no-cache", action="store_true", help="Skip cache; force fresh probe")
- parser.add_argument("--peer", help="Limit to a single peer name from peers.toml")
- parser.add_argument("--json", action="store_true", help="Machine-readable output")
- args = parser.parse_args()
-
- render_banner_if_halt()
-
- payload = None
- if not args.no_cache:
- cached = read_cache()
- if cached is not None:
- # Honor --peer filter on cached payload.
- if args.peer:
- cached["peers"] = [p for p in cached.get("peers", []) if p["name"] == args.peer]
- payload = cached
-
- if payload is None:
- payload = discover(args.peer, args.enumerate_remote)
- if not args.no_cache and not args.peer:
- # Only cache full (unfiltered) discoveries.
- write_cache(payload)
-
- if args.json:
- print(json.dumps(payload, indent=2))
- return EXIT_OK
-
- render_table(payload, args.enumerate_remote)
- return EXIT_OK
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover.md
deleted file mode 100644
index 95134bb..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-discover.md
+++ /dev/null
@@ -1,155 +0,0 @@
-# cross-agent-discover
-
-**Purpose.** Enumerate available cross-agent destinations — local projects on
-this machine and remote projects on tailnet peers. Validates SSH reachability
-for cross-machine destinations before reporting them as usable.
-
-## Usage
-
-```
-cross-agent-discover [--enumerate-remote] [--no-cache] [--peer <name>]
-```
-
-No args required for the common case (local enumeration + peer reachability).
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--enumerate-remote` | off | SSH into each peer and list projects under `~/projects/*/.ai/`. Off by default because SSH adds latency; turn on when you want to see what's available on a remote machine you haven't fully configured. |
-| `--no-cache` | off | Skip the 5-minute cache; force fresh discovery. |
-| `--peer <name>` | (all) | Limit to a single peer from `peers.toml`. |
-| `--json` | off | Machine-readable output. |
-
-## Output
-
-### Default
-
-```
-$ cross-agent-discover
-Local (ratio):
- career, claude-templates, clipper, danneel, documents, elibrary,
- finances, health, homelab, jr-estate, kit, little-elisper,
- philosophy, website [14 projects]
-
-Peers (from ~/.config/cross-agent-comms/peers.toml):
- velox.local reachable (last seen 2 sec ago)
- bastion.local UNREACHABLE (ssh exit 255: connection refused)
-```
-
-### With `--enumerate-remote`
-
-```
-$ cross-agent-discover --enumerate-remote
-Local (ratio):
- ... (as above)
-
-velox.local (reachable):
- career, homelab [2 projects]
-```
-
-## Configuration
-
-Reads `~/.config/cross-agent-comms/peers.toml`:
-
-```toml
-# Each peer is a remote machine reachable via SSH (typically over Tailscale).
-
-[peers.velox]
-host = "velox.local"
-ssh_user = "cjennings"
-
-[peers.bastion]
-host = "bastion.local"
-ssh_user = "cjennings"
-```
-
-Peers entries describe machines, NOT projects. Projects are enumerated
-on-demand under `~/projects/*/.ai/` either locally or via SSH.
-
-## Cache
-
-Successful discovery results are cached at
-`~/.cache/cross-agent-comms/discovery.json` for 5 minutes. Repeated invocations
-within the window read from cache.
-
-`--no-cache` forces a fresh probe. Useful when adding a new peer or after a
-network change.
-
-## SSH reachability check
-
-For each peer, runs:
-
-```
-ssh -o ConnectTimeout=2 -o BatchMode=yes <user>@<host> true
-```
-
-`BatchMode=yes` prevents interactive password prompts — peers that don't have
-key-based auth set up are reported as UNREACHABLE.
-
-If `--enumerate-remote` is set, on success runs:
-
-```
-ssh <user>@<host> 'ls -d ~/projects/*/.ai/ 2>/dev/null'
-```
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| Peer reported UNREACHABLE | Tailscale not connected, SSH key not authorized, host firewalled | `tailscale status`; `ssh -v <peer>` to debug. |
-| Local list is empty | Glob misresolved, or `~/projects/` doesn't exist | Check `ls -d ~/projects/*/.ai/`. |
-| `--enumerate-remote` slow | Cold cache, slow tailnet, many peers | First run is slow, subsequent runs hit cache. Use `--peer <name>` to scope. |
-| Peer unexpectedly missing from output | Not in `peers.toml`, or `peers.toml` malformed | `cat ~/.config/cross-agent-comms/peers.toml` and validate. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a
-prominent banner before normal output:
-
-```
-$ cross-agent-discover
-⚠ HALT ACTIVE — cross-agent comms paused
- Reason: <reason from HALT file body, if any>
- Resume with: cross-agent-resume
-
-(enumeration continues normally — HALT does not suppress visibility)
-
-Local (ratio):
- career, claude-templates, ...
-
-Peers:
- velox.local reachable
-```
-
-Discover is read-only. Like `cross-agent-status`, it always runs so the user
-keeps visibility into what destinations exist regardless of halt state. The
-banner makes the halt state impossible to miss.
-
-If the HALT file exists but is unreadable, print a warning banner and
-continue.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Common: see what's available
-cross-agent-discover
-
-# Force fresh probe after network change
-cross-agent-discover --no-cache
-
-# What's on velox specifically
-cross-agent-discover --peer velox --enumerate-remote
-
-# Pipe to grep
-cross-agent-discover --json | jq '.peers[] | select(.reachable)'
-```
-
-## See also
-
-- `cross-agent-send` — uses `peers.toml` for routing destinations.
-- `cross-agent-status` — local pending messages.
-- `cross-agent-comms.org` — protocol spec, `* Limitations` section
- explains the cross-machine model.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt
deleted file mode 100755
index df25115..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-"""Failsafe halt for cross-agent comms.
-
-See cross-agent-halt.md. Touches ~/.config/cross-agent-comms/HALT and stops
-the cross-agent-watch systemd user service. With --tailnet, propagates the
-HALT file to every peer in peers.toml via SSH; reports per-peer status with
-non-zero exit on partial halt.
-
-Does NOT pkill in-flight scripts — they detect HALT on next iteration and
-stop themselves.
-"""
-
-from __future__ import annotations
-
-import argparse
-import subprocess
-import sys
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-
-EXIT_OK = 0
-EXIT_PARTIAL = 1
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def write_halt_file(reason: str) -> None:
- CONFIG_DIR.mkdir(parents=True, exist_ok=True)
- HALT_FILE.write_text((reason + "\n") if reason else "")
-
-
-def stop_watcher_service() -> None:
- """Best-effort stop of the systemd watcher service. Failures are logged but not fatal."""
- try:
- subprocess.run(
- ["systemctl", "--user", "stop", "cross-agent-watch.path"],
- capture_output=True, text=True, timeout=5,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- # Watcher service may not be installed — fine.
- pass
-
-
-def load_peers() -> dict:
- if not PEERS_TOML.exists():
- return {}
- try:
- return tomllib.loads(PEERS_TOML.read_text())
- except (tomllib.TOMLDecodeError, OSError) as e:
- err(f"cannot parse peers.toml: {e}")
- return {}
-
-
-def ssh_touch_halt(host: str, ssh_user: str | None, reason: str) -> tuple[bool, str]:
- target = f"{ssh_user}@{host}" if ssh_user else host
- # Build the remote command. Quote the reason carefully.
- remote_cmd = (
- f"mkdir -p ~/.config/cross-agent-comms && "
- f"printf %s {_sh_quote(reason)} > ~/.config/cross-agent-comms/HALT"
- )
- try:
- result = subprocess.run(
- ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd],
- capture_output=True, text=True, timeout=10,
- )
- except (FileNotFoundError, subprocess.TimeoutExpired):
- return False, "ssh unavailable or timed out"
- if result.returncode == 0:
- return True, "HALT file written"
- return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
-
-
-def _sh_quote(s: str) -> str:
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Halt all cross-agent comms on this machine (and optionally tailnet).")
- parser.add_argument("reason", nargs="?", default="", help="Optional human-readable reason")
- parser.add_argument("--tailnet", action="store_true",
- help="Propagate HALT to every peer in peers.toml")
- args = parser.parse_args()
-
- # Local halt.
- write_halt_file(args.reason)
- stop_watcher_service()
- print("Halting locally ✓ (HALT file written)")
-
- if not args.tailnet:
- print()
- print(f"Halt active. Remove {HALT_FILE} or run cross-agent-resume to clear.")
- print("Agent polling will stop within ~5 min (one cadence cycle).")
- return EXIT_OK
-
- peers = load_peers().get("peers", {})
- if not peers:
- print()
- print("No peers configured in peers.toml — local-only halt complete.")
- return EXIT_OK
-
- print()
- successes = 1 # local already counted
- failures = []
- for name, cfg in sorted(peers.items()):
- host = cfg.get("host", name)
- ssh_user = cfg.get("ssh_user")
- ok, detail = ssh_touch_halt(host, ssh_user, args.reason)
- marker = "✓" if ok else "✗"
- print(f"Halting {host:<28} {marker} ({detail})")
- if ok:
- successes += 1
- else:
- failures.append(f"{name} ({host}): {detail}")
-
- print()
- total = len(peers) + 1
- if failures:
- print(f"PARTIAL HALT: {successes}/{total} machines halted.")
- for f in failures:
- print(f" - {f}")
- print("Resolve the failures or manually halt each machine.")
- return EXIT_PARTIAL
- print(f"Halt active across {total} machine(s).")
- return EXIT_OK
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt.md
deleted file mode 100644
index b817fbc..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-halt.md
+++ /dev/null
@@ -1,134 +0,0 @@
-# cross-agent-halt
-
-**Purpose.** Failsafe stop for all cross-agent activity on the local machine
-(or, with `--tailnet`, across all configured peers). Creates the HALT file
-that every component in the protocol checks; within one polling cadence
-(~5 min) all polling, sending, watching, and receiving stops.
-
-This is the user's emergency brake. Use when something is misbehaving and
-visiting individual sessions is too slow.
-
-## Usage
-
-```
-cross-agent-halt [reason] [--tailnet] [--no-stop-watcher]
-```
-
-### Positional argument
-
-| Position | Meaning | Example |
-|---|---|---|
-| 1 | Optional human-readable reason for the halt. Written into the HALT file's body. Helps future-you remember why you stopped things. | `"investigating runaway poll loop, 2026-04-27"` |
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--tailnet` | local only | Propagate halt to every peer in `peers.toml` via SSH over Tailscale. |
-| `--no-stop-watcher` | (stops watcher) | Skip stopping the `cross-agent-watch.path` systemd unit. Useful if the watcher is intentionally separate from comms (rare). |
-
-## Behavior
-
-### Local halt (default)
-
-1. Write the HALT file: `~/.config/cross-agent-comms/HALT`. If a `[reason]` was
- passed, write it as the file's body. Otherwise the file is empty (existence
- alone triggers halt).
-2. Stop the watcher service: `systemctl --user stop cross-agent-watch.path`
- (and the corresponding `.service` if running).
-3. Print a summary:
- ```
- ✓ HALT file written: ~/.config/cross-agent-comms/HALT
- ✓ Watcher service stopped (cross-agent-watch.path)
- - In-flight sends will complete their current rsync step (~seconds), then
- stop. New sends are blocked.
- - Active agent polling sessions stop within one cadence (~5 min).
- - Use `cross-agent-resume` to clear HALT.
- Per-session polling does NOT auto-resume — you re-engage each session by
- telling its agent to resume polling.
- ```
-4. Exit 0.
-
-### Cross-tailnet halt (`--tailnet`)
-
-1. Apply local halt steps 1-2 first.
-2. Read `peers.toml` for the list of remote machines.
-3. For each peer, SSH and write the HALT file:
- ```
- ssh <user>@<host> "echo '<reason>' > ~/.config/cross-agent-comms/HALT && \
- systemctl --user stop cross-agent-watch.path"
- ```
-4. Track per-peer success/failure. Print results:
- ```
- Halting velox.local ✓ (HALT file written)
- Halting bastion.local ✗ (ssh exit 255: no route to host)
- Halting locally ✓ (HALT file written)
-
- PARTIAL HALT: 2/3 machines halted. bastion.local needs manual halt.
- ```
-5. Exit 0 if all peers halted; exit 1 if any peer failed (so scripts can
- detect partial halt). The local halt always succeeds — even on `--tailnet`,
- if remote peers fail, local is still halted.
-
-## What "halt active" means for each component
-
-| Component | Behavior under HALT |
-|---|---|
-| `cross-agent-send` | Refuses to send. Exits 5 with "halt active; remove ~/.config/cross-agent-comms/HALT to resume." Checks HALT at start AND between each retry/rsync step, so an in-flight send completes its current step then stops. |
-| `cross-agent-recv` | Refuses to verify or dedup. Exits 5 with same message. Inbound files are **left in place** — not moved, not rejected — so resume picks them up cleanly via cold-start. |
-| `cross-agent-watch` | Continues running but suppresses notifications. Logs each event with `(suppressed by HALT)` so the operator can see what would have fired. |
-| `cross-agent-status` | Prints prominent `⚠ HALT ACTIVE` banner before normal output. Continues to enumerate (read-only). |
-| `cross-agent-discover` | Same banner. Continues (read-only). |
-| Agent polling loops | Check HALT on every wake. If set: write a final `progress` note to any active conversation ("HALT fired locally; pausing"), surface "(HALT active; cross-agent comms paused)" in every user response, and stop rescheduling. Polling decays naturally within one cadence. |
-| Conversation initiator | Refuses to write sequence 1 of any new conversation. Surfaces refusal to user. |
-| Startup workflow (Phase A) | Checks HALT at session boot. If set, surfaces immediately and skips cross-agent inbox checks. |
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `~/.config/cross-agent-comms/HALT` already exists | Halt was already active | OK — running halt again refreshes the reason text. Safe. |
-| `systemctl --user stop` fails | Watcher service not installed, or systemd not available | The HALT file is still written — components that check HALT will still stop. The systemctl failure surfaces as a non-fatal warning. |
-| `--tailnet` halts some peers but not others | One or more peers unreachable | Exit 1 with per-peer status. Manually halt the unreachable peers (visit each machine, `touch ~/.config/cross-agent-comms/HALT`), or fix the network and re-run. |
-| Permission denied writing the HALT file | `~/.config/cross-agent-comms/` doesn't exist or is owned by another user | `mkdir -p ~/.config/cross-agent-comms/`; check ownership. |
-
-## What halt does NOT do
-
-- Does not kill running Claude sessions. Polling stops within ~5 min, but the
- session itself stays alive and can be re-engaged after resume.
-- Does not delete pending messages. Inbound files in `inbox/from-agents/`
- remain; they get processed when polling resumes.
-- Does not abort in-flight rsync push mid-byte. Atomic-write semantics
- guarantee in-flight messages either complete cleanly or leave only `.tmp.*`
- files (which receivers ignore).
-
-## Examples
-
-```bash
-# Quick halt with no reason
-cross-agent-halt
-
-# Halt with a memo
-cross-agent-halt "runaway poll loop in homelab session, debugging"
-
-# Halt all tailnet peers + local
-cross-agent-halt --tailnet "shutting down for system update"
-
-# Halt protocol comms but leave the watcher service running
-cross-agent-halt --no-stop-watcher
-```
-
-## Recovery
-
-Always pair with `cross-agent-resume` when the situation is resolved:
-
-```bash
-cross-agent-resume # local
-cross-agent-resume --tailnet # all peers
-```
-
-## See also
-
-- `cross-agent-resume` — counterpart that clears HALT.
-- `cross-agent-status` — see HALT state at a glance.
-- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv
deleted file mode 100755
index b67533a..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/usr/bin/env python3
-"""Cross-agent message receiver.
-
-See cross-agent-recv.md for the full contract. Reads one message file and
-emits a structured decision the agent acts on:
-
- process | dedup | query | reject
-
-Decision exit codes:
- 0 = process 1 = dedup 2 = query 3 = reject
-
-When HALT is set, the script refuses to verify or dedup and leaves the
-inbound file in place — resume picks it up via cold-start.
-"""
-
-from __future__ import annotations
-
-import argparse
-import hashlib
-import json
-import re
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-EXPECTED_PROTOCOL_VERSION = "5"
-
-REQUIRED_FRONTMATTER = ["TITLE", "CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"]
-VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"}
-
-DEC_PROCESS = "process"
-DEC_DEDUP = "dedup"
-DEC_QUERY = "query"
-DEC_REJECT = "reject"
-
-EXIT_FOR_DECISION = {
- DEC_PROCESS: 0,
- DEC_DEDUP: 1,
- DEC_QUERY: 2,
- DEC_REJECT: 3,
-}
-
-EXIT_HALT = 5
-
-
-def err(msg: str) -> None:
- print(msg, file=sys.stderr)
-
-
-def check_halt() -> None:
- if HALT_FILE.exists():
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- err("halt active (HALT file present but unreadable; treated as halted)")
- sys.exit(EXIT_HALT)
- msg = "halt active; leaving inbound message in place (resume will pick up)"
- if reason:
- msg = f"{msg}: {reason}"
- err(msg)
- sys.exit(EXIT_HALT)
-
-
-def parse_frontmatter(path: Path) -> dict[str, str]:
- try:
- text = path.read_text()
- except OSError as e:
- return {"_parse_error": f"cannot read: {e}"}
- fm: dict[str, str] = {}
- for line in text.splitlines():
- line = line.rstrip()
- if not line:
- if fm:
- break
- continue
- m = re.match(r"#\+([A-Z_]+):\s*(.*)", line)
- if m:
- fm[m.group(1)] = m.group(2).strip()
- elif fm:
- break
- return fm
-
-
-def emit_decision(
- decision: str,
- reason: str | None,
- fm: dict[str, str],
- sha256: str | None,
- args: argparse.Namespace,
-) -> int:
- payload = {
- "decision": decision,
- "reason": reason,
- "message_type": fm.get("MESSAGE_TYPE"),
- "conversation_id": fm.get("CONVERSATION_ID"),
- "sequence": fm.get("SEQUENCE"),
- "timestamp": fm.get("TIMESTAMP"),
- "sha256": sha256,
- }
- if args.json:
- print(json.dumps(payload, indent=None if args.compact_json else 2))
- else:
- print(f"decision: {decision}")
- if reason:
- print(f"reason: {reason}")
- for k in ("message_type", "conversation_id", "sequence", "timestamp"):
- v = payload[k]
- if v is not None:
- print(f"{k}: {v}")
- if sha256:
- print(f"sha256: {sha256}")
- return EXIT_FOR_DECISION[decision]
-
-
-def gpg_verify(message_path: Path, sig_path: Path) -> tuple[bool, str]:
- try:
- result = subprocess.run(
- ["gpg", "--verify", str(sig_path), str(message_path)],
- capture_output=True,
- text=True,
- )
- except FileNotFoundError:
- return False, "gpg not installed"
- if result.returncode == 0:
- return True, ""
- return False, result.stderr.strip().splitlines()[-1] if result.stderr.strip() else f"exit {result.returncode}"
-
-
-def sha256_of(path: Path) -> str:
- h = hashlib.sha256()
- with path.open("rb") as f:
- for chunk in iter(lambda: f.read(65536), b""):
- h.update(chunk)
- return h.hexdigest()
-
-
-def find_dedup_match(message_path: Path, fm: dict[str, str], my_hash: str) -> tuple[str, str | None]:
- """Scan the message's directory for same-CONVERSATION_ID/SEQUENCE files.
-
- Returns (decision, reason) — decision is DEC_DEDUP for an exact-hash match,
- or DEC_PROCESS when no match or hash differs (sequence collision is OK).
- """
- parent = message_path.parent
- conv_id = fm["CONVERSATION_ID"]
- sequence = fm["SEQUENCE"]
- for sibling in parent.iterdir():
- if sibling == message_path or not sibling.is_file() or sibling.suffix != ".org":
- continue
- sib_fm = parse_frontmatter(sibling)
- if sib_fm.get("CONVERSATION_ID") != conv_id or sib_fm.get("SEQUENCE") != sequence:
- continue
- # Same conv-id + same sequence — check hash.
- if sha256_of(sibling) == my_hash:
- return DEC_DEDUP, f"identical retry of {sibling.name}"
- return DEC_PROCESS, None
-
-
-def check_requires_tools(fm: dict[str, str]) -> tuple[bool, list[str]]:
- """REQUIRES_TOOLS is a comma-separated list of tool names.
-
- For v5, "tool available" is a heuristic: an executable on PATH whose name
- matches the tool slug. MCP availability is currently out of scope (no
- portable way to query it from a CLI).
- """
- tools_field = fm.get("REQUIRES_TOOLS")
- if not tools_field:
- return True, []
- tools = [t.strip() for t in tools_field.split(",") if t.strip()]
- missing = [t for t in tools if shutil.which(t) is None]
- return len(missing) == 0, missing
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Receive and decide on a cross-agent message.")
- parser.add_argument("message_file", type=Path)
- parser.add_argument("--no-verify", action="store_true", help="Skip GPG verification (testing only)")
- parser.add_argument("--no-dedup", action="store_true", help="Skip SHA-256 dedup against existing files")
- parser.add_argument("--protocol-version", default=EXPECTED_PROTOCOL_VERSION,
- help="Override expected protocol version (default: 5)")
- parser.add_argument("--json", action="store_true", help="Emit JSON output")
- parser.add_argument("--compact-json", action="store_true", help="Compact JSON (no indent)")
- args = parser.parse_args()
-
- check_halt()
-
- if not args.message_file.is_file():
- err(f"message file not found: {args.message_file}")
- return EXIT_FOR_DECISION[DEC_REJECT]
-
- fm = parse_frontmatter(args.message_file)
- if "_parse_error" in fm:
- return emit_decision(DEC_REJECT, fm["_parse_error"], {}, None, args)
-
- # Step 1: frontmatter sanity-check.
- missing = [k for k in REQUIRED_FRONTMATTER if k not in fm]
- if missing:
- return emit_decision(
- DEC_REJECT, f"frontmatter missing required fields: {', '.join(missing)}", fm, None, args
- )
- if fm["MESSAGE_TYPE"] not in VALID_MESSAGE_TYPES:
- return emit_decision(
- DEC_REJECT, f"invalid MESSAGE_TYPE: {fm['MESSAGE_TYPE']!r}", fm, None, args
- )
-
- # Step 2: PROTOCOL_VERSION check.
- if fm["PROTOCOL_VERSION"] != args.protocol_version:
- return emit_decision(
- DEC_QUERY,
- f"PROTOCOL_VERSION mismatch: expected {args.protocol_version}, got {fm['PROTOCOL_VERSION']}",
- fm,
- None,
- args,
- )
-
- # Step 3: GPG verify.
- if not args.no_verify:
- sig_path = args.message_file.with_suffix(args.message_file.suffix + ".asc")
- if not sig_path.is_file():
- return emit_decision(DEC_REJECT, f"signature file missing: {sig_path.name}", fm, None, args)
- ok, gpg_err = gpg_verify(args.message_file, sig_path)
- if not ok:
- return emit_decision(DEC_REJECT, f"gpg verify failed: {gpg_err}", fm, None, args)
-
- # Step 4: SHA-256 dedup.
- my_hash = sha256_of(args.message_file)
- if not args.no_dedup:
- decision, reason = find_dedup_match(args.message_file, fm, my_hash)
- if decision == DEC_DEDUP:
- return emit_decision(DEC_DEDUP, reason, fm, my_hash, args)
-
- # Step 5: REQUIRES_TOOLS check.
- ok, missing_tools = check_requires_tools(fm)
- if not ok:
- return emit_decision(
- DEC_QUERY,
- f"required tools unavailable: {', '.join(missing_tools)}",
- fm,
- my_hash,
- args,
- )
-
- # Step 6: process.
- return emit_decision(DEC_PROCESS, None, fm, my_hash, args)
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv.md
deleted file mode 100644
index 247a27a..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-recv.md
+++ /dev/null
@@ -1,218 +0,0 @@
-# cross-agent-recv
-
-**Purpose.** The canonical receiver-side processor. Reads a single incoming
-message file and reports a structured decision the agent acts on:
-process / dedup / query / reject.
-
-The script handles only mechanical checks (frontmatter, signature, dedup,
-version, tools). Substance-level decisions like `pushback` ("I disagree with
-this request") happen one layer up — after the agent reads the message body
-the script returns as `process`-able.
-
-This is the read-side counterpart to `cross-agent-send`. Together they are the
-two halves of the per-message contract. The agent's polling loop calls
-`cross-agent-recv` on every new file in `inbox/from-agents/` and dispatches on
-the decision.
-
-Without this script, every receiver implementation re-invents GPG verify +
-frontmatter sanity-check + SHA-256 dedup. With it, behavior is consistent
-across projects.
-
-## Usage
-
-```
-cross-agent-recv <message-file>
-```
-
-Single positional argument: a `.org` file in `inbox/from-agents/`. The matching
-`.asc` signature file must be present alongside it.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--no-verify` | (verify on) | Skip GPG verification. Testing only. |
-| `--no-dedup` | (dedup on) | Skip SHA-256 dedup against existing files. Testing only. |
-| `--protocol-version <N>` | 5 | Override the expected protocol version. Useful for testing forward-compatibility checks. |
-| `--json` | off | Output decision as JSON for easier parsing by the agent. |
-
-## Behavior
-
-Runs the receiver checks in order. First failure determines the decision.
-
-### Step 1 — Frontmatter sanity-check
-
-Parse the message's org-mode frontmatter. Required fields:
-
-- `#+TITLE`
-- `#+CONVERSATION_ID`
-- `#+MESSAGE_TYPE` (must be one of: `request`, `progress`, `query`, `pushback`,
- `complete`, `release`, `escalate`)
-- `#+SEQUENCE` (integer)
-- `#+TIMESTAMP` (ISO 8601 with explicit offset)
-- `#+PROTOCOL_VERSION` (must match the expected version; default 5)
-
-Any required field missing, malformed, or the protocol version mismatched →
-decision = `reject` (frontmatter) or `query` (version mismatch — see below).
-
-### Step 2 — Protocol-version check
-
-If `PROTOCOL_VERSION` doesn't match the expected:
-
-- Decision = `query`. Action: receiver should write a `query` reply asking the
- sender to upgrade to the expected protocol version.
-
-### Step 3 — Signature verification
-
-Look for `<message-file>.asc` alongside the `.org`. If missing or `gpg
---verify` fails:
-
-- Decision = `reject` (signature). Surface to user; do not act.
-
-The `.asc` file MUST be present when the `.org` is — `cross-agent-send`
-guarantees this with its strict ordering (`.asc` lands first). If the `.asc`
-is missing despite the `.org` being present, the sender violated atomic-write
-ordering or the file was tampered with in transit.
-
-### Step 4 — SHA-256 dedup
-
-Compute SHA-256 of the message file. Scan the same directory for existing
-files matching `CONVERSATION_ID + SEQUENCE`:
-
-- No match → decision = `process` (new message, dispatch by type).
-- Match with **identical** SHA-256 → decision = `dedup` (silent retry; do not
- reprocess).
-- Match with **different** SHA-256 → decision = `process` (sequence collision
- with non-identical content; both are legitimate, ordered by `#+TIMESTAMP`).
-
-### Step 5 — REQUIRES_TOOLS optional check
-
-If the message has a `#+REQUIRES_TOOLS` field, verify each named tool/MCP is
-available in the receiver's environment.
-
-- All available → `process`.
-- One or more missing → decision = `query`. The agent should write a `query`
- reply naming the missing tools, asking the sender to reframe the request to
- avoid them.
-
-### Step 6 — Dispatch decision
-
-If all checks pass, decision = `process` with the parsed `MESSAGE_TYPE` so the
-agent's main loop knows which handler to invoke.
-
-## Output
-
-### Default (human-readable)
-
-```
-$ cross-agent-recv inbox/from-agents/20260427T091015Z-from-homelab-prep-fixup.org
-decision: process
-message_type: request
-conversation_id: prep-fixup
-sequence: 6
-sha256: a1b2c3d4...
-```
-
-### `--json`
-
-```json
-{
- "decision": "process",
- "reason": null,
- "message_type": "request",
- "conversation_id": "prep-fixup",
- "sequence": 6,
- "timestamp": "2026-04-27T04:11:42-05:00",
- "sha256": "a1b2c3d4..."
-}
-```
-
-For decisions other than `process`, `reason` carries a human-readable
-explanation:
-
-```json
-{
- "decision": "query",
- "reason": "PROTOCOL_VERSION mismatch: expected 5, got 4",
- "conversation_id": "prep-fixup",
- "sequence": 6
-}
-```
-
-## Decision exit codes
-
-| Decision | Exit code | Agent action |
-|---|---|---|
-| `process` | 0 | Dispatch to the message-type handler |
-| `dedup` | 1 | Silent — do nothing further |
-| `query` | 2 | Write a `query` reply (see `reason` for what to ask) |
-| `reject` | 3 | Surface to user; do not auto-reply |
-
-The agent reads stdout/JSON to learn the decision; it can also key off exit
-code for simpler bash-style dispatching.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `decision: reject (frontmatter)` | Required field missing or malformed | Open the message; fix or surface to user. The sender should not have produced this file. |
-| `decision: reject (signature)` | `.asc` missing, GPG verify failed, or signer unknown | Check that `.asc` exists alongside `.org`. If yes, run `gpg --verify <msg>.asc <msg>` manually for diagnostic output. |
-| `decision: query (PROTOCOL_VERSION)` | Sender on older/newer protocol | Reply with a `query` asking sender to upgrade. Both sides should align before continuing. |
-| `decision: query (REQUIRES_TOOLS)` | Receiver lacks one of the named tools | Reply with a `query` naming the missing tools; sender should reframe to avoid. |
-| `decision: dedup` | Already-processed identical retry | No action. The script handled it correctly. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at the start of every invocation. If
-HALT exists, exits with code 5 ("halt active; remove
-~/.config/cross-agent-comms/HALT to resume") without verifying, deduping, or
-returning a decision.
-
-**The inbound file is left in place** — not moved, not rejected, not
-deduped. When HALT clears and polling resumes, the file gets picked up via
-the normal cold-start handling (whichever surfaces first: watcher
-notification, startup workflow check, or the next agent poll). Reversibility
-is preserved.
-
-If the HALT file exists but is unreadable, fail-closed — treat as if HALT is
-set.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Basic invocation in an agent's polling loop
-for msg in inbox/from-agents/*.org; do
- decision=$(cross-agent-recv --json "$msg")
- case "$(echo "$decision" | jq -r '.decision')" in
- process) handle_message "$msg" ;;
- dedup) ;; # silent
- query) write_query_reply "$msg" "$decision" ;;
- reject) surface_to_user "$msg" "$decision" ;;
- esac
-done
-
-# Test signature verification only
-cross-agent-recv --no-dedup inbox/from-agents/test-msg.org
-
-# Test against a future protocol version
-cross-agent-recv --protocol-version 6 inbox/from-agents/future-msg.org
-```
-
-## Performance
-
-The script is fast (single SHA-256 compute, single GPG verify, frontmatter
-parse). For typical messages (single-digit KB), runs in well under 100ms.
-Dedup-scan is O(N) over files in the directory; if a project's
-`inbox/from-agents/` accumulates hundreds of files, archive released
-conversations to keep the scan fast.
-
-## See also
-
-- `cross-agent-send` — counterpart writer.
-- `cross-agent-watch` — fires when a new message arrives; agent then calls
- `cross-agent-recv` to process it.
-- `cross-agent-status` — pending-message snapshot (uses similar
- released-vs-unreleased logic, but doesn't process individual messages).
-- `cross-agent-comms.org` — protocol spec, the "what" the script implements.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume
deleted file mode 100755
index 1fb83bc..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/usr/bin/env python3
-"""Resume cross-agent comms after a halt.
-
-See cross-agent-resume.md. Removes ~/.config/cross-agent-comms/HALT and
-restarts the cross-agent-watch systemd user service. With --tailnet,
-propagates the removal to every peer in peers.toml via SSH; reports
-per-peer status with non-zero exit on partial resume.
-
-Per the asymmetry rule: clearing HALT does NOT auto-resume agent polling.
-Each session must explicitly re-engage.
-"""
-
-from __future__ import annotations
-
-import argparse
-import subprocess
-import sys
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-
-EXIT_OK = 0
-EXIT_PARTIAL = 1
-
-
def err(msg: str) -> None:
    """Write *msg*, followed by a newline, to standard error."""
    stream = sys.stderr
    print(msg, file=stream)
-
-
def remove_halt_file() -> bool:
    """Delete the local HALT file.

    Returns True if HALT was removed. Returns False if it did not exist, or
    if removal failed (in which case the error is reported on stderr).
    """
    try:
        # Unlink directly rather than exists()-then-unlink(): there is no
        # window in which the file can vanish between the check and the
        # removal, and a file that is already gone is not reported as an error.
        HALT_FILE.unlink()
    except FileNotFoundError:
        # Nothing to clear; resuming while not halted is a safe no-op.
        return False
    except OSError as e:
        err(f"could not remove HALT: {e}")
        return False
    return True
-
-
def start_watcher_service() -> None:
    """Ask systemd to start the user-level watcher path unit; ignore failures.

    A missing ``systemctl`` binary or a call that exceeds five seconds is
    swallowed. The command's exit status and output are not inspected.
    """
    command = ["systemctl", "--user", "start", "cross-agent-watch.path"]
    try:
        subprocess.run(command, capture_output=True, text=True, timeout=5)
    except FileNotFoundError:
        return
    except subprocess.TimeoutExpired:
        return
-
-
def load_peers() -> dict:
    """Load and parse peers.toml.

    Returns the parsed TOML document. Returns an empty dict when the file is
    missing, and also when it is unreadable or malformed (those two cases are
    reported on stderr first).
    """
    try:
        # TOML is UTF-8 by specification. Handing tomllib the raw bytes keeps
        # the result independent of the locale's preferred encoding.
        with PEERS_TOML.open("rb") as fh:
            return tomllib.load(fh)
    except FileNotFoundError:
        return {}
    except (tomllib.TOMLDecodeError, UnicodeDecodeError, OSError) as e:
        err(f"cannot parse peers.toml: {e}")
        return {}
-
-
def ssh_remove_halt(host: str, ssh_user: str | None) -> tuple[bool, str]:
    """Delete the HALT file on a remote peer over SSH.

    Returns ``(True, "HALT cleared")`` when the remote command exits 0.
    Otherwise returns ``(False, detail)``: the last line ssh wrote to stderr,
    ``"exit <code>"`` when stderr was empty, or a fixed message when ssh is
    not installed or the call timed out.
    """
    destination = host if not ssh_user else f"{ssh_user}@{host}"
    argv = [
        "ssh",
        "-o", "ConnectTimeout=3",
        "-o", "BatchMode=yes",
        destination,
        "rm -f ~/.config/cross-agent-comms/HALT",
    ]
    try:
        completed = subprocess.run(argv, capture_output=True, text=True, timeout=10)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False, "ssh unavailable or timed out"

    if completed.returncode != 0:
        stderr_lines = completed.stderr.strip().splitlines()
        if not stderr_lines:
            return False, f"exit {completed.returncode}"
        return False, stderr_lines[-1]
    return True, "HALT cleared"
-
-
def print_re_engage_instructions() -> None:
    """Print the reminder that each session must be re-engaged by hand."""
    lines = (
        "",
        "Halt cleared. Watcher restarted.",
        "",
        "Agent polling does NOT auto-resume — per the failsafe asymmetry rule,",
        "agents stay paused until you explicitly re-engage each session.",
        "Open the relevant Claude session and tell the agent to resume polling",
        "for its conversation.",
    )
    for line in lines:
        print(line)
-
-
def _local_halt_still_set() -> bool:
    """Return True if the local HALT file is still present (or cannot be checked)."""
    try:
        return HALT_FILE.exists()
    except OSError:
        # Fail closed: if we cannot tell, do not report the halt as cleared.
        return True


def main() -> int:
    """Clear the local HALT, start the watcher, and optionally resume every peer.

    Returns EXIT_OK when every targeted machine (the local one, plus each peer
    when --tailnet is given) was cleared, and EXIT_PARTIAL when any was not.
    """
    parser = argparse.ArgumentParser(description="Resume cross-agent comms after a halt.")
    parser.add_argument("--tailnet", action="store_true",
                        help="Propagate HALT removal to every peer in peers.toml")
    args = parser.parse_args()

    removed = remove_halt_file()
    start_watcher_service()
    # remove_halt_file() returns False both when no HALT existed and when the
    # removal failed, so look at the file again before claiming success.
    local_ok = removed or not _local_halt_still_set()
    if removed:
        print("Resuming locally ✓ (HALT cleared)")
    elif local_ok:
        print("Resuming locally ✓ (no HALT was active)")
    else:
        print("Resuming locally ✗ (HALT could not be removed)")

    # peers.toml is only consulted when propagation was requested.
    peers = load_peers().get("peers", {}) if args.tailnet else {}

    if not peers:
        if args.tailnet:
            print()
            if local_ok:
                print("No peers configured in peers.toml — local-only resume complete.")
            else:
                print("No peers configured in peers.toml.")
        if not local_ok:
            print(f"Local HALT is still set; remove {HALT_FILE} manually.")
            return EXIT_PARTIAL
        print_re_engage_instructions()
        return EXIT_OK

    print()
    # The local machine counts towards the totals alongside the peers.
    successes = 1 if local_ok else 0
    failures = [] if local_ok else ["local: HALT could not be removed"]
    for name, cfg in sorted(peers.items()):
        host = cfg.get("host", name)
        ssh_user = cfg.get("ssh_user")
        ok, detail = ssh_remove_halt(host, ssh_user)
        marker = "✓" if ok else "✗"
        print(f"Resuming {host:<27} {marker} ({detail})")
        if ok:
            successes += 1
        else:
            failures.append(f"{name} ({host}): {detail}")

    print()
    total = len(peers) + 1
    if failures:
        print(f"PARTIAL RESUME: {successes}/{total} machines cleared.")
        for failure in failures:
            print(f" - {failure}")
        print("Resolve the failures or manually clear HALT on each machine.")
        if local_ok:
            # The instructions open with "Halt cleared", which is only true
            # for this machine when the local removal succeeded.
            print_re_engage_instructions()
        return EXIT_PARTIAL

    print(f"Resume complete across {total} machine(s).")
    print_re_engage_instructions()
    return EXIT_OK
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume.md
deleted file mode 100644
index 8aa8357..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-resume.md
+++ /dev/null
@@ -1,117 +0,0 @@
-# cross-agent-resume
-
-**Purpose.** Clear the HALT file and restart the watcher service. Counterpart
-to `cross-agent-halt`. Resuming agent polling is **explicit per-session** —
-this script doesn't auto-revive halted polling loops; you tell each session
-to re-engage.
-
-## Usage
-
-```
-cross-agent-resume [--tailnet]
-```
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--tailnet` | local only | Clear HALT on every peer in `peers.toml` via SSH over Tailscale. |
-
-## Behavior
-
-### Local resume (default)
-
-1. Remove the HALT file: `rm -f ~/.config/cross-agent-comms/HALT`. (Use `-f`
- so a missing file isn't an error — running resume when not halted is safe.)
-2. Restart the watcher service: `systemctl --user start cross-agent-watch.path`.
-3. Print a summary:
- ```
- ✓ HALT file removed
- ✓ Watcher service started (cross-agent-watch.path)
- - cross-agent-send and cross-agent-recv will accept new operations.
- - Inbound messages held during halt will be picked up by the watcher.
- - Agent polling does NOT auto-resume. To re-engage polling in a paused
- session, open that Claude session and tell the agent to resume.
- ```
-4. Exit 0.
-
-### Cross-tailnet resume (`--tailnet`)
-
-1. Apply local resume steps 1-2 first.
-2. Read `peers.toml` for the list of remote machines.
-3. For each peer, SSH:
- ```
- ssh <user>@<host> "rm -f ~/.config/cross-agent-comms/HALT && \
- systemctl --user start cross-agent-watch.path"
- ```
-4. Track per-peer success/failure:
- ```
- Resuming velox.local ✓ (HALT cleared, watcher started)
- Resuming bastion.local ✗ (ssh exit 255: no route to host)
- Resuming locally ✓
-
- PARTIAL RESUME: 2/3 machines resumed. bastion.local still halted.
- ```
-5. Exit 0 if all peers resumed; exit 1 on any failure.
-
-## Why agent polling doesn't auto-resume
-
-Two reasons the asymmetry is deliberate:
-
-1. *Auto-resume could silently invert intentional kills.* If you halted
- because a session was misbehaving, removing HALT shouldn't quietly revive
- that session's polling. You re-engage explicitly so you're aware of which
- sessions came back online.
-
-2. *You may want to inspect before resuming.* After a halt, you might want to
- read pending messages, fix configuration, or kill a particular Claude
- session entirely. Per-session resume forces that pause.
-
-## Re-engaging polling in a Claude session
-
-After `cross-agent-resume`, open the relevant Claude session and say something
-like:
-
-```
-HALT is cleared; resume polling.
-```
-
-The agent will check the HALT file (now absent), re-create its polling
-schedule, and continue the in-flight conversation from wherever it left off.
-The conversation file is intact; the receiver will pick up any new messages
-that arrived during the halt window.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| HALT file doesn't exist | Already resumed (or never halted) | OK — `-f` makes this a no-op. |
-| `systemctl --user start` fails | Watcher service not installed | Install per `cross-agent-watch.md`'s systemd recipe. |
-| `--tailnet` resumes some peers but not others | Same as halt: peer unreachable | Per-peer status reported; resolve manually for unreachable peers. |
-| Permission denied removing HALT file | File owned by another user | Check ownership; HALT files should be owned by the running user. |
-
-## Examples
-
-```bash
-# Local resume after a halt
-cross-agent-resume
-
-# Resume all tailnet peers + local
-cross-agent-resume --tailnet
-```
-
-## Recovery flow
-
-After a halt:
-
-1. Investigate whatever caused the halt (runaway loop, bad config, etc.).
-2. Fix the underlying issue.
-3. Run `cross-agent-resume`.
-4. Open each Claude session that was polling and tell its agent to re-engage.
-5. Confirm operation with `cross-agent-status`.
-
-## See also
-
-- `cross-agent-halt` — counterpart that creates the HALT file.
-- `cross-agent-status` — verify HALT cleared and see pending messages.
-- `cross-agent-comms.org` — protocol spec, `* Halt mechanism` section.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send
deleted file mode 100755
index 68c010a..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send
+++ /dev/null
@@ -1,356 +0,0 @@
-#!/usr/bin/env python3
-"""Cross-agent message sender.
-
-See cross-agent-send.md for the full contract. Briefly:
-
-- Destination as <machine>.<project>; resolved via peers.toml.
-- Same-machine: cp to receiver's inbox/from-agents/ with atomic rename.
-- Cross-machine: rsync over SSH (typically Tailscale) with retry+backoff.
-- GPG-signs by default; .asc renames before .org so receivers never see
- a .org without its sibling signature.
-- Generates the canonical filename; user's input filename is ignored.
-- Honors the HALT file: refuses to send and exits with code 5 when set.
-"""
-
-from __future__ import annotations
-
-import argparse
-import datetime as _dt
-import json
-import os
-import re
-import shutil
-import socket
-import subprocess
-import sys
-import tempfile
-import time
-import tomllib
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-PEERS_TOML = CONFIG_DIR / "peers.toml"
-HALT_FILE = CONFIG_DIR / "HALT"
-STATE_DIR = Path.home() / ".local" / "state" / "cross-agent-comms"
-FAILED_SENDS_DIR = STATE_DIR / "failed-sends"
-
-EXIT_OK = 0
-EXIT_GENERAL = 1
-EXIT_DEST_NOT_FOUND = 2
-EXIT_CROSS_MACHINE_FAILED = 3
-EXIT_FRONTMATTER = 4
-EXIT_HALT = 5
-
-REQUIRED_FRONTMATTER = ["CONVERSATION_ID", "MESSAGE_TYPE", "SEQUENCE", "TIMESTAMP", "PROTOCOL_VERSION"]
-VALID_MESSAGE_TYPES = {"request", "progress", "query", "pushback", "complete", "release", "escalate"}
-
-
def err(msg: str) -> None:
    """Write *msg*, followed by a newline, to standard error."""
    stream = sys.stderr
    print(msg, file=stream)
-
-
def check_halt() -> None:
    """Exit with code 5 if the HALT file is present.

    Returns normally only when the HALT file does not exist. If the file is
    there but cannot be read, the halt is still honored (fail-closed).
    """
    try:
        # Read directly instead of exists()-then-read: any OS-level failure
        # other than "no such file" then lands in the fail-closed branch
        # below rather than escaping as a traceback. errors="replace" keeps
        # undecodable bytes in the reason from raising UnicodeDecodeError,
        # which is not an OSError.
        reason = HALT_FILE.read_text(errors="replace").strip()
    except (FileNotFoundError, NotADirectoryError):
        # No HALT file (or no config directory at all): not halted.
        return
    except OSError:
        # Fail-closed on unreadable HALT.
        err("halt active (HALT file present but unreadable; treated as halted)")
        err(f"remove {HALT_FILE} to resume")
        sys.exit(EXIT_HALT)
    msg = "halt active"
    if reason:
        msg += f": {reason}"
    err(msg)
    err(f"remove {HALT_FILE} to resume")
    sys.exit(EXIT_HALT)
-
-
def parse_frontmatter(path: Path) -> dict[str, str]:
    """Read the leading org-mode ``#+KEY: value`` lines of *path* into a dict.

    Lines before the first keyword line are skipped. The first blank or
    non-keyword line after at least one keyword ends the scan. If the file
    cannot be read, an error is printed and the process exits with
    EXIT_GENERAL.
    """
    try:
        text = path.read_text()
    except OSError as e:
        err(f"cannot read message file: {e}")
        sys.exit(EXIT_GENERAL)

    keyword = re.compile(r"#\+([A-Z_]+):\s*(.*)")
    fields: dict[str, str] = {}
    for raw in text.splitlines():
        # A blank line never matches, so blank and non-keyword lines share
        # one rule: skip while nothing is collected yet, stop once it is.
        hit = keyword.match(raw.rstrip())
        if hit is not None:
            fields[hit.group(1)] = hit.group(2).strip()
        elif fields:
            break
    return fields
-
-
def validate_frontmatter(fm: dict[str, str]) -> None:
    """Exit with EXIT_FRONTMATTER unless *fm* passes the pre-send checks.

    Checked, in order: every name in REQUIRED_FRONTMATTER is present,
    MESSAGE_TYPE is one of VALID_MESSAGE_TYPES, and SEQUENCE parses as an
    integer. Returns None when all three hold.
    """
    absent = [name for name in REQUIRED_FRONTMATTER if name not in fm]
    if absent:
        err(f"frontmatter missing required fields: {', '.join(absent)}")
        sys.exit(EXIT_FRONTMATTER)

    message_type = fm["MESSAGE_TYPE"]
    if message_type not in VALID_MESSAGE_TYPES:
        err(f"invalid MESSAGE_TYPE: {message_type!r}; expected one of {sorted(VALID_MESSAGE_TYPES)}")
        sys.exit(EXIT_FRONTMATTER)

    sequence = fm["SEQUENCE"]
    try:
        int(sequence)
    except ValueError:
        err(f"SEQUENCE must be an integer; got {sequence!r}")
        sys.exit(EXIT_FRONTMATTER)
-
-
def load_peers() -> dict:
    """Load and parse peers.toml.

    Returns the parsed TOML document, or an empty dict when the file does not
    exist. An unreadable or malformed file is reported on stderr and the
    process exits with EXIT_GENERAL.
    """
    try:
        # TOML is UTF-8 by specification. Handing tomllib the raw bytes keeps
        # the result independent of the locale's preferred encoding.
        with PEERS_TOML.open("rb") as fh:
            return tomllib.load(fh)
    except FileNotFoundError:
        return {}
    except (tomllib.TOMLDecodeError, UnicodeDecodeError, OSError) as e:
        err(f"cannot read {PEERS_TOML}: {e}")
        sys.exit(EXIT_GENERAL)
-
-
-def resolve_destination(dest: str, peers: dict) -> tuple[str, str, str | None, str | None]:
- """Resolve <machine>.<project> to (machine, project, host, ssh_user).
-
- host is None for same-machine destinations.
- """
- if "." not in dest:
- err(f"destination must be <machine>.<project>; got {dest!r}")
- sys.exit(EXIT_DEST_NOT_FOUND)
- machine, project = dest.split(".", 1)
-
- local_hostname = socket.gethostname().split(".")[0]
- is_local = machine == local_hostname or machine == "local"
-
- host = None
- ssh_user = None
- if not is_local:
- peer_cfg = peers.get("peers", {}).get(machine)
- if peer_cfg is None:
- available = list(peers.get("peers", {}).keys())
- err(f"destination not found in peers.toml; available peers: {available or '(none)'}")
- sys.exit(EXIT_DEST_NOT_FOUND)
- host = peer_cfg.get("host", machine)
- ssh_user = peer_cfg.get("ssh_user", os.environ.get("USER"))
-
- return machine, project, host, ssh_user
-
-
def resolve_inbox_path(project: str, peers: dict) -> str:
    """Inbox path on the receiver. Defaults to ~/projects/<project>/inbox/from-agents.

    A ``[projects.<project>]`` entry with ``inbox_path`` in peers.toml
    overrides the default. Either way a leading ``~`` is returned untouched:
    the path describes the receiver's filesystem, so expanding it against the
    sender's home directory would be wrong for a remote receiver. The
    same-machine branch of main() expands it before use, and the default path
    was already handed to rsync unexpanded.
    """
    proj_cfg = peers.get("projects", {}).get(project)
    if proj_cfg and "inbox_path" in proj_cfg:
        return proj_cfg["inbox_path"]
    return f"~/projects/{project}/inbox/from-agents"
-
-
def derive_sender_project() -> str:
    """Name the sending project from the current working directory.

    When CWD is somewhere below ~/projects/<name>/, returns <name>. Otherwise,
    including when CWD is ~/projects itself, returns the basename of CWD.
    """
    cwd = Path.cwd().resolve()
    projects_root = (Path.home() / "projects").resolve()
    try:
        rel = cwd.relative_to(projects_root)
    except ValueError:
        # CWD is not under ~/projects at all.
        return cwd.name
    # CWD == ~/projects gives an empty relative path, so there is no first
    # component to take; use the directory name instead of raising IndexError.
    return rel.parts[0] if rel.parts else cwd.name
-
-
def generate_canonical_filename(sender: str, conv_id: str) -> str:
    """YYYYMMDDTHHMMSSZ-from-<sender>-<conv-id>.org

    The timestamp is the current UTC time. Path separators and NUL bytes in
    *sender* and *conv_id* are replaced with "_" so the result is always a
    single path component: conv_id is taken from message frontmatter, and a
    "/" in it would otherwise let the delivered file land outside the inbox
    directory.
    """
    unsafe = {ord("/"): "_", 0: "_"}
    now = _dt.datetime.now(_dt.timezone.utc)
    timestamp = now.strftime("%Y%m%dT%H%M%SZ")
    return f"{timestamp}-from-{sender.translate(unsafe)}-{conv_id.translate(unsafe)}.org"
-
-
def sign(message_path: Path, sig_path: Path, key: str | None) -> None:
    """Write an armored detached GPG signature of *message_path* to *sig_path*.

    Runs ``gpg --detach-sign --armor --yes --output <sig> [--local-user <key>]
    <message>``. Exits with EXIT_GENERAL if gpg is not installed or returns a
    non-zero status.
    """
    argv = ["gpg", "--detach-sign", "--armor", "--yes", "--output", str(sig_path)]
    if key:
        argv += ["--local-user", key]
    argv.append(str(message_path))

    try:
        proc = subprocess.run(argv, capture_output=True, text=True)
    except FileNotFoundError:
        err("gpg not found; install gnupg or use --no-sign for testing")
        sys.exit(EXIT_GENERAL)

    if proc.returncode == 0:
        return
    err(f"signing failed: {proc.stderr.strip()}")
    sys.exit(EXIT_GENERAL)
-
-
-def same_machine_deliver(message_path: Path, sig_path: Path | None, target_dir: Path, canonical_name: str) -> None:
- """Atomic-write delivery: stage .asc, mv to final, then stage .org, mv to final."""
- target_dir.mkdir(parents=True, exist_ok=True)
- final_msg = target_dir / canonical_name
- final_sig = target_dir / f"{canonical_name}.asc"
-
- if sig_path is not None:
- # Stage .asc first, mv to final, THEN stage .org and mv to final.
- with tempfile.NamedTemporaryFile(
- mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.asc.", delete=False
- ) as tmp:
- tmp.write(sig_path.read_bytes())
- tmp_sig_path = Path(tmp.name)
- os.replace(tmp_sig_path, final_sig)
-
- # Re-check HALT between .asc and .org per the layered-checks rule.
- check_halt()
-
- with tempfile.NamedTemporaryFile(
- mode="wb", dir=target_dir, prefix=f".tmp.{canonical_name}.", delete=False
- ) as tmp:
- tmp.write(message_path.read_bytes())
- tmp_msg_path = Path(tmp.name)
- os.replace(tmp_msg_path, final_msg)
-
-
def cross_machine_deliver(
    message_path: Path,
    sig_path: Path | None,
    canonical_name: str,
    host: str,
    ssh_user: str,
    inbox_path: str,
    retries: int,
) -> bool:
    """Push a message to a remote inbox with rsync, signature first.

    Both files are first copied into a temporary directory under their
    canonical names, so rsync creates the right filename at the destination.
    The ``.asc`` (when *sig_path* is given) is pushed before the ``.org``,
    with a HALT re-check in between.

    Returns True on success, False when either push still fails after its
    retries.
    """
    retry_delays = [5, 30, 120]
    with tempfile.TemporaryDirectory(prefix="cross-agent-send-") as staging:
        staged_msg = Path(staging) / canonical_name
        staged_msg.write_bytes(message_path.read_bytes())

        if sig_path is not None:
            staged_sig = Path(staging) / f"{canonical_name}.asc"
            staged_sig.write_bytes(sig_path.read_bytes())
            # Signature goes first.
            sig_sent = _rsync_with_retries(
                staged_sig, host, ssh_user, inbox_path, retries, retry_delays
            )
            if not sig_sent:
                return False

        # Re-check HALT between .asc and .org per the layered-checks rule.
        check_halt()

        msg_sent = _rsync_with_retries(
            staged_msg, host, ssh_user, inbox_path, retries, retry_delays
        )
        return True if msg_sent else False
-
-
def _rsync_with_retries(
    src: Path, host: str, ssh_user: str | None, inbox_path: str, retries: int, backoffs: list[int]
) -> bool:
    """rsync *src* into the remote inbox, retrying with backoff on failure.

    Makes up to ``retries + 1`` attempts. Before each retry HALT is
    re-checked, the previous failure is reported on stderr, and the call
    sleeps for the matching entry of *backoffs* (the last entry is reused
    once the list runs out; an empty list means no wait).

    Returns True as soon as rsync exits 0. Returns False when rsync is not
    installed or every attempt failed.
    """
    # ssh_user is None when the peer entry has no ssh_user and $USER is
    # unset; leave the "user@" part off then instead of targeting "None@host".
    remote = f"{ssh_user}@{host}" if ssh_user else host
    target = f"{remote}:{inbox_path}/"
    last_err = ""
    for attempt in range(retries + 1):
        if attempt > 0:
            check_halt()
            wait = backoffs[min(attempt - 1, len(backoffs) - 1)] if backoffs else 0
            err(f"rsync attempt {attempt} failed: {last_err}; retrying in {wait}s")
            time.sleep(wait)
        try:
            result = subprocess.run(
                ["rsync", "-a", str(src), target],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            err("rsync not found; install rsync")
            return False
        if result.returncode == 0:
            return True
        last_err = result.stderr.strip() or f"exit {result.returncode}"
    err(f"rsync failed after {retries + 1} attempts: {last_err}")
    return False
-
-
def write_failed_send_marker(dest: str, message_path: Path, error: str, retry_log: list[str]) -> None:
    """Record a failed send as a JSON marker file under FAILED_SENDS_DIR.

    The marker is named ``<UTC timestamp>-<dest>-<message basename>.json`` and
    holds the timestamp, destination, message path, error text and retry log.
    Its path is reported on stderr.
    """
    FAILED_SENDS_DIR.mkdir(parents=True, exist_ok=True)
    timestamp = _dt.datetime.now(_dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    safe_basename = re.sub(r"[^A-Za-z0-9._-]", "_", message_path.name)
    # dest is user input; a "/" in its project part would point the marker
    # into a subdirectory that does not exist. Sanitize it the same way as
    # the basename, then apply the usual dot-to-dash mapping.
    safe_dest = re.sub(r"[^A-Za-z0-9._-]", "_", dest).replace(".", "-")
    marker = FAILED_SENDS_DIR / f"{timestamp}-{safe_dest}-{safe_basename}.json"
    marker.write_text(json.dumps(
        {
            "timestamp": timestamp,
            "destination": dest,
            "message_path": str(message_path),
            "error": error,
            "retry_log": retry_log,
        },
        indent=2,
    ))
    err(f"marker written: {marker}")
-
-
def main() -> int:
    """Parse the command line, sign the message, and deliver it.

    Returns EXIT_OK after a successful delivery, EXIT_GENERAL when the message
    file does not exist, and EXIT_CROSS_MACHINE_FAILED (after writing a
    failed-send marker) when a remote push fails. The helpers it calls may
    exit the process themselves with their own codes. The temporary signature
    file written next to the message is removed once delivery has been
    attempted.
    """
    parser = argparse.ArgumentParser(description="Send a cross-agent message.")
    parser.add_argument("destination", help="Destination as <machine>.<project>")
    parser.add_argument("message_file", type=Path, help="Path to the message body file")
    parser.add_argument("--no-sign", action="store_true", help="Skip GPG signing (testing only)")
    parser.add_argument("--retries", type=int, default=3, help="Retry count for cross-machine sends")
    parser.add_argument("--key", help="GPG key id to sign with (default: user's primary)")
    args = parser.parse_args()

    check_halt()

    message_file = args.message_file
    if not message_file.is_file():
        err(f"message file not found: {message_file}")
        return EXIT_GENERAL

    fm = parse_frontmatter(message_file)
    validate_frontmatter(fm)

    peers = load_peers()
    _machine, project, host, ssh_user = resolve_destination(args.destination, peers)
    inbox_path = resolve_inbox_path(project, peers)
    canonical_name = generate_canonical_filename(derive_sender_project(), fm["CONVERSATION_ID"])

    # The signature is staged beside the input as <name><suffix>.asc.tmp.
    sig_tmp = None if args.no_sign else message_file.with_suffix(message_file.suffix + ".asc.tmp")
    if sig_tmp is not None:
        sign(message_file, sig_tmp, args.key)

    try:
        if host is None:
            # Same-machine delivery.
            target_dir = Path(os.path.expanduser(inbox_path))
            same_machine_deliver(message_file, sig_tmp, target_dir, canonical_name)
            print(f"sent: {target_dir}/{canonical_name}")
            return EXIT_OK

        delivered = cross_machine_deliver(
            message_file, sig_tmp, canonical_name, host, ssh_user, inbox_path, args.retries
        )
        if not delivered:
            write_failed_send_marker(args.destination, message_file, "rsync failed after retries", [])
            return EXIT_CROSS_MACHINE_FAILED
        print(f"sent: {ssh_user}@{host}:{inbox_path}/{canonical_name}")
        return EXIT_OK
    finally:
        if sig_tmp is not None and sig_tmp.exists():
            sig_tmp.unlink()
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send.md
deleted file mode 100644
index 29bfb24..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-send.md
+++ /dev/null
@@ -1,199 +0,0 @@
-# cross-agent-send
-
-**Purpose.** Send a cross-agent message file to a specific destination. Handles
-peer-config lookup, GPG signing, atomic write (same-machine) or rsync push
-(cross-machine), retry-with-backoff, and failure surfacing.
-
-This is the canonical writer. The protocol spec defers all writer mechanics to
-this script.
-
-## Usage
-
-```
-cross-agent-send <destination> <message-file> [--no-sign] [--retries N] [--key <key-id>]
-```
-
-### Positional arguments
-
-| Position | Meaning | Example |
-|---|---|---|
-| 1 | Destination as `<machine>.<project>` | `homelab.career`, `velox.career` |
-| 2 | Message file (already-formatted `.org`) | `/tmp/my-message.org` |
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--no-sign` | (signing on) | Skip GPG signing. Use only for testing; receivers reject unsigned messages by default. |
-| `--retries N` | 3 | Override retry count for cross-machine sends. |
-| `--key <key-id>` | (user's primary key) | GPG key to sign with. Resolution order: `--key` flag, `GPG_USER` env, `git config user.signingkey`, then the first secret key in the keyring. |
-
-## Behavior
-
-### Filename generation (script-controlled)
-
-The script generates the canonical destination filename from the message's
-frontmatter and sender context. The user's input filename is ignored — pass any
-path, the script names the destination correctly:
-
-```
-<YYYYMMDD>T<HHMMSS>Z-from-<sender-slug>-<short-conv-id>.org
-```
-
-`<sender-slug>` comes from the sender machine's project name (config or
-hostname-based). `<short-conv-id>` is read from the message's
-`#+CONVERSATION_ID` frontmatter field. UTC timestamp is generated at send time.
-
-The script also performs the **sender-side max-seen scan** before writing: it
-reads the receiver's `from-agents/` directory, finds the highest existing
-sequence in this conversation across both sender prefixes, and (best-effort)
-suggests `max(seen) + 1` for the next sequence. The user/agent is responsible
-for setting `#+SEQUENCE` in the message body; the script only advises.
-
-### Same-machine destinations
-
-Resolved when the destination's machine matches the current hostname (or is
-not in `peers.toml` as a remote). Steps:
-
-1. Parse frontmatter; extract `CONVERSATION_ID` and `TIMESTAMP`. Validate per
- the *Validation before send* section below.
-2. Generate canonical filename per *Filename generation* above.
-3. Sign: `gpg --detach-sign --armor --output <canonical>.asc --local-user <key> <input>`.
-4. Compute target: read `peers.toml` for the project's `inbox_path`. If
- missing, fall back to `~/projects/<project>/inbox/from-agents/`.
-5. **Atomic write with strict ordering** (signature must precede message):
- - Stage `.asc`: write to `<target>/.tmp.XXXXXX-<canonical>.asc`,
- then `mv` to `<target>/<canonical>.asc`.
- - **Then** stage `.org`: write to `<target>/.tmp.XXXXXX-<canonical>`,
- then `mv` to `<target>/<canonical>`.
- - Receivers only act on `.org` files; staging the `.asc` first guarantees
- the signature is present when the receiver opens the message. Out-of-order
- would race: receiver could read the `.org` before the `.asc` lands and
- fail GPG verify even though the sender did everything right.
-6. Exit 0 on success. Exit non-zero if any step fails.
-
-### Cross-machine destinations
-
-Steps:
-
-1. Parse + generate canonical filename, as same-machine steps 1-2.
-2. Sign locally to `<input>.asc` (or a tmp staging file).
-3. rsync push **with the same .asc-first ordering**:
- - `rsync -a <input>.asc <ssh-user>@<host>:<inbox_path>/<canonical>.asc`
- - **Then** `rsync -a <input> <ssh-user>@<host>:<inbox_path>/<canonical>`
- rsync writes to a hidden temp file then renames atomically by default
- (`--inplace` would defeat this; do not pass it).
-4. Retry on failure: 5s, 30s, 120s backoff, then surface error.
-5. On persistent failure: write a marker file to
- `~/.local/state/cross-agent-comms/failed-sends/<timestamp>-<dest>-<canonical>.json`
- containing the destination, message path, error, and retry log. Exit non-zero.
-
-### Validation before send
-
-- Destination resolves via `peers.toml` (or local fallback). If neither, exit
- immediately with `destination not found in peers.toml; available: <list>`.
-- Message file must be readable, non-empty, and have valid org-mode frontmatter
- with **all** of the following required fields:
- - `#+TITLE`
- - `#+CONVERSATION_ID`
- - `#+MESSAGE_TYPE`
- - `#+SEQUENCE`
- - `#+TIMESTAMP`
- - `#+PROTOCOL_VERSION` (must equal `5` for v5)
-
- If any required field is missing or malformed, exit immediately with a parse
- error naming the offending field.
-
-- Optional fields the script recognizes and passes through (no special
- handling beyond preservation):
- - `#+REQUIRES_TOOLS` — comma-separated tool/MCP slugs the receiver needs.
- - `#+RELEASE_STATUS` — valid only on `MESSAGE_TYPE: release`. Values per
- spec: `complete`, `cancelled`, `withdrawn-after-pushback`,
- `abandoned-after-escalation`.
- - `#+WORKFLOW_VERSION` — sender's version of the cross-agent-comms workflow
- file. Currently advisory; receiver may warn on mismatch but does not block.
-
-## Configuration
-
-Reads `~/.config/cross-agent-comms/peers.toml` for peer routing:
-
-```toml
-[peers.velox]
-host = "velox.local"
-ssh_user = "cjennings"
-
-# Optional: per-project inbox-path overrides for non-default layouts.
-[projects.work]
-inbox_path = "~/projects/work/inbox/from-agents"
-
-[projects.homelab]
-inbox_path = "~/projects/homelab/inbox/from-agents"
-```
-
-If a project entry is omitted, defaults to `~/projects/<project>/inbox/from-agents`.
-
-## Failure modes
-
-| Symptom | Cause | Fix |
-|---|---|---|
-| `destination not found in peers.toml` | Misspelled destination, or peer not configured | Run `cross-agent-discover` to see available destinations. |
-| `signing failed: no secret key` | GPG key missing or not in keyring | `gpg --list-secret-keys` to confirm. Override with `--key <id>`. |
-| `signing failed: pinentry timed out` | Headless session, GUI pinentry unavailable | Confirm `pinentry-program` in `gpg-agent.conf` matches available pinentry. Per protocols.org, GUI pinentry works from Claude Code. |
-| `rsync exit 255` | SSH unreachable | `cross-agent-discover --peer <name>` to confirm reachability. |
-| `rsync exit 23` | Permission denied at destination | Check destination directory perms (`chmod 700`) and ownership. |
-| Marker file written to `failed-sends/` | Persistent cross-machine failure | Inspect the marker's `error` field. After fixing, retry: `cross-agent-send <dest> <msg>` (the marker is for visibility; it does not auto-retry). |
-| Receiver complains "unsigned message" | `--no-sign` was used in production | Don't use `--no-sign` outside testing. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at the start of every send AND
-between the `.asc` and `.org` rsync calls AND between each retry iteration.
-If the HALT file exists, the script exits with code 5 ("halt active; remove
-~/.config/cross-agent-comms/HALT to resume") without writing or pushing
-anything further.
-
-Worst case: one in-flight send completes its current rsync step within a few
-seconds before halt kicks in for the next step. New sends are blocked
-immediately. No `pkill` needed — the per-iteration check stops things
-naturally.
-
-If the HALT file exists but is unreadable (permissions wrong), fail-closed —
-treat as if HALT is set. Safer than fail-open.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Same-machine send
-cross-agent-send homelab.career /tmp/my-message.org
-
-# Cross-machine send via Tailscale
-cross-agent-send velox.career /tmp/my-message.org
-
-# Test send without signing (receiver will reject)
-cross-agent-send homelab.career /tmp/test.org --no-sign
-
-# Override retry count for a flaky link
-cross-agent-send velox.career /tmp/my-message.org --retries 10
-
-# After a delivery failure, inspect the marker
-cat ~/.local/state/cross-agent-comms/failed-sends/*.json | jq .
-```
-
-## Exit codes
-
-| Code | Meaning |
-|---|---|
-| 0 | Sent successfully. |
-| 1 | General error (parse failure, signing failure, etc.). |
-| 2 | Destination not found in peers.toml. |
-| 3 | Cross-machine delivery failed after retries. Marker file written. |
-| 4 | Frontmatter validation failed. |
-| 5 | HALT active; the send was refused or stopped. See *HALT awareness*. |
-
-## See also
-
-- `cross-agent-discover` — validate destinations before sending.
-- `cross-agent-watch` — receiver-side notification.
-- `cross-agent-status` — see what's queued.
-- `cross-agent-comms.org` — protocol spec, the "what" the script implements.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status
deleted file mode 100755
index 4eee75b..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python3
-"""Point-in-time snapshot of pending cross-agent messages across local projects.
-
-See cross-agent-status.md. Pending = messages in inbox/from-agents/ whose
-CONVERSATION_ID has no MESSAGE_TYPE: release at a later #+TIMESTAMP.
-
-HALT: prints a prominent banner before normal output, but continues to enumerate.
-"""
-
-from __future__ import annotations
-
-import argparse
-import glob
-import json
-import os
-import re
-import sys
-from pathlib import Path
-
-CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
-HALT_FILE = CONFIG_DIR / "HALT"
-DEFAULT_GLOB = str(Path.home() / "projects" / "*" / "inbox" / "from-agents") + "/"
-
-
-def parse_frontmatter(path: Path) -> dict[str, str]:
- try:
- text = path.read_text()
- except OSError:
- return {}
- fm: dict[str, str] = {}
- for line in text.splitlines():
- line = line.rstrip()
- if not line:
- if fm:
- break
- continue
- m = re.match(r"#\+([A-Z_]+):\s*(.*)", line)
- if m:
- fm[m.group(1)] = m.group(2).strip()
- elif fm:
- break
- return fm
-
-
-def project_name_from_path(path: str) -> str:
- """Walk up from path to find ~/projects/<name>/..."""
- home = str(Path.home())
- parts = Path(path).parts
- for i, part in enumerate(parts):
- if part == "projects" and i + 1 < len(parts) and str(Path(*parts[: i + 1])) == os.path.join(home, "projects"):
- return parts[i + 1]
- # Fallback: dir three levels up from the .org file (project/inbox/from-agents/file.org)
- return Path(path).parent.parent.parent.name
-
-
-def scan_project(inbox_dir: Path) -> tuple[int, str | None, int | None]:
- """Return (pending_count, most_recent_filename_or_None, most_recent_age_seconds_or_None)."""
- if not inbox_dir.is_dir():
- return 0, None, None
-
- # Group .org files by CONVERSATION_ID, also collect release timestamps per conv.
- org_files = sorted(inbox_dir.glob("*.org"))
- if not org_files:
- return 0, None, None
-
- by_conv: dict[str, list[tuple[str, str, Path]]] = {} # conv_id -> [(timestamp, msg_type, path)]
- for f in org_files:
- fm = parse_frontmatter(f)
- conv = fm.get("CONVERSATION_ID")
- ts = fm.get("TIMESTAMP")
- mt = fm.get("MESSAGE_TYPE")
- if not conv or not ts or not mt:
- # Malformed file: count as pending under conv "_unparseable".
- by_conv.setdefault("_unparseable", []).append(("", "request", f))
- continue
- by_conv.setdefault(conv, []).append((ts, mt, f))
-
- pending_files: list[Path] = []
- for conv, entries in by_conv.items():
- entries.sort(key=lambda e: e[0])
- # Find the latest release timestamp.
- release_ts = None
- for ts, mt, _f in entries:
- if mt == "release" and (release_ts is None or ts > release_ts):
- release_ts = ts
- for ts, mt, f in entries:
- if mt == "release":
- continue
- if release_ts is not None and ts <= release_ts:
- continue
- pending_files.append(f)
-
- if not pending_files:
- return 0, None, None
-
- # Most-recent by mtime (proxy for arrival order).
- most_recent = max(pending_files, key=lambda p: p.stat().st_mtime)
- import time
- age = int(time.time() - most_recent.stat().st_mtime)
- return len(pending_files), most_recent.name, age
-
-
-def fmt_age(seconds: int | None) -> str:
- if seconds is None:
- return "—"
- if seconds < 60:
- return f"{seconds}s ago"
- if seconds < 3600:
- return f"{seconds // 60} min ago"
- if seconds < 86400:
- return f"{seconds // 3600} hr ago"
- return f"{seconds // 86400} day(s) ago"
-
-
-def render_banner_if_halt() -> None:
- if not HALT_FILE.exists():
- return
- try:
- reason = HALT_FILE.read_text().strip()
- except OSError:
- reason = "(HALT file unreadable; treated as halted)"
- print("⚠ HALT ACTIVE — cross-agent comms paused")
- if reason:
- print(f" reason: {reason}")
- print(f" clear: rm {HALT_FILE} (or: cross-agent-resume)")
- print()
-
-
-def main() -> int:
- parser = argparse.ArgumentParser(description="Snapshot of pending cross-agent messages across local projects.")
- parser.add_argument("--json", action="store_true", help="Emit JSON output")
- parser.add_argument("--projects-glob", default=DEFAULT_GLOB,
- help=f"Glob for project from-agents dirs (default: {DEFAULT_GLOB})")
- args = parser.parse_args()
-
- render_banner_if_halt()
-
- matched = sorted(glob.glob(args.projects_glob))
- rows = []
- for path in matched:
- inbox = Path(path)
- if not inbox.is_dir():
- continue
- proj = project_name_from_path(path)
- count, most_recent, age = scan_project(inbox)
- rows.append({
- "name": proj,
- "pending_count": count,
- "most_recent": (
- {"filename": most_recent, "age_seconds": age}
- if most_recent else None
- ),
- })
-
- # Sort: pending-first, then alphabetical by name.
- rows.sort(key=lambda r: (-r["pending_count"], r["name"]))
-
- if args.json:
- import datetime as _dt
- payload = {
- "scanned_at": _dt.datetime.now(_dt.timezone.utc).isoformat(),
- "halt_active": HALT_FILE.exists(),
- "projects": rows,
- }
- print(json.dumps(payload, indent=2))
- return 0
-
- if not rows:
- print("No projects with inbox/from-agents/ found — 0 pending.")
- return 0
-
- # Human-readable table.
- name_w = max(len("project"), max(len(r["name"]) for r in rows))
- print(f"{'project':<{name_w}} pending most-recent")
- for r in rows:
- most_recent_str = "—"
- if r["most_recent"]:
- most_recent_str = f"{r['most_recent']['filename']} ({fmt_age(r['most_recent']['age_seconds'])})"
- print(f"{r['name']:<{name_w}} {r['pending_count']:<7} {most_recent_str}")
-
- return 0
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status.md
deleted file mode 100644
index 070330c..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-status.md
+++ /dev/null
@@ -1,139 +0,0 @@
-# cross-agent-status
-
-**Purpose.** Point-in-time snapshot of pending cross-agent messages across
-every project on this machine. Run from any terminal. No daemon required.
-
-This is the user-pull layer of the cold-start story — `cross-agent-watch`
-pushes notifications, `cross-agent-status` lets the user query.
-
-## Usage
-
-```
-cross-agent-status [--json] [--projects-glob <glob>]
-```
-
-No args required.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--json` | off (table) | Output as JSON for scripting. |
-| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to scan. |
-
-## Output
-
-### Default (table)
-
-```
-$ cross-agent-status
-project pending most-recent
-career 0 —
-claude-templates 0 —
-clipper 0 —
-homelab 1 20260427T085611Z-from-career-question.org (3 min ago)
-finances 0 —
-... (other 9 projects)
-```
-
-Sort: pending-first, then alphabetical.
-
-### `--json`
-
-```json
-{
- "scanned_at": "2026-04-27T04:13:00-05:00",
- "projects": [
- {
- "name": "homelab",
- "pending_count": 1,
- "most_recent": {
- "filename": "20260427T085611Z-from-career-question.org",
- "age_seconds": 180
- }
- },
- ...
- ]
-}
-```
-
-## Pending semantics
-
-A message is "pending" if it sits in `inbox/from-agents/` AND no
-`MESSAGE_TYPE: release` exists for the same `CONVERSATION_ID` after it.
-
-Concretely:
-
-1. Scan each project's `inbox/from-agents/` for `.org` files.
-2. Group by `CONVERSATION_ID` from frontmatter.
-3. For each conversation, find the highest-`#+TIMESTAMP` message with
- `MESSAGE_TYPE: release`.
-4. Messages with `#+TIMESTAMP` after that release (or in conversations with no
- release) count as pending.
-
-Files without parseable frontmatter are counted as pending and noted in the
-output (single warning row per project).
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| Project missing from output | Project's `.ai/` directory exists but `inbox/from-agents/` does not | Created lazily on first cross-agent message; `mkdir -p` to surface in output. |
-| All projects show "0 pending" but you know one has messages | Glob misresolved, OR all messages are post-release | `cross-agent-status --projects-glob` with explicit path to confirm. |
-| Warning row "N files unparseable in <project>" | Message file has invalid frontmatter | Open the file, fix or move out. |
-
-## Performance
-
-Scans every `.org` file in every watched directory. For Craig's setup (14
-projects, single-digit messages each), runs in <100ms. If a project
-accumulates hundreds of post-release messages, archive them per the persistence
-guidance in the protocol spec.
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` at start. If HALT exists, prints a
-prominent banner before normal output:
-
-```
-$ cross-agent-status
-⚠ HALT ACTIVE — cross-agent comms paused
- Reason: investigating runaway poll loop, 2026-04-27
- HALT file: ~/.config/cross-agent-comms/HALT
- Resume with: cross-agent-resume
-
-(snapshot continues normally — HALT does not suppress visibility)
-
-project pending most-recent
-career 0 —
-homelab 1 20260427T085611Z-from-career-question.org (3 min ago)
-...
-```
-
-Status is read-only, so it always runs. The banner ensures the user can't
-miss that halt is active when checking inbox state. Reason text comes from
-the HALT file's body; if empty, omit the reason line.
-
-If the HALT file exists but is unreadable, print a warning banner ("HALT
-file present but unreadable; treat as halted") and continue with normal
-output.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Snapshot
-cross-agent-status
-
-# JSON for piping
-cross-agent-status --json | jq '.projects[] | select(.pending_count > 0)'
-
-# Single-project query
-cross-agent-status --projects-glob ~/projects/work/inbox/from-agents/
-```
-
-## See also
-
-- `cross-agent-watch` — push notifications on new arrivals.
-- `cross-agent-discover` — enumerate available agents (cross-machine).
-- `cross-agent-comms.org` — protocol spec.
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch
deleted file mode 100755
index f50ba26..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env bash
-# cross-agent-watch — desktop-notify on new cross-agent messages.
-#
-# See cross-agent-watch.md. Watches every ~/projects/*/inbox/from-agents/ by
-# default. inotifywait fires create + moved_to events; .tmp.* files are
-# filtered out. HALT suppresses notifications but the watcher keeps running
-# and logs each event with "(suppressed by HALT)".
-
-set -uo pipefail
-
-# Defaults.
-PROJECTS_GLOB="${HOME}/projects/*/inbox/from-agents/"
-LOG_FILE="${HOME}/.local/state/cross-agent-comms/watch.log"
-HALT_FILE="${HOME}/.config/cross-agent-comms/HALT"
-QUIET=0
-NO_NOTIFY=0
-
-# Arg parsing.
-while [[ $# -gt 0 ]]; do
- case "$1" in
- --projects-glob)
- PROJECTS_GLOB="$2"; shift 2 ;;
- --log)
- LOG_FILE="$2"; shift 2 ;;
- --quiet)
- QUIET=1; shift ;;
- --no-notify)
- NO_NOTIFY=1; shift ;;
- -h|--help)
- cat <<EOF
-Usage: cross-agent-watch [--projects-glob GLOB] [--log PATH] [--quiet] [--no-notify]
-
-Watches inbox/from-agents/ directories for new cross-agent messages and fires
-desktop notifications. See cross-agent-watch.md for details.
-EOF
- exit 0 ;;
- *)
- echo "unknown flag: $1" >&2; exit 1 ;;
- esac
-done
-
-# Resolve glob to a concrete list of directories.
-# shellcheck disable=SC2086
-DIRS=( $PROJECTS_GLOB )
-# Filter out non-existent paths (glob may include literal pattern when no match).
-EXISTING=()
-for d in "${DIRS[@]}"; do
- if [[ -d "$d" ]]; then
- EXISTING+=( "$d" )
- fi
-done
-
-if [[ ${#EXISTING[@]} -eq 0 ]]; then
- echo "cross-agent-watch: glob resolved 0 directories: $PROJECTS_GLOB" >&2
- exit 1
-fi
-
-# Ensure log dir exists.
-mkdir -p "$(dirname "$LOG_FILE")"
-
-[[ $QUIET -eq 0 ]] && echo "cross-agent-watch: watching ${#EXISTING[@]} dir(s); log: $LOG_FILE"
-
-# Helper: project name from path like /home/.../projects/<name>/inbox/from-agents/...
-project_name() {
- local path="$1"
- # Match ~/projects/<name>/...
- if [[ "$path" =~ ${HOME}/projects/([^/]+)/ ]]; then
- echo "${BASH_REMATCH[1]}"
- else
- basename "$(dirname "$(dirname "$path")")"
- fi
-}
-
-# Main loop. inotifywait emits one line per event in the format
-# "<full-path>" because we passed --format '%w%f'.
-inotifywait -m -e create,moved_to --format '%w%f' "${EXISTING[@]}" 2>/dev/null \
- | while IFS= read -r path; do
- filename="$(basename "$path")"
-
- # Filter .tmp.* staging files.
- case "$filename" in
- .tmp.*) continue ;;
- esac
-
- # Filter .asc sidecars — they land first per the atomic-write ordering;
- # the .org event will fire after.
- case "$filename" in
- *.asc) continue ;;
- esac
-
- proj="$(project_name "$path")"
- iso="$(date -u "+%Y-%m-%dT%H:%M:%SZ")"
-
- if [[ -e "$HALT_FILE" ]]; then
- printf '%s\t%s\t%s\t(suppressed by HALT)\n' "$iso" "$proj" "$filename" >> "$LOG_FILE"
- [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename (suppressed by HALT)"
- continue
- fi
-
- printf '%s\t%s\t%s\n' "$iso" "$proj" "$filename" >> "$LOG_FILE"
- [[ $QUIET -eq 0 ]] && echo "[$iso] $proj: $filename"
-
- if [[ $NO_NOTIFY -eq 0 ]]; then
- notify info "Cross-agent message" "${proj}: ${filename}" --persist 2>/dev/null || true
- fi
- done
diff --git a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch.md b/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch.md
deleted file mode 100644
index 04e8005..0000000
--- a/claude-templates/.ai/scripts/cross-agent-comms/cross-agent-watch.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# cross-agent-watch
-
-**Purpose.** Long-running watcher that fires desktop notifications when new
-cross-agent messages land in any project's `inbox/from-agents/` directory.
-This is the primary cold-start mechanism: messages get noticed even when no
-Claude session is active.
-
-## Usage
-
-```
-cross-agent-watch [--projects-glob <glob>] [--log <path>]
-```
-
-No args required. Defaults:
-
-- Watches `~/projects/*/inbox/from-agents/` (matches every project with the
- cross-agent-comms convention).
-- Logs each event to `~/.local/state/cross-agent-comms/watch.log`.
-
-### Flags
-
-| Flag | Default | Purpose |
-|---|---|---|
-| `--projects-glob <glob>` | `~/projects/*/inbox/from-agents/` | Override which directories to watch. Useful for testing on a single project. |
-| `--log <path>` | `~/.local/state/cross-agent-comms/watch.log` | Override log location. Set to `/dev/null` to disable logging. |
-| `--quiet` | off | Suppress stdout output. Notifications still fire. |
-| `--no-notify` | off | Skip `notify` calls. Useful for testing the watcher loop without spamming notifications. |
-
-## Behavior
-
-1. Resolves the projects-glob to a concrete list of directories at startup.
- New projects added to `~/projects/` after startup are NOT picked up — restart
- the watcher to re-resolve.
-2. Runs `inotifywait -m -e create,moved_to --format '%w%f'` against each
- watched directory.
-3. For each event, calls
- `notify info "Cross-agent message" "<project>: <filename>" --persist`. The
- `--persist` flag keeps the page on screen until dismissed, so an inbound
- message that arrives while Craig is away from the desk isn't missed.
-4. Appends an event line to the log:
- `<ISO-8601-timestamp>\t<project>\t<filename>`.
-
-## Event filtering
-
-- Watches `create` AND `moved_to` events. The `moved_to` part is critical for
- the atomic-write convention (`mktemp` + `mv` produces a `moved_to`, not a
- `create`).
-- Files starting with `.tmp.` are ignored — they're staging files from
- in-progress writes that should never produce a notification.
-
-## Installation
-
-### Option A — tmux pane (personal, easy)
-
-Run in a tmux pane that survives session disconnects:
-
-```
-tmux new -d -s cross-agent-watch 'cross-agent-watch'
-```
-
-### Option B — systemd user service (production)
-
-Provided files:
-
-- `~/.config/systemd/user/cross-agent-watch.service`
-- `~/.config/systemd/user/cross-agent-watch.path`
-
-Enable with:
-
-```
-systemctl --user enable --now cross-agent-watch.path
-```
-
-The path unit triggers the service unit on filesystem changes; the service
-unit re-execs `cross-agent-watch` if it dies. Survives reboot.
-
-## Failure modes
-
-| Symptom | Likely cause | Fix |
-|---|---|---|
-| No notifications fire on new files | inotifywait not running, or glob resolved to zero dirs | Check `cross-agent-watch --projects-glob ... --quiet` exits non-zero immediately. Log shows `"resolved 0 directories"`. |
-| Notifications fire on `.tmp.` files | Filter regression | Verify `inotifywait` events show the `.tmp.` files; if so check this script's filter logic. |
-| Some files missed under rapid bursts | inotify queue overflow | Increase `fs.inotify.max_queued_events` sysctl. Default 16384 is usually fine. |
-| Permission denied on a watched dir | Directory perms wrong | `chmod 700 <dir>` and confirm owner. |
-
-## HALT awareness
-
-Checks `~/.config/cross-agent-comms/HALT` on each iteration (each inotifywait
-event fired). If HALT exists, the watcher continues running but **suppresses
-the `notify` call**. The event is still logged, with `(suppressed by HALT)`
-appended:
-
-```
-2026-04-27T04:42:00-05:00 career 20260427T094200Z-from-homelab-test.org (suppressed by HALT)
-```
-
-Logged-but-suppressed events are useful for the operator to see what would
-have fired during the halt window — helpful for diagnosing whatever caused
-the halt.
-
-When HALT clears, suppression stops; subsequent events fire normally. Backlog
-events that arrived during halt are NOT replayed — they get picked up via
-cold-start handling (status CLI, agent startup check, or the next agent
-poll once polling resumes).
-
-If the HALT file exists but is unreadable, fail-closed (suppress) — safer
-than fail-open.
-
-See `cross-agent-halt.md` for the full halt mechanism.
-
-## Examples
-
-```bash
-# Watch all projects, log everything, fire notifications
-cross-agent-watch
-
-# Test against a single project, no notifications, verbose
-cross-agent-watch \
- --projects-glob "$HOME/projects/work/inbox/from-agents/" \
- --no-notify
-
-# Production-style: quiet stdout, log only
-cross-agent-watch --quiet
-```
-
-## See also
-
-- `cross-agent-status` — point-in-time snapshot of pending messages.
-- `cross-agent-send` — counterpart writer.
-- `cross-agent-comms.org` — protocol spec.
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_discover.py b/claude-templates/.ai/scripts/tests/test_cross_agent_discover.py
deleted file mode 100644
index f0d2bb7..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_discover.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""Tests for cross-agent-discover (TDD: tests written before implementation)."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-discover"
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def fake_home(tmp_path, monkeypatch):
- home = tmp_path / "home"
- home.mkdir()
- monkeypatch.setenv("HOME", str(home))
- return home
-
-
-def _make_project(home: Path, name: str) -> Path:
- proj = home / "projects" / name
- (proj / ".ai").mkdir(parents=True)
- return proj
-
-
-def _write_peers_toml(home: Path, content: str) -> Path:
- cfg = home / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True, exist_ok=True)
- peers = cfg / "peers.toml"
- peers.write_text(content)
- return peers
-
-
-def test_discover_help(fake_home):
- result = _run(["--help"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "discover" in result.stdout.lower() or "enumerate" in result.stdout.lower()
-
-
-def test_discover_local_only_no_projects(fake_home):
- """Empty home → reports zero local projects, zero peers."""
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- # No crash; mentions local somehow.
- assert "local" in result.stdout.lower() or "0 project" in result.stdout.lower()
-
-
-def test_discover_lists_local_projects(fake_home):
- _make_project(fake_home, "homelab")
- _make_project(fake_home, "career")
- _make_project(fake_home, "claude-templates")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "homelab" in result.stdout
- assert "career" in result.stdout
- assert "claude-templates" in result.stdout
-
-
-def test_discover_excludes_dirs_without_ai_subdir(fake_home):
- """Directories under ~/projects/ that lack .ai/ are NOT projects."""
- _make_project(fake_home, "real-project")
- (fake_home / "projects" / "not-a-project").mkdir(parents=True)
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "real-project" in result.stdout
- assert "not-a-project" not in result.stdout
-
-
-def test_discover_no_peers_toml_just_local(fake_home):
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- # No peers section since no toml.
- assert "homelab" in result.stdout
-
-
-def test_discover_lists_peers_from_toml(fake_home):
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
-
- [peers.bastion]
- host = "bastion.local"
- ssh_user = "cjennings"
- """))
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- assert "velox" in result.stdout
- assert "bastion" in result.stdout
-
-
-def test_discover_malformed_peers_toml_errors_clearly(fake_home):
- _write_peers_toml(fake_home, "not valid toml at all = = =")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode != 0
- assert "peers.toml" in result.stderr or "TOML" in result.stderr or "parse" in result.stderr.lower()
-
-
-def test_discover_json_output_schema(fake_home):
- _make_project(fake_home, "homelab")
- _make_project(fake_home, "career")
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- """))
- result = _run(["--json", "--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert "local" in payload
- assert "peers" in payload
- assert isinstance(payload["local"], list)
- assert isinstance(payload["peers"], list)
- assert "homelab" in payload["local"]
- assert "career" in payload["local"]
- velox = next((p for p in payload["peers"] if p["name"] == "velox"), None)
- assert velox is not None
- # Reachability is a key — value depends on actual SSH state.
- assert "reachable" in velox
-
-
-def test_discover_peer_scope(fake_home):
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.velox]
- host = "velox"
-
- [peers.bastion]
- host = "bastion.local"
- """))
- result = _run(["--peer", "velox", "--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- peer_names = [p["name"] for p in payload["peers"]]
- assert "velox" in peer_names
- assert "bastion" not in peer_names
-
-
-def test_discover_unreachable_peer_marked(fake_home):
- """A peer with a definitely-unreachable host gets reachable=False."""
- _write_peers_toml(fake_home, textwrap.dedent("""\
- [peers.bogus]
- host = "definitely-not-a-real-host.invalid"
- ssh_user = "nobody"
- """))
- result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)}, )
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- bogus = next((p for p in payload["peers"] if p["name"] == "bogus"), None)
- assert bogus is not None
- assert bogus["reachable"] is False
-
-
-def test_discover_cache_hit_within_window(fake_home):
- """Second invocation within 5 min reads cache (skip the SSH probe)."""
- _make_project(fake_home, "homelab")
- # First call populates cache.
- result1 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result1.returncode == 0
- cache = fake_home / ".cache" / "cross-agent-comms" / "discovery.json"
- assert cache.exists()
- # Tamper with the cache to a marker only the cache path can produce.
- payload = json.loads(cache.read_text())
- payload["_test_marker"] = True
- cache.write_text(json.dumps(payload))
- # Second call (no --no-cache) should return the tampered payload.
- result2 = _run(["--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result2.returncode == 0
- payload2 = json.loads(result2.stdout)
- assert payload2.get("_test_marker") is True
-
-
-def test_discover_no_cache_flag_bypasses(fake_home):
- """--no-cache ignores even a fresh cache."""
- _make_project(fake_home, "homelab")
- cache_dir = fake_home / ".cache" / "cross-agent-comms"
- cache_dir.mkdir(parents=True)
- cache_dir.joinpath("discovery.json").write_text(json.dumps({
- "_test_marker": True, "local": [], "peers": []
- }))
- result = _run(["--no-cache", "--json"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- # Cache marker should NOT appear in fresh result.
- assert payload.get("_test_marker") is None or payload.get("_test_marker") is False
- assert "homelab" in payload["local"]
-
-
-def test_discover_halt_shows_banner(fake_home):
- halt = fake_home / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted")
- _make_project(fake_home, "homelab")
- result = _run(["--no-cache"], env={**os.environ, "HOME": str(fake_home)})
- assert result.returncode == 0 # discover continues to print under HALT
- assert "HALT" in result.stdout
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_halt.py b/claude-templates/.ai/scripts/tests/test_cross_agent_halt.py
deleted file mode 100644
index f8bf0b3..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_halt.py
+++ /dev/null
@@ -1,204 +0,0 @@
-"""Tests for cross-agent-halt and cross-agent-resume (TDD)."""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-HALT_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-halt"
-RESUME_SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-resume"
-
-
-def _run(script: Path, args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(script), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- """Isolated HOME + a fake systemctl that records calls without acting."""
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- fake_bin = tmp_path / "bin"
- fake_bin.mkdir()
- # Fake systemctl: no-op, exit 0.
- fake_systemctl = fake_bin / "systemctl"
- fake_systemctl.write_text("#!/usr/bin/env bash\nexit 0\n")
- fake_systemctl.chmod(0o755)
- # Fake ssh: succeed only for known-good host.
- fake_ssh = fake_bin / "ssh"
- fake_ssh.write_text(textwrap.dedent("""\
- #!/usr/bin/env bash
- # Find the destination arg (skip flags).
- target=""
- for arg in "$@"; do
- case "$arg" in
- -*|*=*) ;;
- *@*|localhost|*.local|*.invalid) target="$arg"; break ;;
- *) target="$arg"; break ;;
- esac
- done
- case "$target" in
- *invalid*|*unreachable*) exit 255 ;;
- *) exit 0 ;;
- esac
- """))
- fake_ssh.chmod(0o755)
-
- monkeypatch.setenv("HOME", str(fake_home))
- # Prepend our fake bin so systemctl + ssh are intercepted, but keep real /bin etc.
- monkeypatch.setenv("PATH", f"{fake_bin}:{os.environ.get('PATH', '')}")
- return fake_home
-
-
-# ---- cross-agent-halt ----
-
-
-def test_halt_help(isolated_env):
- result = _run(HALT_SCRIPT, ["--help"], env={**os.environ, "HOME": str(isolated_env),
- "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "halt" in result.stdout.lower()
-
-
-def test_halt_creates_halt_file(isolated_env):
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- assert not halt_file.exists()
- result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env),
- "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert halt_file.exists()
-
-
-def test_halt_with_reason_writes_body(isolated_env):
- result = _run(HALT_SCRIPT, ["pausing for incident review"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- assert halt_file.exists()
- assert "pausing for incident review" in halt_file.read_text()
-
-
-def test_halt_idempotent(isolated_env):
- """Running halt twice doesn't error."""
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- r1 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert r1.returncode == 0
- assert halt_file.exists()
- r2 = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert r2.returncode == 0
- assert halt_file.exists()
-
-
-def test_halt_does_not_pkill(isolated_env):
- """Per design: halt does NOT call pkill. Verify by checking no pkill process gets launched."""
- # Replace pkill in PATH with something that fails loudly so we'd see if halt invoked it.
- fake_bin = isolated_env.parent / "bin"
- pkill = fake_bin / "pkill"
- pkill.write_text("#!/usr/bin/env bash\necho 'PKILL CALLED' >&2\nexit 99\n")
- pkill.chmod(0o755)
- result = _run(HALT_SCRIPT, [], env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "PKILL CALLED" not in result.stderr
-
-
-def test_halt_tailnet_reports_per_peer(isolated_env):
- """--tailnet iterates peers.toml and reports per-peer status."""
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
-
- [peers.bogus]
- host = "definitely-unreachable.invalid"
- ssh_user = "cjennings"
- """))
- result = _run(HALT_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- # Partial halt → exit 1.
- assert result.returncode == 1
- assert "velox" in result.stdout
- assert "bogus" in result.stdout
- # ✓ marker for velox, ✗ for bogus.
- assert "✓" in result.stdout
- assert "✗" in result.stdout
- assert "PARTIAL" in result.stdout or "partial" in result.stdout.lower()
-
-
-def test_halt_tailnet_all_reachable_exits_zero(isolated_env):
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
- ssh_user = "cjennings"
- """))
- result = _run(HALT_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "velox" in result.stdout
-
-
-# ---- cross-agent-resume ----
-
-
-def test_resume_help(isolated_env):
- result = _run(RESUME_SCRIPT, ["--help"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert "resume" in result.stdout.lower()
-
-
-def test_resume_removes_halt_file(isolated_env):
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt_file.parent.mkdir(parents=True)
- halt_file.write_text("halted")
- assert halt_file.exists()
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- assert not halt_file.exists()
-
-
-def test_resume_when_no_halt_active_succeeds(isolated_env):
- """No HALT to clear is not an error."""
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
-
-
-def test_resume_prints_per_session_instructions(isolated_env):
- """Resume must surface that polling does NOT auto-resume."""
- halt_file = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt_file.parent.mkdir(parents=True)
- halt_file.write_text("halted")
- result = _run(RESUME_SCRIPT, [],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 0
- out = result.stdout.lower()
- assert "polling" in out
- assert "auto" in out or "explicit" in out or "session" in out
-
-
-def test_resume_tailnet_partial_failure_exit_1(isolated_env):
- cfg = isolated_env / ".config" / "cross-agent-comms"
- cfg.mkdir(parents=True)
- (cfg / "peers.toml").write_text(textwrap.dedent("""\
- [peers.velox]
- host = "velox"
-
- [peers.bogus]
- host = "unreachable-host.invalid"
- """))
- halt_file = cfg / "HALT"
- halt_file.write_text("halted")
- result = _run(RESUME_SCRIPT, ["--tailnet"],
- env={**os.environ, "HOME": str(isolated_env), "PATH": os.environ["PATH"]})
- assert result.returncode == 1
- assert "velox" in result.stdout
- assert "bogus" in result.stdout
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_recv.py b/claude-templates/.ai/scripts/tests/test_cross_agent_recv.py
deleted file mode 100644
index 27c53a5..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_recv.py
+++ /dev/null
@@ -1,176 +0,0 @@
-"""Tests for cross-agent-recv."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-recv"
-
-
-def _make_message(path: Path, *, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request",
- proto_version: str = "5", title: str = "Test", requires_tools: str | None = None,
- body: str = "Body.\n") -> Path:
- fm_lines = [
- f"#+TITLE: {title}",
- f"#+CONVERSATION_ID: {conv_id}",
- f"#+MESSAGE_TYPE: {msg_type}",
- f"#+SEQUENCE: {seq}",
- "#+TIMESTAMP: 2026-04-27T05:00:00-05:00",
- f"#+PROTOCOL_VERSION: {proto_version}",
- ]
- if requires_tools:
- fm_lines.append(f"#+REQUIRES_TOOLS: {requires_tools}")
- path.write_text("\n".join(fm_lines) + "\n\n" + body)
- return path
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- return fake_home
-
-
-def test_recv_help(isolated_env):
- result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "Receive and decide" in result.stdout
-
-
-def test_recv_missing_file_rejects(isolated_env, tmp_path):
- result = _run([str(tmp_path / "nope.org")], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3 # reject
-
-
-def test_recv_malformed_frontmatter_rejects(isolated_env, tmp_path):
- bad = tmp_path / "bad.org"
- bad.write_text("not org-mode at all\n")
- result = _run([str(bad), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "decision: reject" in result.stdout
-
-
-def test_recv_missing_required_field_rejects(isolated_env, tmp_path):
- msg = tmp_path / "msg.org"
- # Missing PROTOCOL_VERSION among others.
- msg.write_text("#+TITLE: x\n#+CONVERSATION_ID: c\n\nBody.\n")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "missing required" in result.stdout
-
-
-def test_recv_protocol_version_mismatch_query(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org", proto_version="4")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 2 # query
- assert "PROTOCOL_VERSION mismatch" in result.stdout
-
-
-def test_recv_invalid_message_type_rejects(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org", msg_type="banana")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "invalid MESSAGE_TYPE" in result.stdout
-
-
-def test_recv_missing_signature_rejects(isolated_env, tmp_path):
- """When verify is on, a missing .asc sibling rejects."""
- msg = _make_message(tmp_path / "msg.org")
- # No .asc sidecar.
- result = _run([str(msg)], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 3
- assert "signature file missing" in result.stdout
-
-
-def test_recv_valid_processes(isolated_env, tmp_path):
- """A valid message with --no-verify and no dedup match → process."""
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0 # process
- assert "decision: process" in result.stdout
- assert "sha256:" in result.stdout
-
-
-def test_recv_dedup_against_identical_existing(isolated_env, tmp_path):
- """Same content + same SEQUENCE in same dir → dedup."""
- inbox = tmp_path / "inbox"
- inbox.mkdir()
- first = _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5)
- # Second message with same content — name differs (canonical-style would have different timestamp).
- second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5)
- # Bodies must be byte-identical for hash equality.
- second.write_bytes(first.read_bytes())
- result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 1 # dedup
- assert "decision: dedup" in result.stdout
-
-
-def test_recv_collision_with_different_content_processes(isolated_env, tmp_path):
- """Same SEQUENCE + same CONVERSATION_ID but different content → process both."""
- inbox = tmp_path / "inbox"
- inbox.mkdir()
- _make_message(inbox / "20260427T100000Z-from-x-c.org", conv_id="c", seq=5, body="First body.\n")
- second = _make_message(inbox / "20260427T100100Z-from-x-c.org", conv_id="c", seq=5, body="Different body.\n")
- result = _run([str(second), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0 # process
- assert "decision: process" in result.stdout
-
-
-def test_recv_requires_tools_missing_query(isolated_env, tmp_path):
- """REQUIRES_TOOLS naming a definitely-missing binary → query."""
- msg = _make_message(tmp_path / "msg.org", requires_tools="definitely-not-installed-xyzzy-9000")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 2 # query
- assert "required tools unavailable" in result.stdout
-
-
-def test_recv_requires_tools_present_processes(isolated_env, tmp_path):
- """REQUIRES_TOOLS naming a real binary → process."""
- msg = _make_message(tmp_path / "msg.org", requires_tools="ls,cat")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "decision: process" in result.stdout
-
-
-def test_recv_json_output(isolated_env, tmp_path):
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify", "--json"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert payload["decision"] == "process"
- assert payload["message_type"] == "request"
- assert payload["conversation_id"] == "test-conv"
-
-
-def test_recv_halt_blocks(isolated_env, tmp_path):
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted\n")
- msg = _make_message(tmp_path / "msg.org")
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 5
- assert "halt active" in result.stderr.lower()
-
-
-def test_recv_halt_leaves_message_in_place(isolated_env, tmp_path):
- """Per spec: under HALT, recv must NOT move/dedup/reject — leave file in place."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted\n")
- msg = _make_message(tmp_path / "msg.org")
- pre_content = msg.read_text()
- result = _run([str(msg), "--no-verify"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 5
- # File still exists with same content.
- assert msg.exists()
- assert msg.read_text() == pre_content
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_send.py b/claude-templates/.ai/scripts/tests/test_cross_agent_send.py
deleted file mode 100644
index f716e95..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_send.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""Tests for cross-agent-send.
-
-Subprocess-based: treat the script as a black-box CLI and assert on its
-exit codes, stdout, and the files it produces.
-"""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-send"
-
-
-def _make_message(tmp_path: Path, conv_id: str = "test-conv", seq: int = 1, msg_type: str = "request",
- proto_version: str = "5") -> Path:
- msg = tmp_path / "msg.org"
- msg.write_text(textwrap.dedent(f"""\
- #+TITLE: Test message
- #+CONVERSATION_ID: {conv_id}
- #+MESSAGE_TYPE: {msg_type}
- #+SEQUENCE: {seq}
- #+TIMESTAMP: 2026-04-27T05:00:00-05:00
- #+PROTOCOL_VERSION: {proto_version}
-
- Body.
- """))
- return msg
-
-
-def _run(args: list[str], env: dict | None = None, cwd: Path | None = None) -> subprocess.CompletedProcess:
- return subprocess.run(
- [str(SCRIPT), *args],
- capture_output=True,
- text=True,
- env=env,
- cwd=cwd,
- )
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- """Redirect HOME so peers.toml, HALT, marker files are scoped to the test."""
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- # Pre-create projects/ so derive_sender_project has somewhere to look.
- (fake_home / "projects" / "homelab").mkdir(parents=True)
- return fake_home
-
-
-def test_send_help(isolated_env):
- """--help works without side effects."""
- result = _run(["--help"], env={**os.environ, "HOME": str(isolated_env)})
- assert result.returncode == 0
- assert "Send a cross-agent message" in result.stdout
-
-
-def test_send_missing_message_file(isolated_env):
- """Nonexistent message file returns general error."""
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(isolated_env / "nonexistent.org")],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 1
- assert "not found" in result.stderr.lower()
-
-
-def test_send_invalid_destination_format(isolated_env, tmp_path):
- """Destination without . returns dest-not-found exit code."""
- msg = _make_message(tmp_path)
- result = _run(
- ["bogus", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 2
- assert "<machine>.<project>" in result.stderr or "destination" in result.stderr.lower()
-
-
-def test_send_dest_not_in_peers(isolated_env, tmp_path):
- """Cross-machine destination with no peers.toml entry exits 2."""
- msg = _make_message(tmp_path)
- result = _run(
- ["unknownmachine.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 2
- assert "not found in peers" in result.stderr
-
-
-def test_send_frontmatter_missing_required(isolated_env, tmp_path):
- """Message missing required fields exits 4."""
- bad = tmp_path / "bad.org"
- bad.write_text("#+TITLE: nope\n\nBody.\n")
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(bad)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 4
- assert "missing required fields" in result.stderr
-
-
-def test_send_invalid_message_type(isolated_env, tmp_path):
- """Unknown MESSAGE_TYPE exits 4."""
- msg = _make_message(tmp_path, msg_type="frobnicate")
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 4
- assert "MESSAGE_TYPE" in result.stderr
-
-
-def test_send_halt_blocks(isolated_env, tmp_path):
- """When HALT exists, send refuses with exit 5."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("test halt\n")
- msg = _make_message(tmp_path)
- import socket
- machine = socket.gethostname().split(".")[0]
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 5
- assert "halt active" in result.stderr.lower()
-
-
-def test_send_same_machine_no_sign_delivers(isolated_env, tmp_path):
- """Same-machine delivery with --no-sign produces a canonically named file."""
- msg = _make_message(tmp_path, conv_id="my-conv")
- import socket
- machine = socket.gethostname().split(".")[0]
- # Sender is derived from CWD walking up to ~/projects/<name>/
- cwd = isolated_env / "projects" / "homelab"
- result = _run(
- [f"{machine}.homelab", str(msg), "--no-sign"],
- env={**os.environ, "HOME": str(isolated_env)},
- cwd=cwd,
- )
- assert result.returncode == 0, f"stderr={result.stderr}"
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- files = list(inbox.glob("*-from-homelab-my-conv.org"))
- assert len(files) == 1
- # No sig file with --no-sign.
- assert not list(inbox.glob("*.asc"))
- # Canonical filename pattern.
- assert files[0].name.startswith("2026") and files[0].name.endswith("-from-homelab-my-conv.org")
-
-
-def test_send_same_machine_signed_writes_asc(isolated_env, tmp_path):
- """Signed delivery writes both .org and .asc."""
- msg = _make_message(tmp_path, conv_id="signed-conv")
- import socket
- machine = socket.gethostname().split(".")[0]
- cwd = isolated_env / "projects" / "homelab"
- # Use the real GPG keyring (not isolating GPG — Craig's existing keys are fine for tests).
- real_env = {**os.environ, "HOME": str(isolated_env), "GNUPGHOME": str(Path.home() / ".gnupg")}
- result = _run(
- [f"{machine}.homelab", str(msg)],
- env=real_env,
- cwd=cwd,
- )
- if result.returncode != 0:
- pytest.skip(f"GPG signing unavailable in this environment: {result.stderr}")
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- org_files = list(inbox.glob("*-from-homelab-signed-conv.org"))
- asc_files = list(inbox.glob("*-from-homelab-signed-conv.org.asc"))
- assert len(org_files) == 1
- assert len(asc_files) == 1
-
-
-def test_send_filename_ignores_input_basename(isolated_env, tmp_path):
- """User's input filename is ignored; canonical filename is generated."""
- weird = tmp_path / "weird-user-name.org"
- weird.write_text(textwrap.dedent("""\
- #+TITLE: Title
- #+CONVERSATION_ID: ignored-input
- #+MESSAGE_TYPE: request
- #+SEQUENCE: 1
- #+TIMESTAMP: 2026-04-27T05:00:00-05:00
- #+PROTOCOL_VERSION: 5
-
- Body.
- """))
- import socket
- machine = socket.gethostname().split(".")[0]
- cwd = isolated_env / "projects" / "homelab"
- result = _run(
- [f"{machine}.homelab", str(weird), "--no-sign"],
- env={**os.environ, "HOME": str(isolated_env)},
- cwd=cwd,
- )
- assert result.returncode == 0
- inbox = isolated_env / "projects" / "homelab" / "inbox" / "from-agents"
- # No file named after the user's input.
- assert not (inbox / "weird-user-name.org").exists()
- # Canonical naming used.
- assert list(inbox.glob("*-from-homelab-ignored-input.org"))
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_status.py b/claude-templates/.ai/scripts/tests/test_cross_agent_status.py
deleted file mode 100644
index bb5b8ba..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_status.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""Tests for cross-agent-status (TDD: tests written before implementation)."""
-
-from __future__ import annotations
-
-import json
-import os
-import subprocess
-import textwrap
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-status"
-
-
-def _make_msg(path: Path, *, conv_id: str, seq: int, msg_type: str = "request",
- proto_version: str = "5", timestamp: str = "2026-04-27T05:00:00-05:00") -> Path:
- path.parent.mkdir(parents=True, exist_ok=True)
- path.write_text(textwrap.dedent(f"""\
- #+TITLE: T
- #+CONVERSATION_ID: {conv_id}
- #+MESSAGE_TYPE: {msg_type}
- #+SEQUENCE: {seq}
- #+TIMESTAMP: {timestamp}
- #+PROTOCOL_VERSION: {proto_version}
-
- Body.
- """))
- return path
-
-
-def _run(args: list[str], env: dict | None = None) -> subprocess.CompletedProcess:
- return subprocess.run([str(SCRIPT), *args], capture_output=True, text=True, env=env)
-
-
-@pytest.fixture
-def fake_projects(tmp_path, monkeypatch):
- """Create a fake ~/projects/<name>/inbox/from-agents/ tree under tmp_path."""
- home = tmp_path / "home"
- home.mkdir()
- monkeypatch.setenv("HOME", str(home))
- return home
-
-
-def test_status_help(fake_projects):
- result = _run(["--help"], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- assert "snapshot" in result.stdout.lower() or "pending" in result.stdout.lower()
-
-
-def test_status_no_projects_clean_output(fake_projects):
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # Empty machine prints either header-only table or "no projects" — accept either.
- # No crash, no pending claims.
- assert "pending" in result.stdout.lower() or result.stdout.strip() == ""
-
-
-def test_status_one_pending_shows_up(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-fixup.org", conv_id="fixup", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- assert "homelab" in result.stdout
- assert "1" in result.stdout # pending count
- assert "20260427T100000Z-from-career-fixup.org" in result.stdout
-
-
-def test_status_released_conversation_zero_pending(fake_projects):
- """A conversation with a release message in it counts as 0 pending."""
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-done.org", conv_id="done", seq=1)
- _make_msg(inbox / "20260427T100100Z-from-homelab-done.org", conv_id="done", seq=2, msg_type="release")
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # Check the homelab row shows 0 pending.
- lines = [ln for ln in result.stdout.splitlines() if "homelab" in ln]
- # At least one homelab line should show 0 pending or "—".
- assert any("0" in ln or "—" in ln for ln in lines)
-
-
-def test_status_partial_release(fake_projects):
- """Conversation with release + a later message → that later message counts as pending."""
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-x.org", conv_id="x", seq=1,
- timestamp="2026-04-27T05:00:00-05:00")
- _make_msg(inbox / "20260427T100100Z-from-homelab-x.org", conv_id="x", seq=2, msg_type="release",
- timestamp="2026-04-27T05:01:00-05:00")
- # New message AFTER release: starts a fresh thread that's pending.
- _make_msg(inbox / "20260427T200000Z-from-career-x.org", conv_id="x", seq=3,
- timestamp="2026-04-27T15:00:00-05:00")
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln)
- assert "1" in homelab_line # the post-release message is pending
-
-
-def test_status_multiple_projects(fake_projects):
- inbox_a = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- inbox_b = fake_projects / "projects" / "career" / "inbox" / "from-agents"
- _make_msg(inbox_a / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1)
- _make_msg(inbox_b / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1)
- _make_msg(inbox_b / "20260427T100200Z-from-x-c.org", conv_id="c", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- # career has 2 pending, homelab has 1.
- career_line = next(ln for ln in result.stdout.splitlines() if "career" in ln)
- homelab_line = next(ln for ln in result.stdout.splitlines() if "homelab" in ln)
- assert "2" in career_line
- assert "1" in homelab_line
-
-
-def test_status_json_output(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-career-test.org", conv_id="test", seq=1)
- result = _run(["--json"], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- payload = json.loads(result.stdout)
- assert "projects" in payload
- assert isinstance(payload["projects"], list)
- homelab = next((p for p in payload["projects"] if p["name"] == "homelab"), None)
- assert homelab is not None
- assert homelab["pending_count"] == 1
-
-
-def test_status_sort_pending_first(fake_projects):
- """Projects with pending messages sort before projects with 0."""
- (fake_projects / "projects" / "alpha" / "inbox" / "from-agents").mkdir(parents=True)
- inbox_zeta = fake_projects / "projects" / "zeta" / "inbox" / "from-agents"
- _make_msg(inbox_zeta / "20260427T100000Z-from-x-z.org", conv_id="z", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0
- lines = result.stdout.splitlines()
- zeta_idx = next(i for i, ln in enumerate(lines) if "zeta" in ln)
- alpha_idx = next(i for i, ln in enumerate(lines) if "alpha" in ln)
- assert zeta_idx < alpha_idx, "pending project should sort before zero-pending project"
-
-
-def test_status_halt_shows_banner(fake_projects):
- halt = fake_projects / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted for test")
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-x-x.org", conv_id="x", seq=1)
- result = _run([], env={**os.environ, "HOME": str(fake_projects)})
- assert result.returncode == 0 # status continues to print under HALT
- assert "HALT" in result.stdout
- # Banner should mention the reason.
- assert "halted for test" in result.stdout
-
-
-def test_status_projects_glob_override(fake_projects):
- inbox = fake_projects / "projects" / "homelab" / "inbox" / "from-agents"
- _make_msg(inbox / "20260427T100000Z-from-x-a.org", conv_id="a", seq=1)
- other_inbox = fake_projects / "projects" / "career" / "inbox" / "from-agents"
- _make_msg(other_inbox / "20260427T100100Z-from-x-b.org", conv_id="b", seq=1)
- # Glob limits to homelab only.
- result = _run(
- ["--projects-glob", str(fake_projects / "projects" / "homelab" / "inbox" / "from-agents") + "/"],
- env={**os.environ, "HOME": str(fake_projects)},
- )
- assert result.returncode == 0
- assert "homelab" in result.stdout
- # career not in scope.
- assert "career" not in result.stdout
diff --git a/claude-templates/.ai/scripts/tests/test_cross_agent_watch.py b/claude-templates/.ai/scripts/tests/test_cross_agent_watch.py
deleted file mode 100644
index 417cc19..0000000
--- a/claude-templates/.ai/scripts/tests/test_cross_agent_watch.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""Tests for cross-agent-watch.
-
-Black-box: spawn the script, drop files into a watched dir, read the log.
-Tests use --no-notify to avoid firing real desktop notifications.
-"""
-
-from __future__ import annotations
-
-import os
-import subprocess
-import time
-from pathlib import Path
-
-import pytest
-
-SCRIPT = Path(__file__).resolve().parent.parent / "cross-agent-comms" / "cross-agent-watch"
-
-
-def _spawn(watched_dir: Path, log_path: Path, env: dict) -> subprocess.Popen:
- return subprocess.Popen(
- [
- str(SCRIPT),
- "--projects-glob", str(watched_dir) + "/",
- "--log", str(log_path),
- "--no-notify",
- "--quiet",
- ],
- stdout=subprocess.DEVNULL,
- stderr=subprocess.PIPE,
- env=env,
- )
-
-
-def _wait_for_log_lines(log_path: Path, expected: int, timeout: float = 5.0) -> list[str]:
- deadline = time.time() + timeout
- while time.time() < deadline:
- if log_path.exists():
- lines = [ln for ln in log_path.read_text().splitlines() if ln]
- if len(lines) >= expected:
- return lines
- time.sleep(0.1)
- if log_path.exists():
- return [ln for ln in log_path.read_text().splitlines() if ln]
- return []
-
-
-@pytest.fixture
-def isolated_env(tmp_path, monkeypatch):
- fake_home = tmp_path / "home"
- fake_home.mkdir()
- monkeypatch.setenv("HOME", str(fake_home))
- return fake_home
-
-
-def test_watch_help(isolated_env):
- result = subprocess.run(
- [str(SCRIPT), "--help"],
- capture_output=True, text=True,
- env={**os.environ, "HOME": str(isolated_env)},
- )
- assert result.returncode == 0
- assert "Usage:" in result.stdout
-
-
-def test_watch_empty_glob_exits_nonzero(isolated_env):
- """Glob resolving to zero dirs should exit non-zero with a clear message."""
- result = subprocess.run(
- [str(SCRIPT), "--projects-glob", "/nonexistent/path/*/foo/", "--no-notify", "--quiet"],
- capture_output=True, text=True,
- env={**os.environ, "HOME": str(isolated_env)},
- timeout=3,
- )
- assert result.returncode != 0
- assert "0 directories" in result.stderr
-
-
-def test_watch_logs_org_file_create(isolated_env, tmp_path):
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- # Give inotifywait a moment to attach.
- time.sleep(0.3)
- (watched / "test-msg.org").write_text("hello")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert len(lines) >= 1
- assert "test-msg.org" in lines[-1]
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_filters_tmp_files(isolated_env, tmp_path):
- """Files starting with .tmp. must NOT trigger log entries."""
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / ".tmp.staging-file.org").write_text("hello")
- # Wait briefly to confirm nothing logs.
- time.sleep(0.5)
- if log.exists():
- content = log.read_text()
- assert ".tmp.staging-file" not in content
- # Then drop a real file to confirm watcher is alive.
- (watched / "real.org").write_text("real")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert any("real.org" in ln for ln in lines)
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_filters_asc_sidecars(isolated_env, tmp_path):
- """Only .org events fire; .asc sidecars are silent."""
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / "msg.org.asc").write_text("sig")
- time.sleep(0.5)
- if log.exists():
- assert "msg.org.asc" not in log.read_text()
- # .org event still works.
- (watched / "msg.org").write_text("body")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert any(ln.endswith("msg.org") for ln in lines)
- finally:
- proc.terminate()
- proc.wait(timeout=2)
-
-
-def test_watch_halt_suppresses_but_logs(isolated_env, tmp_path):
- """When HALT is set, watcher logs the event with (suppressed by HALT) marker."""
- halt = isolated_env / ".config" / "cross-agent-comms" / "HALT"
- halt.parent.mkdir(parents=True)
- halt.write_text("halted")
- watched = tmp_path / "watched"
- watched.mkdir()
- log = tmp_path / "watch.log"
- proc = _spawn(watched, log, {**os.environ, "HOME": str(isolated_env)})
- try:
- time.sleep(0.3)
- (watched / "halted-event.org").write_text("body")
- lines = _wait_for_log_lines(log, expected=1, timeout=3.0)
- assert len(lines) >= 1
- assert "suppressed by HALT" in lines[-1]
- finally:
- proc.terminate()
- proc.wait(timeout=2)
diff --git a/claude-templates/.ai/workflows/INDEX.org b/claude-templates/.ai/workflows/INDEX.org
index 42119b4..a45807e 100644
--- a/claude-templates/.ai/workflows/INDEX.org
+++ b/claude-templates/.ai/workflows/INDEX.org
@@ -107,7 +107,6 @@ This index must list every =.org= file in =.ai/workflows/= except this one and e
- Triggers: "session harvest", "harvest the sessions", "let's run the session-harvest workflow", "monthly harvest", "mine the sessions"
- =no-approvals.org= — drop the interaction-level approval gates for a pre-agreed batch while keeping engineering-discipline gates (=/review-code=, =/voice personal=, tests, session-log updates, subagent reviews, destructive-action consent). Mode stays on until Craig turns it off, a real question arises, the queue empties, or the conversation switches topics.
- Triggers: "no-approvals mode", "no approvals", "no-approval", "no need for approval gates", "stop asking, just keep going", "I'll check back in when you're done or stuck", "do all =<selector>= with no-approval"
-- =cross-agent-comms.org= — protocol for cross-project agent coordination via =inbox/from-agents/= (file-based IPC, GPG-signed, supports cross-machine over Tailscale). Auto: when =cross-agent-watch= detects a new inbound message, or when an agent decides to initiate a cross-project conversation. Operational scripts (=cross-agent-send=, =-recv=, =-watch=, =-status=, =-discover=, =-halt=, =-resume=) and their READMEs live at =.ai/scripts/cross-agent-comms/=.
* Living Document
diff --git a/claude-templates/.ai/workflows/cross-agent-comms.org b/claude-templates/.ai/workflows/cross-agent-comms.org
deleted file mode 100644
index 430b4b0..0000000
--- a/claude-templates/.ai/workflows/cross-agent-comms.org
+++ /dev/null
@@ -1,334 +0,0 @@
-#+TITLE: Cross-Agent Communication Workflow (v5)
-#+AUTHOR: Craig Jennings & Claude (homelab + career sessions)
-#+DATE: 2026-04-27
-#+VERSION: 5
-
-* Status
-
-Draft. Iterating between the homelab and career sessions through a multi-round design discussion. Awaiting Craig's review for promotion to =~/code/rulesets/claude-templates/.ai/workflows/=.
-
-v5 changes from v4:
-- *Script absorption.* Seven operational scripts (=cross-agent-send=, =cross-agent-recv=, =cross-agent-watch=, =cross-agent-status=, =cross-agent-discover=, =cross-agent-halt=, =cross-agent-resume=) now own most implementation detail. Their READMEs are the operational source of truth. The spec stays declarative.
-- *Failsafe halt.* Layered HALT-file mechanism stops all cross-agent activity on a machine within ~5 min, without visiting individual sessions or restarting Claude Code. =cross-agent-halt= and =cross-agent-resume= are the convenience entry points; every other component checks the HALT file independently.
-- *Identity.* Messages are GPG-signed by sender and verified by receiver. Combined with POSIX permissions on =from-agents/= and Tailscale-level network auth, identity becomes a three-layer story.
-- *Atomic writes.* Writers MUST use temp-file + rename. =cross-agent-send= handles this; the spec just states the contract.
-- *Dedup.* Sequence-collision dedup is now binary SHA-256 equality, not a fuzzy ">90% match" threshold.
-- *Cold-start handling.* Layered: =cross-agent-watch= (push notifications via =inotifywait=) is the primary mechanism; startup-workflow check and user-direct-injection are coverage layers.
-- *Spec stays roughly the same length but does more protocol work.* Operational detail (rsync retry numbers, inotifywait recipes, peers.toml schema, GPG flags, dedup mechanics) moved to the script READMEs. The spec adds new protocol elements (identity layer, atomic-writes contract, SHA-256 dedup, =escalate= type, =RELEASE_STATUS= values, =REQUIRES_TOOLS= optional field) in the freed space. Total documentation surface (spec + seven READMEs ≈ 1000 lines) is larger than v4's 259 lines, but the spec and the READMEs serve different audiences — protocol-thinkers and CLI-users — and a reader of just the spec can comprehend the protocol without consulting any README.
-
-* When to use
-
-When two Claude sessions in different projects (same machine or different machines on the same Tailscale tailnet) need to coordinate on a shared task that one session can't complete alone — typically because one has tooling, context, or MCP access the other doesn't.
-
-Examples that fit:
-- Session A asks session B to apply a workflow patch in B's project, then verify it.
-- Session A runs a long task and needs session B to monitor results in B's domain.
-- Two sessions co-design a workflow.
-
-Examples that don't fit:
-- A simple file handoff that doesn't require iteration.
-- A task one session can do alone.
-- Cross-tailnet or cross-organization. The protocol is local-tailnet-scoped.
-
-* Protocol
-
-** File location
-
-Each project has =inbox/from-agents/= as its agent-comms mailbox. Create the directory if it doesn't exist; set permissions =chmod 700= and ownership to the user.
-
-- Sender writes to receiver's =inbox/from-agents/=.
-- Receiver polls (or watches) =inbox/from-agents/=, *not* the parent =inbox/=.
-- The parent =inbox/= stays reserved for human-triage items.
-- Out-of-band artifacts (PDFs, datasets) live at =inbox/from-agents/artifacts/=. Reference by relative path in the message body.
-
-The user does NOT write directly to =from-agents/=. To inject input into a running conversation, the user tells one of the agents in that agent's session; the agent writes the input as a normal message attributed to the user.
-
-** File naming
-
-=YYYYMMDDTHHMMSSZ-from-<sender>-<short-conv-id>.org=
-
-- Timestamp is UTC ISO 8601 compact. The trailing =Z= is mandatory.
-- =from-<sender>= prefix.
-- =<short-conv-id>= is a stable kebab-case slug across the back-and-forth. Reusable across time; ordering relies on filename timestamps.
-
-Frontmatter =#+TIMESTAMP= carries the same instant in local time with explicit offset. The two MUST refer to the same instant.
-
-The implementation (=cross-agent-send=) generates the canonical filename from the message's frontmatter (=CONVERSATION_ID=, current UTC time) and the sender's project context. Senders supply only the message body file; the script handles naming. Senders MUST NOT pre-name files in this format and pass them through; the script overwrites with its own canonical name to ensure consistency and enable the sender-side max-seen sequence-collision-reduction scan.
-
-GPG signatures live in a sibling file =YYYYMMDDTHHMMSSZ-from-<sender>-<short-conv-id>.org.asc=. Receivers verify before processing. See =* Writes are atomic= for the two-file delivery ordering rule.
-
-** Frontmatter
-
-Required:
-
-#+begin_example
-#+TITLE: <human-readable subject>
-#+CONVERSATION_ID: <stable across the thread>
-#+MESSAGE_TYPE: <see types below>
-#+SEQUENCE: <integer hint>
-#+TIMESTAMP: <ISO 8601 with explicit offset>
-#+PROTOCOL_VERSION: 5
-#+end_example
-
-Optional:
-
-#+begin_example
-#+REQUIRES_TOOLS: <comma-separated tool/MCP slugs, e.g. gmail-mcp, slack-mcp>
-#+RELEASE_STATUS: <see release-statuses; valid only on MESSAGE_TYPE: release>
-#+WORKFLOW_VERSION: <sender's version of cross-agent-comms.org; informational only in v5 — no enforcement>
-#+end_example
-
-Receiver sanity-checks frontmatter before acting. Missing or malformed frontmatter → surface to user, don't proceed. Mismatched =PROTOCOL_VERSION= → receiver writes a =query= asking the originator to upgrade.
-
-** Identity
-
-Messages are GPG-signed by the sender. Receivers verify the detached signature before processing the message body.
-
-The implementation (=cross-agent-send=) signs automatically with the sender's configured key (the user's primary GPG key by default; configurable via =--key= flag or environment). Receivers verify automatically against the keys in their GPG keyring.
-
-Identity is a three-layer story:
-
-1. *Tailscale layer.* Only tailnet members can reach the rsync-over-SSH endpoint at all.
-2. *POSIX layer.* =chmod 700= on =from-agents/= means only processes running as the directory's owner can write.
-3. *GPG layer.* Sender's signature on each message proves the message originated from a process holding the key.
-
-Three independent layers. Per-user GPG (using existing keys) gives a correctness check more than a security boundary — unsigned messages are almost certainly bugs, not attackers. That's still load-bearing.
-
-** Writes are atomic
-
-Writers MUST use a temp-file + rename pattern (=mktemp= + =mv= within the same filesystem) so receivers never see partial files. The implementation script (=cross-agent-send=) handles this.
-
-Receivers ignore =.tmp.*= files, processing only the final renamed name.
-
-*Two-file ordering.* When a message has a sibling GPG signature file (=.org.asc=), the writer MUST rename the =.asc= to its final name *before* renaming the =.org=. Two =mv= operations are not atomic together — without this ordering, a receiver could read the =.org= in the window between the two renames and fail GPG verify because the =.asc= hasn't landed yet. The rule: receiver only acts on =.org= files, and a =.org= without a corresponding =.asc= means the signature is genuinely missing (not still in flight).
-
-** Sequence numbering
-
-=#+SEQUENCE= is a *hint*, not a strict counter. Canonical order is =#+TIMESTAMP=. Sequences may collide under rapid back-and-forth (both sides write what they think is sequence N near-simultaneously). Treat collision as a normal protocol event.
-
-*Receiver-side dedup rule.* When a new file shares =CONVERSATION_ID= + =SEQUENCE= with an already-processed message, compare SHA-256 hashes. Identical hashes → silent dedup, treat as a retry. Different hashes → process both, ordered by =#+TIMESTAMP=.
-
-*Sender-side collision-reduction (best-effort).* Before picking a sequence number, scan the receiver's =from-agents/= for the highest existing sequence in this conversation across both sender prefixes. Use =max(seen) + 1=.
-
-** Message types
-
-- *request* — a side asks for work, input, or a decision. Sequence 1 is always =request=.
-- *progress* — work-in-progress checkpoint. "Here's where I am, no action needed from you, more coming." Originator's poll loop should NOT page the user on progress messages.
-- *query* — either side asks a clarifying question that blocks further work. Originator's poll loop SHOULD surface this immediately. Originator answers and work continues.
-- *pushback* — receiver formally disagrees with the request and has *not* started the work. Carries reasoning. Distinct from =query= because the originator's response path differs.
-- *complete* — receiver signals the requested work is done. Triggers verification.
-- *release* — terminal type. Originator writes after verifying =complete=. Carries =RELEASE_STATUS= to disambiguate the closure mode.
-- *escalate* — punts the conversation to the user for adjudication. Both sides pause polling on =escalate=; the user resolves.
-
-Reply expectation is implied by type: =request=, =query=, =pushback=, =escalate= expect a reply; =progress=, =complete=, =release= don't.
-
-** Conversation lifecycle
-
-A conversation is a directed loop between an originator (the side that issued sequence 1) and a receiver:
-
-1. Originator writes =request= (sequence 1). Begins polling for replies.
-2. *Optional acknowledgment.* Receiver may write a =progress= at sequence 2 to acknowledge receipt and set expectations. Required if work will take >5 minutes (so the originator's poll loop doesn't waste wakes).
-3. *Optional echo-back.* For ambiguous or large requests, receiver writes a =progress= that restates work items and announces "starting now unless you push back within N minutes."
-4. Receiver works. May write =progress= updates. =query= mid-work if blocked. =pushback= if the request is wrong.
-5. Receiver writes =complete=. Begins polling for =release=.
-6. Originator reads, *verifies the deliverable directly*. For subjective deliverables, verification is the originator's editorial accept.
-7. If verified: =release= with =RELEASE_STATUS: complete=. If problems: new =request= (next sequence number).
-8. Receiver sees =release=, stops polling.
-
-The verification step is load-bearing. =complete= is a *claim*; =release= is *verification*.
-
-** Pushback path
-
-On receiving a =pushback=, the originator chooses:
-
-1. *Revise* — new =request= with adjusted scope.
-2. *Insist* — new =request= addressing the pushback's reasoning, standing by direction.
-3. *Withdraw* — =release= with =RELEASE_STATUS: withdrawn-after-pushback=.
-
-*Deadlock cap.* After two pushback-insist exchanges, the next message MUST be =MESSAGE_TYPE: escalate=. Both agents pause polling; the user resolves.
-
-** =RELEASE_STATUS= values
-
-| Status | Meaning |
-|---+---|
-| =complete= | Goal achieved, originator verified |
-| =cancelled= | Originator changed their mind mid-conversation |
-| =withdrawn-after-pushback= | Originator chose option 3 on receiver's =pushback= |
-| =abandoned-after-escalation= | User adjudicated and chose to close the conversation |
-| =abandoned-after-timeout= | Originator never returned to verify before the receiver's polling timeout; the user formally closes the orphaned thread with a late =release= |
-
-** Async fallback
-
-If the originator session ends between =request= and =complete=, the receiver's =complete= goes unverified. Receiver behavior:
-
-- Polls for =release= for up to ~24 hours (implementation default).
-- After timeout, writes a final =progress= message ("treating as terminal-without-verification; originator never returned to release") and stops polling. Receiver does NOT write =release= itself — that would contradict the lifecycle rule that =release= is the originator's terminal action.
-- Next time the originator project starts, the unreleased =complete= is surfaced as a startup item. The user can issue a late =release= (with whichever =RELEASE_STATUS= fits) or open a fresh conversation to revisit. =RELEASE_STATUS: abandoned-after-timeout= is used at that point if the user wants to formally close the orphaned thread.
-
-** Escalation
-
-A side writes =escalate= when:
-- Pushback-insist deadlock cap reached.
-- Conversation has stalled (no productive movement in N exchanges).
-- A reply-expecting message has gone unanswered past timeout.
-
-The body summarizes both sides' positions briefly enough to read in about 60 seconds. Both agents pause polling; the user resolves.
-
-* Implementation notes
-
-This section describes how to operate the protocol. Operational detail lives in the seven scripts' READMEs.
-
-** Recommended scripts
-
-| Script | Replaces user action | README |
-|---+---+---|
-| =cross-agent-send <dest> <msg>= | Filename generation, GPG sign, atomic write, peer lookup, rsync push, retry+backoff, failure surfacing — seven mechanical sender-side steps. Frontmatter and message body are still author-supplied. | =cross-agent-send.md= |
-| =cross-agent-recv <msg>= | Frontmatter sanity-check, =PROTOCOL_VERSION= verify, GPG verify, SHA-256 dedup, =REQUIRES_TOOLS= check — five mechanical receiver-side steps. Output is a structured decision (=process= / =dedup= / =query= / =reject=) the agent acts on. | =cross-agent-recv.md= |
-| =cross-agent-watch= | Manually checking inboxes; "did I get a message?" | =cross-agent-watch.md= |
-| =cross-agent-status= | Walking each project to count pending messages | =cross-agent-status.md= |
-| =cross-agent-discover= | Remembering project topology and reachability | =cross-agent-discover.md= |
-| =cross-agent-halt [reason] [--tailnet]= | Visiting each session to stop polling, restarting Claude Code, or hand-killing processes when comms go runaway. =--tailnet= propagates HALT to all peers. | =cross-agent-halt.md= |
-| =cross-agent-resume [--tailnet]= | Manually clearing the HALT state and restarting the watcher. Per-session polling does NOT auto-resume — the user re-engages each session explicitly. | =cross-agent-resume.md= |
-
-The scripts are tools the user runs from any terminal. They do not depend on agent context — =cross-agent-status= run from a fresh shell works.
-
-A reader can comprehend this protocol from this spec alone. Script READMEs add operational detail that makes the protocol practical to use, but understanding the protocol's semantics requires only this document.
-
-** Polling
-
-Default cadence: 270 seconds (≈4.5 min). Sits just under the 5-minute prompt-cache TTL.
-
-If a side needs to slow down (heads-down work, idle wait), it writes a =progress= message saying so in prose. The other side adapts. There are no named polling modes.
-
-After ~12 empty polls in a row, the poll loop surfaces the silence to the user.
-
-A future runtime with native filesystem-event support could replace polling for active sessions; =cross-agent-watch= already provides event-driven notifications outside active sessions.
-
-** User multi-tasking
-
-- *Deferral.* If the user's last message in the agent's session was less than 60 seconds ago AND a poll fires, queue the inbox check until either the user sends another message OR 5 minutes pass without further input.
-- *Surfacing.* On the next user-facing response: "While we were working on X, a cross-agent message landed from <project>. It's a =<type>= — want me to handle it now or after we finish?"
-- *Mid-question.* Answer the user first.
-- *Project switch.* If the user moves to the receiver project mid-conversation, the receiver agent surfaces the in-flight thread on first user prompt.
-- *Conversation state.* Always include in any response that mentions a cross-agent thread: "<conv-id> at sequence N, awaiting <event>."
-
-** Failure modes
-
-The seven scripts surface most failures with concrete error messages. Spec-level failure modes:
-
-- *Malformed frontmatter on a received file.* Surface to user; do not act.
-- *Mismatched =PROTOCOL_VERSION=.* Receiver writes =query= asking originator to upgrade.
-- *Missing or invalid GPG signature.* Receiver surfaces "unsigned/unverified message"; refuses to act.
-- *Sequence collision* with non-matching SHA-256. Process both, ordered by timestamp.
-- *Required tool unavailable.* Receiver checks =REQUIRES_TOOLS= during frontmatter-sanity-check (before any work begins). On a missing tool, receiver writes =query= asking the originator to reframe the request to avoid the unavailable tool. Originator may revise (new =request=) or withdraw (=release= with =RELEASE_STATUS: cancelled=). =query= is the right type rather than =pushback= because missing-tool is a capability gap, not disagreement.
-- *Runaway resource usage.* User invokes =cross-agent-halt= globally (or =cross-agent-halt --tailnet= for cross-machine). HALT file stops all components within one polling cycle (~5 min). See =* Halt mechanism= for the layered checks.
-- *User halts mid-conversation.* Both sides write a final =progress= note ("HALT fired; pausing"); polling stops within one cadence; conversations resume on explicit per-session re-engage after HALT clears.
-- *HALT file accidentally created* (typo, errant =touch=). =cross-agent-status= prominently flags HALT active; user clears with =cross-agent-resume=. Cost: no messages are sent during the typo window.
-- *HALT file unreadable* (perms wrong, partial write). Each component fails-closed (treats as halted) and reports "HALT file present but unreadable; treat as halted." Safer than fail-open.
-
-Operational failures (rsync push fails, watcher dies, peer unreachable) live in the script READMEs' failure-mode tables.
-
-* Halt mechanism
-
-A failsafe to stop all cross-agent activity on a machine without visiting individual sessions or restarting Claude Code. Designed for the runaway-polling case: an agent has spun up conversations with N other agents, polling is eating CPU, and the user needs to stop everything *now*.
-
-** The HALT file
-
-Path: =~/.config/cross-agent-comms/HALT=.
-
-Existence triggers halt across all components on the machine. The file's body may carry an optional human-readable reason (reviewed by the user later when deciding to resume).
-
-User commands:
-
-#+begin_example
-$ touch ~/.config/cross-agent-comms/HALT # halt
-$ rm ~/.config/cross-agent-comms/HALT # resume
-#+end_example
-
-Or via convenience scripts (=cross-agent-halt= / =cross-agent-resume=) that also handle the watcher service and cross-machine propagation.
-
-** Layered checks (the failsafe property)
-
-Every component MUST check the HALT file. The "any one component stops the system independently" property is what makes this failsafe — the system doesn't depend on a single point doing the right thing.
-
-| Component | Check timing | Behavior on HALT |
-|---+---+---|
-| =cross-agent-send= | At start of send + between =.asc= and =.org= rsync + between retry iterations | Refuse to start new send; complete current step then exit. Worst case: one in-flight send finishes within a few seconds. |
-| =cross-agent-recv= | Before any verify or dedup | Leave inbound message in place — do NOT dedup, reject, or move. Resume picks it up via cold-start handling. |
-| =cross-agent-watch= | At iteration start | Suppress notifications; log only. Continues running, no-op until HALT clears. |
-| =cross-agent-status= | At start | Print prominent "⚠ HALT ACTIVE" banner before normal output. Read-only, continues. |
-| =cross-agent-discover= | At start | Print HALT banner; continue read-only enumeration. |
-| Agent polling loop | First action on every wake | Write a final =progress= note to any active conversation ("HALT fired; pausing"), do NOT reschedule, surface "halt active" to user. Polling decays within one cadence (~5 min). |
-| Agent user-facing responses | Every response while HALT is set | Append "(HALT active; cross-agent comms paused)" to the response. On HALT clear, the next response says "(HALT cleared; cross-agent comms ready to resume — say so to re-engage polling)." Persistent, not just first-response — keeps awareness alive. |
-| Conversation initiator | Before writing sequence 1 of any new conversation | Refuse and surface to user. |
-| Startup workflow | Phase A on session start | If HALT exists, surface immediately and skip cross-agent inbox checks. |
-
-The agent polling-loop check is the load-bearing one for "stops eating CPU." Wake-ups already scheduled fire, but each wake on-HALT is a no-op + reschedule-prevention. Within one polling cadence (~5 min) all polling stops.
-
-*Fail-closed on unreadable HALT.* If the HALT file exists but is unreadable (wrong permissions, partial write), components MUST treat as halted. Safer than fail-open.
-
-** Resume asymmetry (deliberate)
-
-Halt is automatic everywhere. Resume requires explicit user intent per-session.
-
-When the user removes HALT (or runs =cross-agent-resume=), components stop refusing to act, but agent polling does NOT auto-resume. The user must open each session and tell that agent to resume polling for its conversations.
-
-The asymmetry exists because:
-
-1. Auto-resume could silently invert intentional kills. If the user halted because a session was misbehaving, removing HALT shouldn't quietly revive it.
-2. Per-session resume forces the user to look at each session and confirm the situation is resolved before re-engaging.
-
-** Cross-machine halt
-
-=cross-agent-halt --tailnet= iterates =peers.toml= and SSH-touches HALT on each peer. Same shape for resume.
-
-Reports per-peer status with non-zero exit on partial halt:
-
-#+begin_example
-$ cross-agent-halt --tailnet
-Halting velox.local ✓ (HALT file written)
-Halting bastion.local ✗ (ssh exit 255: no route to host)
-Halting locally ✓ (HALT file written)
-
-PARTIAL HALT: 2/3 machines halted. bastion.local needs manual halt.
-Exit 1.
-#+end_example
-
-Scripting can detect partial halt via the exit code. Same pattern for =--tailnet= on resume.
-
-* Limitations
-
-- *Local-tailnet only.* Filesystem IPC + rsync over SSH. Cross-tailnet or cross-organization is out of scope.
-- *Identity has three layers (Tailscale + POSIX + GPG)* but no message-content encryption. Confidentiality is not the goal; signing is correctness, not secrecy.
-- *Single-receiver per conversation.* Fan-out to multiple receivers requires manually orchestrating multiple parallel conversations.
-- *Polling is best-effort.* A wake may be delayed by an in-flight tool call until the runtime is idle. =cross-agent-watch= mitigates by offering event-driven notifications.
-- *Project-extension drift.* If two projects' =.ai/project-workflows/= modify shared workflow definitions in incompatible ways, cross-agent assumptions can diverge silently. The optional =#+WORKFLOW_VERSION= advisory field is informational only in v5 — no implementation reads or acts on it. A future version may add enforcement on mismatch (e.g. receiver writes =query= asking which side is stale). Today, alignment is verified manually before high-stakes conversations.
-
-* Persistence after release
-
-Conversation files persist by default. The conversation log is the audit trail.
-
-Manual archival is fine if the inbox grows unmanageable. Suggested cadence: once the conversation has been =release='d AND the work it produced has shipped, archive both projects' message files into =.ai/sessions/cross-agent/= as a flat directory — no per-conversation subdirectories. Rename each archived file to lead with the conversation-id so messages from the same conversation cluster on =ls=: =<conv-id>-<TIMESTAMP>-from-<sender>.org= (and the matching =.asc= sibling, if present). Inbox filenames lead with the timestamp because chronological arrival is what matters in =from-agents/=; archives invert that because grouping by conversation is what matters when reading history. Keep the =.asc= signatures alongside the =.org= files in archive — they're small and document the GPG verification chain.
-
-Old messages don't affect protocol behavior (=cross-agent-status='s pending semantics correctly ignore released messages) but the =from-agents/= directory grows indefinitely without manual archival. =cross-agent-status= performance degrades noticeably when a project's =from-agents/= exceeds a few hundred files. =cross-agent-init= (deferred to v6) would include an archival sub-command.
-
-* Open questions
-
-- *=cross-agent-init= and =cross-agent-compose= helper scripts.* =-init= would be one-command project bootstrap (creates =inbox/from-agents/= with =chmod 700=, installs the =cross-agent-watch= systemd path unit, validates peer config, runs a discovery probe). =-compose= would be interactive frontmatter authoring (prompts for required fields, produces a draft message file). Both deferred to v6. Current onboarding requires manual =mkdir= + systemd setup per =cross-agent-watch.md='s install recipe; current message authoring requires writing the file by hand or via a small in-agent template.
-- *Hard conversation timeout.* The async-fallback timeout is implementation-default ~24 hours. Right number depends on use case; tighten as patterns emerge.
-- *=paused= polling state.* Today there's no clean signal for "pause without ending." Add when first user complaint surfaces.
-- *Multi-LLM context.* If we ever bring in a non-Claude agent, the protocol's natural-language framing may need formalization.
-
-* Examples
-
-** =prep-fixup= conversation (2026-04-26 → 2026-04-27)
-
-Eleven exchanges between homelab and career produced the v4 spec by iterative critique-and-simplification. Three real-time sequence collisions during the conversation drove the sequence-as-hint rule that landed in v4 and persists in v5.
-
-Files at =~/projects/{homelab,career}/inbox/from-agents/= named =*-prep-fixup.org=. Worth re-reading when designing future cross-agent flows.
-
-** =comms-cold-start-discovery= conversation (2026-04-27)
-
-The follow-up that produced this v5 spec. Cold-start, watcher tooling, agent discovery, GPG identity, sha256 dedup, atomic writes, POSIX perms, script absorption, and process-vs-text simplification. Tonight's first cold-start in real time (career session went dormant after =prep-fixup= release; Craig's user-injection re-engaged it) is the worked demonstration of the v5 user-injection rule.
-
-Files at =~/projects/{homelab,career}/inbox/from-agents/= named =*-comms-cold-start-discovery.org=.
diff --git a/claude-templates/.ai/workflows/helper-mode.org b/claude-templates/.ai/workflows/helper-mode.org
index 8ead37b..cdec200 100644
--- a/claude-templates/.ai/workflows/helper-mode.org
+++ b/claude-templates/.ai/workflows/helper-mode.org
@@ -65,7 +65,7 @@ The git ban is concurrency-scoped. /Helper wrap-up/ below lifts it for exactly o
** Escalation
-Anything the contract blocks routes through the cross-agent message form (=machine.project.agent-id=), or just gets reported to Craig. The helper leaves its tree changes for the primary's next commit, or describes them in a targeted message.
+Anything the contract blocks gets reported to Craig, or — for a cross-project handoff — routed through =inbox-send= to the owning project's =inbox/=. The helper leaves its tree changes for the primary's next commit, or describes them in a note to Craig.
* Data-Integrity Rules
diff --git a/claude-templates/.ai/workflows/startup.org b/claude-templates/.ai/workflows/startup.org
index 59c9c54..fe7778f 100644
--- a/claude-templates/.ai/workflows/startup.org
+++ b/claude-templates/.ai/workflows/startup.org
@@ -10,8 +10,8 @@ The workflow is structured into four phases. *Phase A.0* is a sequential pre-fli
Quick contract — runs / produces:
- *Phase A.0* (sequential): refresh rulesets, then the project repo.
-- *Phase A* (parallel batch): timestamp, session-context check, guarded =.ai/= sync, recent sessions, inbox-status, cross-agent status, notes.org, staleness, language-bundle freshness.
-- *Phase B* (parallel batch): read the crash-recovery anchor if present, the recent session summaries, new inbox items, pending cross-agent messages.
+- *Phase A* (parallel batch): timestamp, session-context check, guarded =.ai/= sync, recent sessions, inbox-status, notes.org, staleness, language-bundle freshness.
+- *Phase B* (parallel batch): read the crash-recovery anchor if present, the recent session summaries, new inbox items.
- *Phase C* (interactive): surface findings, process the inbox, run project startup-extras, ask priorities.
* Execution
@@ -146,12 +146,11 @@ These calls have no dependencies on each other. Issue them all together in one m
4. =\ls -t .ai/sessions/ 2>/dev/null | head -5= — list 5 most recent session files. The backslash bypasses any =ls= alias in the user's profile. Without it, bare =ls -t= silently returns no output under =exa= (a common =ls= replacement) — which makes a sessions directory full of files look empty, and the agent then skips Phase B step 2.
5. =\ls -la inbox/ 2>/dev/null= — inventory the inbox. Same reason for the backslash escape, applied uniformly across the Phase A =ls= calls.
-6. =cross-agent-status 2>/dev/null || true= — snapshot of pending cross-agent messages across local projects. This is layer A of the cold-start design from =cross-agent-comms.org=: pending messages from other agents (delivered while no session was active here) get surfaced on session start. The =|| true= keeps Phase A from failing if =cross-agent-status= isn't installed yet — older projects without the script still boot cleanly. If HALT is active, =cross-agent-status= prints a banner; surface that prominently in Phase C.
-7. Read =.ai/notes.org= — Project-Specific Context, Active Reminders, Pending Decisions sections (skip About This File).
-8. Read =.ai/project-workflows/startup-extras.org= if it exists.
-9. =[ -f todo.org ] && .ai/scripts/task-review-staleness.sh todo.org 7 || true= — count top-level tasks overdue for review (the daily task-review habit's startup nudge). The =[ -f todo.org ]= guard skips projects without a root todo.org; =|| true= keeps Phase A from failing if the script isn't synced yet. Threshold 7 days is one review cycle of slack — softer than the wrap-up health check's 30-day alarm.
-10. =bash ~/code/rulesets/scripts/sync-language-bundle.sh "$PWD" 2>/dev/null || true= — language-bundle freshness for the current project. Fingerprint-detects which bundle (if any) the project has, auto-fixes drifted rulesets-owned files (=.claude/rules/*.md=, =.claude/hooks/*=, =githooks/*=), and surfaces drift in =settings.json= without writing it (a project may have customized it). =CLAUDE.md= is deliberately left untracked — it's seed-only in =install-lang= and project-owned afterward, mirroring how =diff-lang= skips it. Quiet when there's no bundle or everything's clean. Hardcodes the rulesets path because =languages/= is the canonical source and lives only there — the same absolute-path dependency the rsyncs already carry. =|| true= keeps Phase A from failing on older checkouts where the script isn't present yet. The =.ai/= rsyncs and this call write to disjoint paths (=.ai/= vs =.claude/=/=githooks/=), so the batch stays parallel-safe.
-11. =[ -f "$HOME/org/roam/inbox.org" ] && grep -cE '^\*\* ' "$HOME/org/roam/inbox.org" || true= — count items in the roam global inbox (=~/org/roam/inbox.org=), the inbox-zero startup nudge. Silent if the roam clone isn't on this machine. Phase C reads the file when the count is non-zero, splits total vs items related to this project, and surfaces the offer (see =inbox-zero.org=). Read-only; never files at startup.
+6. Read =.ai/notes.org= — Project-Specific Context, Active Reminders, Pending Decisions sections (skip About This File).
+7. Read =.ai/project-workflows/startup-extras.org= if it exists.
+8. =[ -f todo.org ] && .ai/scripts/task-review-staleness.sh todo.org 7 || true= — count top-level tasks overdue for review (the daily task-review habit's startup nudge). The =[ -f todo.org ]= guard skips projects without a root todo.org; =|| true= keeps Phase A from failing if the script isn't synced yet. Threshold 7 days is one review cycle of slack — softer than the wrap-up health check's 30-day alarm.
+9. =bash ~/code/rulesets/scripts/sync-language-bundle.sh "$PWD" 2>/dev/null || true= — language-bundle freshness for the current project. Fingerprint-detects which bundle (if any) the project has, auto-fixes drifted rulesets-owned files (=.claude/rules/*.md=, =.claude/hooks/*=, =githooks/*=), and surfaces drift in =settings.json= without writing it (a project may have customized it). =CLAUDE.md= is deliberately left untracked — it's seed-only in =install-lang= and project-owned afterward, mirroring how =diff-lang= skips it. Quiet when there's no bundle or everything's clean. Hardcodes the rulesets path because =languages/= is the canonical source and lives only there — the same absolute-path dependency the rsyncs already carry. =|| true= keeps Phase A from failing on older checkouts where the script isn't present yet. The =.ai/= rsyncs and this call write to disjoint paths (=.ai/= vs =.claude/=/=githooks/=), so the batch stays parallel-safe.
+10. =[ -f "$HOME/org/roam/inbox.org" ] && grep -cE '^\*\* ' "$HOME/org/roam/inbox.org" || true= — count items in the roam global inbox (=~/org/roam/inbox.org=), the inbox-zero startup nudge. Silent if the roam clone isn't on this machine. Phase C reads the file when the count is non-zero, splits total vs items related to this project, and surfaces the offer (see =inbox-zero.org=). Read-only; never files at startup.
Notes on the rsync commands:
- Trailing slashes on both source and destination matter — they tell rsync to sync /contents/ rather than nest a directory inside.
@@ -170,7 +169,6 @@ These calls depend on Phase A outputs, but are independent of each other. Issue
1. *Read =.ai/session-context.org= if Phase A reported it exists.* The file is the crash-recovery anchor — if it's there, the previous session was interrupted and the context lives only in this file.
2. *Read each of the 5 most recent session files* from Phase A's =\ls -t .ai/sessions/= output. Read just the =* Summary= section of each — not the full file. The Summary gives Active Goal / Decisions / Data Collected / Findings / Files Modified / Next Steps. That's enough to pick up where things left off. Drill into a specific =* Session Log= later only if you need the /why/ or sequence on something. *If Phase A's listing came back empty, sanity-check with =\ls -la .ai/sessions/= before treating empty as definitive — sessions/ should normally be populated, and an empty result usually means the listing got swallowed somewhere, not that the directory is genuinely empty.*
3. *Read each new inbox file* from Phase A's =\ls -la inbox/= output. For =.eml= files, defer to Phase C — those need the extract script (below) rather than a raw Read.
-4. *Process pending cross-agent messages.* For each project with a pending count >0 in Phase A's =cross-agent-status= output (typically the current project; cross-project pending is surfaced too but only acted on if the user asks), run =cross-agent-recv <message-file>= on the file path =cross-agent-status= named. The script returns a structured decision (=process= / =dedup= / =query= / =reject=) per the protocol. For =process=, read the message body to determine the action. For =query=, prepare a clarifying reply. For =reject=, surface to user with the reason. For =dedup=, no action — silent retry already handled. Surface all decisions in Phase C alongside other findings.
Rationale: Reads are independent and benign. Batching them means the whole session-history view + inbox view lands in one round-trip instead of one per file.
@@ -197,7 +195,6 @@ This phase touches the user and runs sequentially:
#+end_src
If it reports a count, surface one line: wrap-up's Step 4.0 will commit it as =chore: sync .ai tooling from templates=, or offer to commit it now. If silent, say nothing. This is the crashed-session counterpart to the wrap-up commit step (the primary fix). From the 2026-05-31 jr-estate + work handoffs.
- - *Surface pending cross-agent messages.* If =cross-agent-status= reported any pending messages, list them with their =cross-agent-recv= decision (process / query / reject) per file. For =process= messages in this project's inbox, propose handling now or after the current task. For pending in other projects, mention the count so the user knows to switch projects when ready. If HALT was active, surface that prominently — cross-agent activity is paused until =cross-agent-resume= clears it.
2. *Process inbox if non-empty.* Mandatory — don't ask, just delegate to [[file:process-inbox.org][process-inbox.org]]. That workflow owns the value gate (advances an existing TODO / improves the project / serves the mission), the per-source rejection flow (Craig / project handoff / script), the priority-scheme check before filing, and the =.eml= extraction path. Single source of truth for the discipline.
3. *Execute project-specific startup extras* (the contents of =.ai/project-workflows/startup-extras.org= read in Phase A). If the file didn't exist, skip.
4. *Ask about priorities.* "What would you like to work on, or is there something urgent you need?"
diff --git a/docs/design/2026-05-28-generic-agent-runtime-spec.org b/docs/design/2026-05-28-generic-agent-runtime-spec.org
index 01be6d4..7d7a549 100644
--- a/docs/design/2026-05-28-generic-agent-runtime-spec.org
+++ b/docs/design/2026-05-28-generic-agent-runtime-spec.org
@@ -3,6 +3,10 @@
#+DATE: 2026-05-28
#+STARTUP: showall
+* Status note (2026-06-16)
+
+The cross-agent-comms subsystem this spec references as an existing substrate (=cross-agent-send= / =-recv= / =-watch= / =-status= / =-discover= / =-halt= / =-resume=, the =inbox/from-agents/= file-IPC protocol) was *removed* on 2026-06-16 as unused — every real cross-project handoff goes through =inbox-send= instead. Sections below that propose extending the cross-agent protocol (e.g. "Cross-agent updates", the =machine.project.agent-id= targeting) are historical: if this arc is revived, that layer would be rebuilt on =inbox-send=, not the deleted scripts.
+
* Introductory note
Craig asked for a design pass on making =rulesets= generic rather than