diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-06 21:59:52 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-06 21:59:52 -0500 |
| commit | d81b23ad6b6e437dfe3c338a00a4be39bc555146 (patch) | |
| tree | 2d4b0d7890fd1fc70d81282b81fed2808c28a106 /.ai/scripts/cross-agent-comms/cross-agent-resume | |
| parent | 201377f57430ef28d02e703a2191434bbee55c75 (diff) | |
| download | rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.tar.gz rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.zip | |
chore(ai): initialize project notes and Claude tooling surfaces
Replace the seed notes.org with project-specific context (layout, install modes, task tracker location, recent inflection point). Bring in the synced template surfaces (protocols, workflows, scripts, references, retrospectives, someday-maybe) as tracked content for this content/documentation project.
Diffstat (limited to '.ai/scripts/cross-agent-comms/cross-agent-resume')
| -rwxr-xr-x | .ai/scripts/cross-agent-comms/cross-agent-resume | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume b/.ai/scripts/cross-agent-comms/cross-agent-resume new file mode 100755 index 0000000..1fb83bc --- /dev/null +++ b/.ai/scripts/cross-agent-comms/cross-agent-resume @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +"""Resume cross-agent comms after a halt. + +See cross-agent-resume.md. Removes ~/.config/cross-agent-comms/HALT and +restarts the cross-agent-watch systemd user service. With --tailnet, +propagates the removal to every peer in peers.toml via SSH; reports +per-peer status with non-zero exit on partial resume. + +Per the asymmetry rule: clearing HALT does NOT auto-resume agent polling. +Each session must explicitly re-engage. +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +import tomllib +from pathlib import Path + +CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" +HALT_FILE = CONFIG_DIR / "HALT" +PEERS_TOML = CONFIG_DIR / "peers.toml" + +EXIT_OK = 0 +EXIT_PARTIAL = 1 + + +def err(msg: str) -> None: + print(msg, file=sys.stderr) + + +def remove_halt_file() -> bool: + """Returns True if HALT was removed, False if it didn't exist.""" + if HALT_FILE.exists(): + try: + HALT_FILE.unlink() + return True + except OSError as e: + err(f"could not remove HALT: {e}") + return False + return False + + +def start_watcher_service() -> None: + """Best-effort start of the systemd watcher path unit.""" + try: + subprocess.run( + ["systemctl", "--user", "start", "cross-agent-watch.path"], + capture_output=True, text=True, timeout=5, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + +def load_peers() -> dict: + if not PEERS_TOML.exists(): + return {} + try: + return tomllib.loads(PEERS_TOML.read_text()) + except (tomllib.TOMLDecodeError, OSError) as e: + err(f"cannot parse peers.toml: {e}") + return {} + + +def ssh_remove_halt(host: str, ssh_user: str | None) -> tuple[bool, str]: + target = f"{ssh_user}@{host}" if ssh_user else host + remote_cmd = "rm -f ~/.config/cross-agent-comms/HALT" + try: + result = subprocess.run( + ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd], + capture_output=True, text=True, timeout=10, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return False, "ssh unavailable or timed out" + if result.returncode == 0: + return True, "HALT cleared" + return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1] + + +def print_re_engage_instructions() -> None: + print() + print("Halt cleared. Watcher restarted.") + print() + print("Agent polling does NOT auto-resume — per the failsafe asymmetry rule,") + print("agents stay paused until you explicitly re-engage each session.") + print("Open the relevant Claude session and tell the agent to resume polling") + print("for its conversation.") + + +def main() -> int: + parser = argparse.ArgumentParser(description="Resume cross-agent comms after a halt.") + parser.add_argument("--tailnet", action="store_true", + help="Propagate HALT removal to every peer in peers.toml") + args = parser.parse_args() + + removed = remove_halt_file() + start_watcher_service() + if removed: + print("Resuming locally ✓ (HALT cleared)") + else: + print("Resuming locally ✓ (no HALT was active)") + + if not args.tailnet: + print_re_engage_instructions() + return EXIT_OK + + peers = load_peers().get("peers", {}) + if not peers: + print() + print("No peers configured in peers.toml — local-only resume complete.") + print_re_engage_instructions() + return EXIT_OK + + print() + successes = 1 + failures = [] + for name, cfg in sorted(peers.items()): + host = cfg.get("host", name) + ssh_user = cfg.get("ssh_user") + ok, detail = ssh_remove_halt(host, ssh_user) + marker = "✓" if ok else "✗" + print(f"Resuming {host:<27} {marker} ({detail})") + if ok: + successes += 1 + else: + failures.append(f"{name} ({host}): {detail}") + + print() + total = len(peers) + 1 + if failures: + print(f"PARTIAL RESUME: {successes}/{total} machines cleared.") + for f in failures: + print(f" - {f}") + print("Resolve the failures or manually clear HALT on each machine.") + print_re_engage_instructions() + return EXIT_PARTIAL + + print(f"Resume complete across {total} machine(s).") + print_re_engage_instructions() + return EXIT_OK + + +if __name__ == "__main__": + sys.exit(main()) |
