aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/cross-agent-comms/cross-agent-resume
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-06 21:59:52 -0500
committerCraig Jennings <c@cjennings.net>2026-05-06 21:59:52 -0500
commitd81b23ad6b6e437dfe3c338a00a4be39bc555146 (patch)
tree2d4b0d7890fd1fc70d81282b81fed2808c28a106 /.ai/scripts/cross-agent-comms/cross-agent-resume
parent201377f57430ef28d02e703a2191434bbee55c75 (diff)
downloadrulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.tar.gz
rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.zip
chore(ai): initialize project notes and Claude tooling surfaces
Replace the seed notes.org with project-specific context (layout, install modes, task tracker location, recent inflection point). Bring in the synced template surfaces (protocols, workflows, scripts, references, retrospectives, someday-maybe) as tracked content for this content/documentation project.
Diffstat (limited to '.ai/scripts/cross-agent-comms/cross-agent-resume')
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-resume145
1 files changed, 145 insertions, 0 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-resume b/.ai/scripts/cross-agent-comms/cross-agent-resume
new file mode 100755
index 0000000..1fb83bc
--- /dev/null
+++ b/.ai/scripts/cross-agent-comms/cross-agent-resume
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""Resume cross-agent comms after a halt.
+
+See cross-agent-resume.md. Removes ~/.config/cross-agent-comms/HALT and
+restarts the cross-agent-watch systemd user service. With --tailnet,
+propagates the removal to every peer in peers.toml via SSH; reports
+per-peer status with non-zero exit on partial resume.
+
+Per the asymmetry rule: clearing HALT does NOT auto-resume agent polling.
+Each session must explicitly re-engage.
+"""
+
+from __future__ import annotations
+
+import argparse
+import subprocess
+import sys
+import tomllib
+from pathlib import Path
+
+CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms"
+HALT_FILE = CONFIG_DIR / "HALT"
+PEERS_TOML = CONFIG_DIR / "peers.toml"
+
+EXIT_OK = 0
+EXIT_PARTIAL = 1
+
+
def err(msg: str) -> None:
    """Print *msg* on standard error, followed by a newline."""
    stream = sys.stderr
    print(msg, file=stream)
+
+
def remove_halt_file() -> bool:
    """Delete the local HALT marker file.

    Returns:
        True if HALT was removed by this call. False if there was nothing to
        remove, or if removal failed (the failure is reported on stderr).
    """
    try:
        # Unlink directly instead of exists()-then-unlink(): a HALT that
        # disappears between the two calls is then simply "not present"
        # rather than an error, and a dangling symlink named HALT (for which
        # exists() is False) still gets removed.
        HALT_FILE.unlink()
    except (FileNotFoundError, NotADirectoryError):
        # Nothing at that path (or the config directory itself is missing).
        return False
    except OSError as e:
        err(f"could not remove HALT: {e}")
        return False
    return True
+
+
def start_watcher_service() -> None:
    """Ask the systemd user manager to start cross-agent-watch.path.

    Strictly best-effort: the command's output and exit status are discarded,
    and a missing ``systemctl`` binary or a 5-second timeout is ignored.
    """
    command = ["systemctl", "--user", "start", "cross-agent-watch.path"]
    try:
        subprocess.run(command, capture_output=True, text=True, timeout=5)
    except FileNotFoundError:
        # systemctl is not installed on this machine.
        return
    except subprocess.TimeoutExpired:
        # systemctl did not finish within the timeout.
        return
+
+
def load_peers() -> dict:
    """Load and parse peers.toml.

    Returns:
        The parsed TOML document as a dict, or an empty dict when the file is
        missing, unreadable, not valid UTF-8, or not valid TOML. Every case
        other than a missing file is reported on stderr.
    """
    try:
        # Read in binary and let tomllib decode: TOML is UTF-8 by definition,
        # whereas Path.read_text() without an encoding uses the locale's.
        with PEERS_TOML.open("rb") as fh:
            return tomllib.load(fh)
    except (FileNotFoundError, NotADirectoryError):
        # No peers file is a normal, silent "no peers" condition.
        return {}
    except (tomllib.TOMLDecodeError, UnicodeDecodeError, OSError) as e:
        # UnicodeDecodeError is neither a TOMLDecodeError nor an OSError, so
        # it has to be listed explicitly or invalid bytes would crash the tool.
        err(f"cannot parse peers.toml: {e}")
        return {}
+
+
def ssh_remove_halt(host: str, ssh_user: str | None) -> tuple[bool, str]:
    """Remove the HALT file on a peer by running ``rm -f`` over SSH.

    Args:
        host: Hostname passed to ssh.
        ssh_user: Optional login name; when given the target is ``user@host``.

    Returns:
        ``(True, "HALT cleared")`` when ssh exits 0. Otherwise ``(False, reason)``
        where reason is a fixed message if ssh is missing or exceeded the
        10-second limit, else the last line of ssh's stderr, else
        ``"exit <code>"`` when stderr was empty.
    """
    destination = host if not ssh_user else f"{ssh_user}@{host}"
    argv = [
        "ssh",
        "-o", "ConnectTimeout=3",
        # BatchMode makes ssh fail rather than prompt for a password.
        "-o", "BatchMode=yes",
        destination,
        "rm -f ~/.config/cross-agent-comms/HALT",
    ]
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=10)
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False, "ssh unavailable or timed out"

    if proc.returncode != 0:
        stderr_lines = proc.stderr.strip().splitlines()
        if stderr_lines:
            return False, stderr_lines[-1]
        return False, f"exit {proc.returncode}"
    return True, "HALT cleared"
+
+
def print_re_engage_instructions() -> None:
    """Print the post-resume notice telling the user to re-engage each agent."""
    lines = (
        "",
        "Halt cleared. Watcher restarted.",
        "",
        "Agent polling does NOT auto-resume — per the failsafe asymmetry rule,",
        "agents stay paused until you explicitly re-engage each session.",
        "Open the relevant Claude session and tell the agent to resume polling",
        "for its conversation.",
    )
    # One print of the joined text yields the same bytes as one print per line.
    print("\n".join(lines))
+
+
def main() -> int:
    """Clear HALT locally (and on peers with --tailnet) and report the outcome.

    Parses the command line, removes the local HALT file, starts the watcher
    unit, and, when ``--tailnet`` is given, removes HALT on every entry of the
    ``peers`` table in peers.toml over SSH, printing one status line per
    machine.

    Returns:
        EXIT_OK when HALT is gone on every targeted machine, EXIT_PARTIAL when
        the local HALT or any peer's HALT could not be cleared.
    """
    parser = argparse.ArgumentParser(description="Resume cross-agent comms after a halt.")
    parser.add_argument("--tailnet", action="store_true",
                        help="Propagate HALT removal to every peer in peers.toml")
    args = parser.parse_args()

    removed = remove_halt_file()
    # remove_halt_file() returns False both when HALT was absent and when the
    # removal failed, so look again: a HALT that is still there means the
    # local resume did NOT happen and must not be reported as a success.
    local_ok = removed or not HALT_FILE.exists()
    start_watcher_service()
    if removed:
        print("Resuming locally ✓ (HALT cleared)")
    elif local_ok:
        print("Resuming locally ✓ (no HALT was active)")
    else:
        print("Resuming locally ✗ (HALT could not be removed)")

    manual_hint = f"Manually remove {HALT_FILE} and re-run."

    if not args.tailnet:
        if not local_ok:
            print(manual_hint)
            return EXIT_PARTIAL
        print_re_engage_instructions()
        return EXIT_OK

    peers = load_peers().get("peers", {})
    if not peers:
        print()
        if not local_ok:
            print("No peers configured in peers.toml — local HALT is still in place.")
            print(manual_hint)
            return EXIT_PARTIAL
        print("No peers configured in peers.toml — local-only resume complete.")
        print_re_engage_instructions()
        return EXIT_OK

    print()
    # The local machine counts as one of the machines in the final tally.
    successes = 1 if local_ok else 0
    failures = [] if local_ok else ["local: HALT could not be removed"]
    for name, cfg in sorted(peers.items()):
        # A peer's table key doubles as its hostname unless "host" overrides it.
        host = cfg.get("host", name)
        ssh_user = cfg.get("ssh_user")
        ok, detail = ssh_remove_halt(host, ssh_user)
        marker = "✓" if ok else "✗"
        print(f"Resuming {host:<27} {marker} ({detail})")
        if ok:
            successes += 1
        else:
            failures.append(f"{name} ({host}): {detail}")

    print()
    total = len(peers) + 1
    if failures:
        print(f"PARTIAL RESUME: {successes}/{total} machines cleared.")
        for f in failures:
            print(f" - {f}")
        print("Resolve the failures or manually clear HALT on each machine.")
        # The instructions open with "Halt cleared", which would be untrue for
        # this machine if the local removal is what failed.
        if local_ok:
            print_re_engage_instructions()
        return EXIT_PARTIAL

    print(f"Resume complete across {total} machine(s).")
    print_re_engage_instructions()
    return EXIT_OK
+
+
+if __name__ == "__main__":
+ sys.exit(main())