aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/cross-agent-comms/cross-agent-halt
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-06 21:59:52 -0500
committerCraig Jennings <c@cjennings.net>2026-05-06 21:59:52 -0500
commitd81b23ad6b6e437dfe3c338a00a4be39bc555146 (patch)
tree2d4b0d7890fd1fc70d81282b81fed2808c28a106 /.ai/scripts/cross-agent-comms/cross-agent-halt
parent201377f57430ef28d02e703a2191434bbee55c75 (diff)
downloadrulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.tar.gz
rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.zip
chore(ai): initialize project notes and Claude tooling surfaces
Replace the seed notes.org with project-specific context (layout, install modes, task tracker location, recent inflection point). Bring in the synced template surfaces (protocols, workflows, scripts, references, retrospectives, someday-maybe) as tracked content for this content/documentation project.
Diffstat (limited to '.ai/scripts/cross-agent-comms/cross-agent-halt')
-rwxr-xr-x.ai/scripts/cross-agent-comms/cross-agent-halt134
1 files changed, 134 insertions, 0 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt b/.ai/scripts/cross-agent-comms/cross-agent-halt
new file mode 100755
index 0000000..df25115
--- /dev/null
+++ b/.ai/scripts/cross-agent-comms/cross-agent-halt
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""Failsafe halt for cross-agent comms.
+
+See cross-agent-halt.md. Touches ~/.config/cross-agent-comms/HALT and stops
+the cross-agent-watch systemd user service. With --tailnet, propagates the
+HALT file to every peer in peers.toml via SSH; reports per-peer status with
+non-zero exit on partial halt.
+
+Does NOT pkill in-flight scripts — they detect HALT on next iteration and
+stop themselves.
+"""
+
+from __future__ import annotations
+
+import argparse
+import subprocess
+import sys
+import tomllib
+from pathlib import Path
+
# Per-user directory holding the HALT marker and the peer list.
CONFIG_DIR = Path.home().joinpath(".config", "cross-agent-comms")
# Marker file: its presence is what signals the halt.
HALT_FILE = CONFIG_DIR.joinpath("HALT")
# Peer definitions consulted when --tailnet is given.
PEERS_TOML = CONFIG_DIR.joinpath("peers.toml")

# Process exit statuses returned by main().
EXIT_OK = 0  # every targeted machine was halted
EXIT_PARTIAL = 1  # at least one machine could not be halted
+
+
def err(msg: str) -> None:
    """Emit *msg* as one line on standard error."""
    sys.stderr.write(f"{msg}\n")
+
+
def write_halt_file(reason: str) -> None:
    """Create (or overwrite) the local HALT file.

    The file contains ``reason`` followed by a newline, or is empty when no
    reason was given. The config directory is created if it is missing.

    Raises:
        OSError: If the directory or the file cannot be created or written.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    # Pin the encoding: with the locale default, a non-ASCII reason raises
    # UnicodeEncodeError under a C/POSIX locale and the halt dies mid-way.
    HALT_FILE.write_text(f"{reason}\n" if reason else "", encoding="utf-8")
+
+
def stop_watcher_service() -> None:
    """Best-effort stop of the ``cross-agent-watch.path`` systemd user unit.

    Nothing here is fatal. A missing ``systemctl`` binary, or a unit that
    systemctl reports as not loaded, means there is no watcher to stop and is
    ignored silently. A timeout or any other non-zero exit is reported on
    stderr so the operator knows the watcher may still be running.
    """
    unit = "cross-agent-watch.path"
    try:
        result = subprocess.run(
            ["systemctl", "--user", "stop", unit],
            capture_output=True, text=True, timeout=5,
        )
    except FileNotFoundError:
        # No systemctl on this machine, hence no watcher unit to stop.
        return
    except subprocess.TimeoutExpired:
        err(f"warning: stopping {unit} timed out; the watcher may still be running")
        return

    if result.returncode == 0:
        return
    detail = (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
    # systemctl is expected to say "... not loaded" for a unit that is not
    # installed; that is the normal case on machines without the watcher.
    if "not loaded" not in detail:
        err(f"warning: could not stop {unit}: {detail}")
+
+
def load_peers() -> dict:
    """Load and parse ``peers.toml`` from the config directory.

    Returns:
        The parsed TOML document. An empty dict is returned when the file
        does not exist (not an error) or when it cannot be read or parsed
        (reported on stderr).
    """
    try:
        # tomllib.load() takes a binary file and decodes it as UTF-8 itself,
        # so the result does not depend on the process locale.
        with PEERS_TOML.open("rb") as fh:
            return tomllib.load(fh)
    except FileNotFoundError:
        # Opening directly (instead of exists()-then-read) avoids a race with
        # a concurrent delete; a missing file simply means "no peers".
        return {}
    except (tomllib.TOMLDecodeError, UnicodeDecodeError, OSError) as e:
        # UnicodeDecodeError is a ValueError, not a TOMLDecodeError, so it
        # must be listed or a non-UTF-8 file would crash the halt.
        err(f"cannot parse peers.toml: {e}")
        return {}
+
+
def ssh_touch_halt(host: str, ssh_user: str | None, reason: str) -> tuple[bool, str]:
    """Write the HALT file on one peer over SSH.

    Args:
        host: Hostname or address handed to ``ssh``.
        ssh_user: Remote login name, or ``None``/empty to let ssh choose.
        reason: Optional human-readable reason stored in the remote file.

    Returns:
        ``(ok, detail)``: ``ok`` is True when the remote command exited 0;
        ``detail`` is a one-line status for the per-peer report (on failure,
        the last line of ssh's stderr or the exit status).
    """
    target = f"{ssh_user}@{host}" if ssh_user else host
    # Same payload the local halt writes: the reason plus a trailing newline,
    # or an empty file when no reason was given.
    content = f"{reason}\n" if reason else ""
    # `printf %s` emits its argument verbatim; single-quoting keeps the remote
    # shell from interpreting anything inside the reason.
    remote_cmd = (
        "mkdir -p ~/.config/cross-agent-comms && "
        f"printf %s {_sh_quote(content)} > ~/.config/cross-agent-comms/HALT"
    )
    try:
        result = subprocess.run(
            # BatchMode=yes makes ssh fail instead of prompting for a password.
            ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd],
            capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        return False, "ssh unavailable"
    except subprocess.TimeoutExpired:
        return False, "ssh timed out"
    if result.returncode == 0:
        return True, "HALT file written"
    return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1]
+
+
+def _sh_quote(s: str) -> str:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
def main() -> int:
    """Halt cross-agent comms locally and, with ``--tailnet``, on every peer.

    The local HALT file is written and the watcher unit is stopped first.
    With ``--tailnet`` the HALT file is then pushed to each entry of the
    ``peers`` table in peers.toml, and a per-peer status line is printed.

    A failure on one machine never prevents the attempt on the others: this
    is a failsafe, so a local write error or a malformed peer entry is
    recorded and reported instead of aborting the run.

    Returns:
        ``EXIT_OK`` when every targeted machine was halted, ``EXIT_PARTIAL``
        when the local HALT file or any peer could not be halted.
    """
    parser = argparse.ArgumentParser(
        description="Halt all cross-agent comms on this machine (and optionally tailnet).",
    )
    parser.add_argument("reason", nargs="?", default="", help="Optional human-readable reason")
    parser.add_argument(
        "--tailnet", action="store_true",
        help="Propagate HALT to every peer in peers.toml",
    )
    args = parser.parse_args()

    # Local halt. Keep going on a write error so the watcher is still stopped
    # and the peers are still halted.
    local_error: str | None = None
    try:
        write_halt_file(args.reason)
    except OSError as e:
        local_error = f"cannot write {HALT_FILE}: {e}"
    stop_watcher_service()
    if local_error is None:
        print("Halting locally ✓ (HALT file written)")
    else:
        print(f"Halting locally ✗ ({local_error})")

    if not args.tailnet:
        print()
        if local_error is not None:
            print("HALT FAILED: the local HALT file could not be written.")
            return EXIT_PARTIAL
        print(f"Halt active. Remove {HALT_FILE} or run cross-agent-resume to clear.")
        print("Agent polling will stop within ~5 min (one cadence cycle).")
        return EXIT_OK

    peers = load_peers().get("peers", {})
    if not isinstance(peers, dict):
        # e.g. `peers = [...]`: there is nothing to iterate as name -> config.
        err("peers.toml: 'peers' must be a table of peer entries; no peers were halted")
        return EXIT_PARTIAL
    if not peers:
        print()
        if local_error is not None:
            print("No peers configured in peers.toml — and the local halt FAILED.")
            return EXIT_PARTIAL
        print("No peers configured in peers.toml — local-only halt complete.")
        return EXIT_OK

    print()
    # One entry per machine that could not be halted (local machine included).
    failures = [] if local_error is None else [f"local: {local_error}"]
    for name, cfg in sorted(peers.items()):
        if isinstance(cfg, dict):
            # str() keeps the column formatting below safe for non-string values.
            host = str(cfg.get("host", name))
            ok, detail = ssh_touch_halt(host, cfg.get("ssh_user"), args.reason)
        else:
            # e.g. `name = "host"` instead of a table; report it and move on
            # so the remaining peers are still halted.
            host = name
            ok, detail = False, "invalid peers.toml entry (expected a table)"
        marker = "✓" if ok else "✗"
        print(f"Halting {host:<28} {marker} ({detail})")
        if not ok:
            failures.append(f"{name} ({host}): {detail}")

    print()
    total = len(peers) + 1  # peers plus this machine
    if failures:
        print(f"PARTIAL HALT: {total - len(failures)}/{total} machines halted.")
        for failure in failures:
            print(f" - {failure}")
        print("Resolve the failures or manually halt each machine.")
        return EXIT_PARTIAL
    print(f"Halt active across {total} machine(s).")
    return EXIT_OK
+
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())