diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-06 21:59:52 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-06 21:59:52 -0500 |
| commit | d81b23ad6b6e437dfe3c338a00a4be39bc555146 (patch) | |
| tree | 2d4b0d7890fd1fc70d81282b81fed2808c28a106 /.ai/scripts/cross-agent-comms/cross-agent-halt | |
| parent | 201377f57430ef28d02e703a2191434bbee55c75 (diff) | |
| download | rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.tar.gz rulesets-d81b23ad6b6e437dfe3c338a00a4be39bc555146.zip | |
chore(ai): initialize project notes and Claude tooling surfaces
Replace the seed notes.org with project-specific context (layout, install modes, task tracker location, recent inflection point). Bring in the synced template surfaces (protocols, workflows, scripts, references, retrospectives, someday-maybe) as tracked content for this content/documentation project.
Diffstat (limited to '.ai/scripts/cross-agent-comms/cross-agent-halt')
| -rwxr-xr-x | .ai/scripts/cross-agent-comms/cross-agent-halt | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/.ai/scripts/cross-agent-comms/cross-agent-halt b/.ai/scripts/cross-agent-comms/cross-agent-halt new file mode 100755 index 0000000..df25115 --- /dev/null +++ b/.ai/scripts/cross-agent-comms/cross-agent-halt @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +"""Failsafe halt for cross-agent comms. + +See cross-agent-halt.md. Touches ~/.config/cross-agent-comms/HALT and stops +the cross-agent-watch systemd user service. With --tailnet, propagates the +HALT file to every peer in peers.toml via SSH; reports per-peer status with +non-zero exit on partial halt. + +Does NOT pkill in-flight scripts — they detect HALT on next iteration and +stop themselves. +""" + +from __future__ import annotations + +import argparse +import subprocess +import sys +import tomllib +from pathlib import Path + +CONFIG_DIR = Path.home() / ".config" / "cross-agent-comms" +HALT_FILE = CONFIG_DIR / "HALT" +PEERS_TOML = CONFIG_DIR / "peers.toml" + +EXIT_OK = 0 +EXIT_PARTIAL = 1 + + +def err(msg: str) -> None: + print(msg, file=sys.stderr) + + +def write_halt_file(reason: str) -> None: + CONFIG_DIR.mkdir(parents=True, exist_ok=True) + HALT_FILE.write_text((reason + "\n") if reason else "") + + +def stop_watcher_service() -> None: + """Best-effort stop of the systemd watcher service. Failures are logged but not fatal.""" + try: + subprocess.run( + ["systemctl", "--user", "stop", "cross-agent-watch.path"], + capture_output=True, text=True, timeout=5, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + # Watcher service may not be installed — fine. + pass + + +def load_peers() -> dict: + if not PEERS_TOML.exists(): + return {} + try: + return tomllib.loads(PEERS_TOML.read_text()) + except (tomllib.TOMLDecodeError, OSError) as e: + err(f"cannot parse peers.toml: {e}") + return {} + + +def ssh_touch_halt(host: str, ssh_user: str | None, reason: str) -> tuple[bool, str]: + target = f"{ssh_user}@{host}" if ssh_user else host + # Build the remote command. Quote the reason carefully. + remote_cmd = ( + f"mkdir -p ~/.config/cross-agent-comms && " + f"printf %s {_sh_quote(reason)} > ~/.config/cross-agent-comms/HALT" + ) + try: + result = subprocess.run( + ["ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", target, remote_cmd], + capture_output=True, text=True, timeout=10, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return False, "ssh unavailable or timed out" + if result.returncode == 0: + return True, "HALT file written" + return False, (result.stderr.strip().splitlines() or [f"exit {result.returncode}"])[-1] + + +def _sh_quote(s: str) -> str: + return "'" + s.replace("'", "'\"'\"'") + "'" + + +def main() -> int: + parser = argparse.ArgumentParser(description="Halt all cross-agent comms on this machine (and optionally tailnet).") + parser.add_argument("reason", nargs="?", default="", help="Optional human-readable reason") + parser.add_argument("--tailnet", action="store_true", + help="Propagate HALT to every peer in peers.toml") + args = parser.parse_args() + + # Local halt. + write_halt_file(args.reason) + stop_watcher_service() + print("Halting locally ✓ (HALT file written)") + + if not args.tailnet: + print() + print(f"Halt active. Remove {HALT_FILE} or run cross-agent-resume to clear.") + print("Agent polling will stop within ~5 min (one cadence cycle).") + return EXIT_OK + + peers = load_peers().get("peers", {}) + if not peers: + print() + print("No peers configured in peers.toml — local-only halt complete.") + return EXIT_OK + + print() + successes = 1 # local already counted + failures = [] + for name, cfg in sorted(peers.items()): + host = cfg.get("host", name) + ssh_user = cfg.get("ssh_user") + ok, detail = ssh_touch_halt(host, ssh_user, args.reason) + marker = "✓" if ok else "✗" + print(f"Halting {host:<28} {marker} ({detail})") + if ok: + successes += 1 + else: + failures.append(f"{name} ({host}): {detail}") + + print() + total = len(peers) + 1 + if failures: + print(f"PARTIAL HALT: {successes}/{total} machines halted.") + for f in failures: + print(f" - {f}") + print("Resolve the failures or manually halt each machine.") + return EXIT_PARTIAL + print(f"Halt active across {total} machine(s).") + return EXIT_OK + + +if __name__ == "__main__": + sys.exit(main()) |
