diff options
| author | Craig Jennings <c@cjennings.net> | 2026-06-11 17:05:03 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-06-11 17:05:03 -0500 |
| commit | da93ffd91dea133963ffceaff24d41bc76b8ff93 (patch) | |
| tree | 6aac57d5eb712463a852c74e75150331be2298b1 /scripts/update-skills.py | |
| parent | 61e37f55c044ff7bbd41cb142ce9dfe232934216 (diff) | |
| download | rulesets-da93ffd91dea133963ffceaff24d41bc76b8ff93.tar.gz rulesets-da93ffd91dea133963ffceaff24d41bc76b8ff93.zip | |
feat(commands): /update-skills syncs forks with upstream via 3-way merge
Upstream releases fixes worth pulling into the forks (arch-decide, playwright-js, playwright-py) without losing our local modifications. Each fork now has a manifest at upstreams/<name>/ plus a committed baseline snapshot that is the 3-way merge base. scripts/update-skills.py classifies each file's drift and merges to stdout. The command owns per-file confirmation, per-hunk conflict prompts, and every target write.
I centralized manifests under upstreams/ instead of per-skill dotfile dirs because arch-decide is now two flat files in commands/ and can't carry one. A "files" map in its manifest handles the upstream rename of SKILL.md to arch-decide.md.
I seeded baselines from today's upstream HEADs, so pre-existing local modifications classify as local-only from here on. git merge-file signals hard errors as exit 255, which subprocess reports as positive. The guard treats anything 128 and up as an error so a binary-file failure isn't misread as a conflict.
Diffstat (limited to 'scripts/update-skills.py')
| -rwxr-xr-x | scripts/update-skills.py | 295 |
1 files changed, 295 insertions, 0 deletions
#!/usr/bin/env python3
"""Keep forked skills and commands in sync with their upstreams.

Each fork has a manifest at upstreams/<name>/manifest.json:

  url                  upstream GitHub (or any git) URL
  ref                  branch or tag to track
  subpath              path inside the upstream repo ("" = repo root)
  target               repo-relative path the fork lives at
  files                optional map of upstream-relative -> target-relative
                       paths; when present only mapped files are tracked
                       (a key starting with "/" is upstream-repo-root-relative)
  license              upstream license identifier (informational)
  last_synced_commit   upstream commit of the last completed sync
  name                 optional display name used by `list` (defaults to the
                       fork's directory name under upstreams/)

The committed baseline snapshot at upstreams/<name>/baseline/ mirrors the
*target* layout and is the 3-way merge base. The script never writes a fork's
target files: check classifies, merge-file merges to stdout; only bootstrap
and mark-synced write, and only to the manifest and baseline.

Commands:
  list                  show forks and sync state
  bootstrap NAME        snapshot upstream@ref as the baseline
  check NAME [--json]   clone upstream to cache, classify every file
  merge-file NAME PATH  3-way merge one file to stdout (exit 1 = conflict)
  mark-synced NAME      refresh baseline + manifest from the checked cache
"""
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import NoReturn

EXCLUDE_DIRS = {".git", "node_modules", "__pycache__", ".pytest_cache"}
EXCLUDE_SUFFIXES = {".pyc"}

# Manifest keys that Fork cannot work without.
REQUIRED_MANIFEST_KEYS = ("url", "ref", "target")


def fail(msg: str, code: int = 2) -> NoReturn:
    """Print ``error: <msg>`` to stderr and exit the process with *code*."""
    print(f"error: {msg}", file=sys.stderr)
    sys.exit(code)


def walk_files(base: Path) -> list[str]:
    """Return the sorted relative paths of all files under *base*.

    Directories named in EXCLUDE_DIRS are not descended into and files whose
    suffix is in EXCLUDE_SUFFIXES are skipped. A *base* that is not a
    directory yields an empty list.

    Paths are returned with forward slashes on every platform so they compare
    equal to the forward-slash paths written in a manifest's "files" map.
    """
    found: list[str] = []
    if not base.is_dir():
        return found
    pending = [base]
    while pending:
        current = pending.pop()
        for entry in current.iterdir():
            if entry.is_dir():
                if entry.name not in EXCLUDE_DIRS:
                    pending.append(entry)
            elif entry.is_file() and entry.suffix not in EXCLUDE_SUFFIXES:
                found.append(entry.relative_to(base).as_posix())
    # One final sort makes the result independent of directory listing order.
    return sorted(found)


class Fork:
    """One registered fork: its manifest, target directory and baseline."""

    def __init__(self, root: Path, name: str):
        """Load upstreams/<name>/manifest.json below *root*.

        Exits through fail() when the manifest is missing, is not valid JSON,
        is not a JSON object, or lacks one of REQUIRED_MANIFEST_KEYS.
        """
        self.root = root
        self.name = name
        self.dir = root / "upstreams" / name
        self.manifest_path = self.dir / "manifest.json"
        if not self.manifest_path.is_file():
            fail(f"no manifest for fork '{name}' at {self.manifest_path}")
        try:
            m = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            fail(f"invalid JSON in {self.manifest_path}: {exc}")
        if not isinstance(m, dict):
            fail(f"manifest {self.manifest_path} must be a JSON object")
        missing = [key for key in REQUIRED_MANIFEST_KEYS if key not in m]
        if missing:
            fail(f"manifest {self.manifest_path} is missing required key(s): "
                 f"{', '.join(missing)}")
        self.url: str = m["url"]
        self.ref: str = m["ref"]
        self.subpath: str = m.get("subpath", "")
        self.target: Path = root / m["target"]
        self.files: dict[str, str] | None = m.get("files")
        self.manifest = m
        self.baseline = self.dir / "baseline"

    def checkout(self, cache: Path) -> Path:
        """Return the path of this fork's upstream checkout inside *cache*."""
        return cache / self.name

    def clone(self, cache: Path) -> Path:
        """Shallow-clone upstream at ``ref`` into the cache and return its path.

        Any existing checkout for this fork is removed first. Exits through
        fail() when ``git clone`` returns a non-zero status.
        """
        co = self.checkout(cache)
        if co.exists():
            shutil.rmtree(co)
        co.parent.mkdir(parents=True, exist_ok=True)
        r = subprocess.run(
            ["git", "clone", "--quiet", "--depth", "1",
             "--branch", self.ref, self.url, str(co)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            fail(f"could not clone {self.name} upstream from {self.url}: "
                 f"{r.stderr.strip()}")
        return co

    def upstream_commit(self, cache: Path) -> str:
        """Return the HEAD commit hash of the cached checkout.

        Raises subprocess.CalledProcessError when ``git rev-parse`` fails.
        """
        r = subprocess.run(["git", "-C", str(self.checkout(cache)),
                            "rev-parse", "HEAD"],
                           capture_output=True, text=True, check=True)
        return r.stdout.strip()

    def upstream_files(self, co: Path) -> dict[str, Path]:
        """Map of target-relative path -> absolute upstream source path.

        With a "files" map in the manifest, only mapped entries whose upstream
        file exists are returned; a key starting with "/" is resolved against
        the checkout root, any other key against the subpath. Without a map,
        every file under the subpath is returned under its own relative path.
        """
        src = co / self.subpath if self.subpath else co
        if self.files is not None:
            out = {}
            for ukey, trel in self.files.items():
                upath = co / ukey[1:] if ukey.startswith("/") else src / ukey
                if upath.is_file():
                    out[trel] = upath
            return out
        return {rel: src / rel for rel in walk_files(src)}

    def target_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path of the local file.

        With a "files" map only the mapped targets that exist are returned;
        otherwise every file under the target directory is.
        """
        if self.files is not None:
            return {trel: self.target / trel for trel in self.files.values()
                    if (self.target / trel).is_file()}
        return {rel: self.target / rel for rel in walk_files(self.target)}

    def baseline_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path in the baseline."""
        return {rel: self.baseline / rel for rel in walk_files(self.baseline)}

    def save_manifest(self) -> None:
        """Write the in-memory manifest back to manifest.json."""
        self.manifest_path.write_text(
            json.dumps(self.manifest, indent=2) + "\n", encoding="utf-8")


def classify(fork: Fork, co: Path) -> list[dict[str, str]]:
    """Classify every path seen upstream, locally or in the baseline.

    Returns one ``{"path": ..., "status": ...}`` row per path, sorted by path.
    File contents are compared as bytes. Without a baseline directory the
    status is one of unchanged / no-baseline / upstream-new / local-new; with
    one, the three-way comparison yields local-new, upstream-new,
    upstream-deleted, local-deleted, unchanged, upstream-changed, local-only
    or both-changed.
    """
    up = fork.upstream_files(co)
    tg = fork.target_files()
    bl = fork.baseline_files()
    have_baseline = fork.baseline.is_dir()

    def read(p: Path | None) -> bytes | None:
        return p.read_bytes() if p is not None and p.is_file() else None

    rows = []
    for rel in sorted(set(up) | set(tg) | set(bl)):
        u, t, b = read(up.get(rel)), read(tg.get(rel)), read(bl.get(rel))
        if not have_baseline:
            if u is not None and t is not None:
                status = "unchanged" if u == t else "no-baseline"
            elif u is not None:
                status = "upstream-new"
            else:
                status = "local-new"
        elif u is None and b is None:
            status = "local-new"
        elif t is None and b is None:
            status = "upstream-new"
        elif u is None and b is not None:
            status = "upstream-deleted"
        elif t is None and b is not None:
            status = "local-deleted"
        elif u == b and t == b:
            status = "unchanged"
        elif u != b and t == b:
            status = "upstream-changed"
        elif u == b and t != b:
            status = "local-only"
        elif u == t:
            status = "unchanged"  # converged independently
        else:
            status = "both-changed"
        rows.append({"path": rel, "status": status})
    return rows


def _refresh_baseline(fork: Fork, cache: Path, up: dict[str, Path]) -> str:
    """Replace the baseline with *up* and record the upstream commit.

    Returns the recorded commit hash. Everything that can fail cheaply is
    checked before the old baseline is deleted: an empty *up* exits through
    fail(), and the commit hash is resolved first, so a broken cache cannot
    leave the fork without its merge base.
    """
    if not up:
        fail(f"no upstream files found for {fork.name} — "
             f"check subpath/files in {fork.manifest_path}")
    sha = fork.upstream_commit(cache)
    if fork.baseline.exists():
        shutil.rmtree(fork.baseline)
    for rel, src in up.items():
        dest = fork.baseline / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    return sha


def _write_stdout_bytes(data: bytes) -> None:
    """Write *data* to stdout unmodified, bypassing text-mode translation."""
    raw = getattr(sys.stdout, "buffer", None)
    if raw is None:
        # stdout was replaced by a text-only stream; decode as a last resort.
        sys.stdout.write(data.decode("utf-8", errors="replace"))
        return
    sys.stdout.flush()
    raw.write(data)
    raw.flush()


def cmd_list(root: Path, _cache: Path, _args: argparse.Namespace) -> int:
    """Print one line per registered fork with its sync state; return 0."""
    updir = root / "upstreams"
    manifests = sorted(updir.glob("*/manifest.json")) if updir.is_dir() else []
    if not manifests:
        print("no forks registered (no upstreams/*/manifest.json)")
        return 0
    for mp in manifests:
        m = json.loads(mp.read_text(encoding="utf-8"))
        # "name" is optional in the manifest; the directory name is what every
        # other command takes as NAME, so it is the natural fallback.
        name = m.get("name", mp.parent.name)
        sha = m.get("last_synced_commit")
        state = f"last synced {sha[:10]}" if sha else "never synced"
        print(f"{name}: {state} ({m['url']} @ {m['ref']})")
    return 0


def cmd_bootstrap(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone upstream and snapshot it as the fork's baseline; return 0."""
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    up = fork.upstream_files(co)
    sha = _refresh_baseline(fork, cache, up)
    print(f"bootstrapped {fork.name} at {sha} ({len(up)} files)")
    return 0


def cmd_check(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone upstream, classify every file and print the report; return 0.

    With ``--json`` the report is a JSON object with "name",
    "upstream_commit" and "files"; otherwise it is a human-readable table
    followed by a summary line.
    """
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    rows = classify(fork, co)
    sha = fork.upstream_commit(cache)
    if args.json:
        print(json.dumps({"name": fork.name, "upstream_commit": sha,
                          "files": rows}, indent=2))
    else:
        print(f"{fork.name} @ upstream {sha}")
        for r in rows:
            print(f"  {r['status']:<17} {r['path']}")
        pending = sum(r["status"] != "unchanged" for r in rows)
        print(f"{pending} file(s) need attention" if pending
              else "everything in sync")
    return 0


def cmd_merge_file(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """3-way merge one file to stdout; return 0 if clean, 1 on conflicts.

    Uses the cached checkout left by ``check``. A path with no baseline copy
    is merged against an empty file. The merged bytes are written to stdout
    exactly as git produced them, so line endings and non-UTF-8 content
    survive the round trip.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    up = fork.upstream_files(co)
    if args.path not in up:
        fail(f"{args.path} not present upstream for {fork.name}")
    local = fork.target / args.path
    if not local.is_file():
        fail(f"{args.path} not present locally under {fork.target}")
    base = fork.baseline / args.path
    with tempfile.NamedTemporaryFile() as empty:
        base_arg = str(base) if base.is_file() else empty.name
        # Bytes mode on purpose: text=True would translate CRLF to LF and
        # raise on content that is not valid UTF-8.
        r = subprocess.run(
            ["git", "merge-file", "--stdout",
             "-L", "local", "-L", "baseline", "-L", "upstream",
             str(local), base_arg, str(up[args.path])],
            capture_output=True,
        )
    # git merge-file exits with the conflict count, or a negative value on
    # hard error — which subprocess reports as >=128 (255 observed for -1).
    if r.returncode >= 128 or r.returncode < 0:
        err = r.stderr.decode("utf-8", errors="replace").strip()
        fail(f"git merge-file failed on {args.path}: {err}")
    _write_stdout_bytes(r.stdout)
    return 0 if r.returncode == 0 else 1


def cmd_mark_synced(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Refresh baseline and manifest from the cached checkout; return 0."""
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    sha = _refresh_baseline(fork, cache, fork.upstream_files(co))
    print(f"{fork.name} synced at {sha}")
    return 0


def main() -> int:
    """Parse the command line and dispatch to the matching cmd_* handler."""
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--root", type=Path,
                    default=Path(__file__).resolve().parents[1],
                    help="repo root (default: this script's repo)")
    ap.add_argument("--cache", type=Path,
                    default=Path(tempfile.gettempdir()) / "update-skills",
                    help="dir for upstream checkouts")
    sub = ap.add_subparsers(dest="cmd", required=True)
    sub.add_parser("list")
    p = sub.add_parser("bootstrap")
    p.add_argument("name")
    p = sub.add_parser("check")
    p.add_argument("name")
    p.add_argument("--json", action="store_true")
    p = sub.add_parser("merge-file")
    p.add_argument("name")
    p.add_argument("path")
    p = sub.add_parser("mark-synced")
    p.add_argument("name")
    args = ap.parse_args()
    handlers = {"list": cmd_list, "bootstrap": cmd_bootstrap,
                "check": cmd_check, "merge-file": cmd_merge_file,
                "mark-synced": cmd_mark_synced}
    return handlers[args.cmd](args.root.resolve(), args.cache, args)


if __name__ == "__main__":
    sys.exit(main())
