aboutsummaryrefslogtreecommitdiff
path: root/scripts/update-skills.py
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-06-11 17:05:03 -0500
committerCraig Jennings <c@cjennings.net>2026-06-11 17:05:03 -0500
commitda93ffd91dea133963ffceaff24d41bc76b8ff93 (patch)
tree6aac57d5eb712463a852c74e75150331be2298b1 /scripts/update-skills.py
parent61e37f55c044ff7bbd41cb142ce9dfe232934216 (diff)
downloadrulesets-da93ffd91dea133963ffceaff24d41bc76b8ff93.tar.gz
rulesets-da93ffd91dea133963ffceaff24d41bc76b8ff93.zip
feat(commands): /update-skills syncs forks with upstream via 3-way merge
Upstream releases fixes worth pulling into the forks (arch-decide, playwright-js, playwright-py) without losing our local modifications. Each fork now has a manifest at upstreams/<name>/ plus a committed baseline snapshot that is the 3-way merge base. scripts/update-skills.py classifies each file's drift and merges to stdout. The command owns per-file confirmation, per-hunk conflict prompts, and every target write. I centralized manifests under upstreams/ instead of per-skill dotfile dirs because arch-decide is now two flat files in commands/ and can't carry one. A "files" map in its manifest handles the upstream rename of SKILL.md to arch-decide.md. I seeded baselines from today's upstream HEADs, so pre-existing local modifications classify as local-only from here on. git merge-file signals a hard error by returning a negative value, which surfaces as exit status 255 and which subprocess therefore reports as a positive return code. The guard treats anything 128 and up as an error so a binary-file failure isn't misread as a conflict count.
Diffstat (limited to 'scripts/update-skills.py')
-rwxr-xr-xscripts/update-skills.py295
1 files changed, 295 insertions, 0 deletions
diff --git a/scripts/update-skills.py b/scripts/update-skills.py
new file mode 100755
index 0000000..3645596
--- /dev/null
+++ b/scripts/update-skills.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""Keep forked skills and commands in sync with their upstreams.
+
+Each fork has a manifest at upstreams/<name>/manifest.json:
+
+ url upstream GitHub (or any git) URL
+ ref branch or tag to track
+ subpath path inside the upstream repo ("" = repo root)
+ target repo-relative path the fork lives at
+ files optional map of upstream-relative -> target-relative
+ paths; when present only mapped files are tracked
+ (a key starting with "/" is upstream-repo-root-relative)
+ license upstream license identifier (informational)
+ last_synced_commit upstream commit of the last completed sync
+
+The committed baseline snapshot at upstreams/<name>/baseline/ mirrors the
+*target* layout and is the 3-way merge base. The script never writes a fork's
+target files: check classifies, merge-file merges to stdout; only bootstrap
+and mark-synced write, and only to the manifest and baseline.
+
+Commands:
+ list show forks and sync state
+ bootstrap NAME snapshot upstream@ref as the baseline
+ check NAME [--json] clone upstream to cache, classify every file
+ merge-file NAME PATH 3-way merge one file to stdout (exit 1 = conflict)
+ mark-synced NAME refresh baseline + manifest from the checked cache
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
# Directory names that walk_files never descends into.
EXCLUDE_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".pytest_cache",
}
# File suffixes that walk_files never reports.
EXCLUDE_SUFFIXES = {
    ".pyc",
}
+
+
def fail(msg: str, code: int = 2) -> None:
    """Report *msg* on stderr as ``error: <msg>`` and exit with *code*.

    Never returns normally: the process exit is raised as ``SystemExit``.
    """
    sys.stderr.write(f"error: {msg}\n")
    raise SystemExit(code)
+
+
def walk_files(base: Path) -> list[str]:
    """Return the sorted base-relative paths of every file below *base*.

    Directories named in EXCLUDE_DIRS are not descended into and files whose
    suffix is in EXCLUDE_SUFFIXES are left out. A *base* that is not a
    directory yields an empty list.
    """
    if not base.is_dir():
        return []
    found: list[str] = []
    pending = [base]
    while pending:
        current = pending.pop()
        for entry in sorted(current.iterdir()):
            if entry.is_dir():
                if entry.name in EXCLUDE_DIRS:
                    continue
                pending.append(entry)
                continue
            if not entry.is_file():
                continue
            if entry.suffix in EXCLUDE_SUFFIXES:
                continue
            found.append(str(entry.relative_to(base)))
    found.sort()
    return found
+
+
class Fork:
    """One fork and its upstream, as described by its manifest.

    Loads ``upstreams/<name>/manifest.json`` below *root* and exposes the
    upstream URL/ref/subpath, the fork's target path, the optional ``files``
    map and the baseline snapshot directory. Construction exits via ``fail``
    when the manifest is missing, is not valid JSON, is not a JSON object, or
    lacks one of the keys read unconditionally (``url``, ``ref``, ``target``).
    """

    def __init__(self, root: Path, name: str):
        self.root = root
        self.name = name
        self.dir = root / "upstreams" / name
        self.manifest_path = self.dir / "manifest.json"
        if not self.manifest_path.is_file():
            fail(f"no manifest for fork '{name}' at {self.manifest_path}")
        try:
            # Decode explicitly as UTF-8 so parsing does not depend on the
            # locale's default encoding.
            m = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        except ValueError as exc:
            # Covers both json.JSONDecodeError and UnicodeDecodeError.
            fail(f"invalid manifest {self.manifest_path}: {exc}")
        if not isinstance(m, dict):
            fail(f"manifest {self.manifest_path} must be a JSON object")
        missing = [key for key in ("url", "ref", "target") if key not in m]
        if missing:
            fail(f"manifest {self.manifest_path} is missing required "
                 f"key(s): {', '.join(missing)}")
        self.url: str = m["url"]
        self.ref: str = m["ref"]
        self.subpath: str = m.get("subpath", "")
        self.target: Path = root / m["target"]
        self.files: dict[str, str] | None = m.get("files")
        self.manifest = m
        self.baseline = self.dir / "baseline"

    def checkout(self, cache: Path) -> Path:
        """Return the path of this fork's upstream checkout inside *cache*."""
        return cache / self.name

    def clone(self, cache: Path) -> Path:
        """Shallow-clone the upstream at ``ref`` into *cache*.

        Any existing checkout for this fork is removed first. Returns the
        checkout path; exits via ``fail`` when ``git clone`` fails.
        """
        co = self.checkout(cache)
        if co.exists():
            shutil.rmtree(co)
        co.parent.mkdir(parents=True, exist_ok=True)
        r = subprocess.run(
            ["git", "clone", "--quiet", "--depth", "1",
             "--branch", self.ref, self.url, str(co)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            fail(f"could not clone {self.name} upstream from {self.url}: "
                 f"{r.stderr.strip()}")
        return co

    def upstream_commit(self, cache: Path) -> str:
        """Return the HEAD commit hash of the cached checkout.

        Raises ``subprocess.CalledProcessError`` when ``git rev-parse`` fails.
        """
        r = subprocess.run(["git", "-C", str(self.checkout(cache)),
                            "rev-parse", "HEAD"],
                           capture_output=True, text=True, check=True)
        return r.stdout.strip()

    def upstream_files(self, co: Path) -> dict[str, Path]:
        """Map of target-relative path -> absolute upstream source path.

        With a ``files`` map only the mapped upstream files that exist in the
        checkout are returned; a key starting with "/" is resolved against the
        checkout root, any other key against ``subpath``. Without a map every
        file under ``subpath`` is returned under its own relative path.
        """
        src = co / self.subpath if self.subpath else co
        if self.files is not None:
            out = {}
            for ukey, trel in self.files.items():
                upath = co / ukey[1:] if ukey.startswith("/") else src / ukey
                if upath.is_file():
                    out[trel] = upath
            return out
        return {rel: src / rel for rel in walk_files(src)}

    def target_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path of the fork's files.

        With a ``files`` map only mapped targets that exist are returned;
        otherwise every file under the target directory.
        """
        if self.files is not None:
            return {trel: self.target / trel for trel in self.files.values()
                    if (self.target / trel).is_file()}
        return {rel: self.target / rel for rel in walk_files(self.target)}

    def baseline_files(self) -> dict[str, Path]:
        """Map of relative path -> absolute path for the baseline snapshot."""
        return {rel: self.baseline / rel for rel in walk_files(self.baseline)}

    def save_manifest(self) -> None:
        """Write the in-memory manifest back to ``manifest.json``."""
        self.manifest_path.write_text(
            json.dumps(self.manifest, indent=2) + "\n", encoding="utf-8")
+
+
def classify(fork: Fork, co: Path) -> list[dict[str, str]]:
    """Classify every file known to upstream, target or baseline.

    Compares file contents byte-for-byte and returns one
    ``{"path": ..., "status": ...}`` row per relative path, sorted by path.
    When the fork has no baseline directory only upstream and target are
    compared; otherwise the baseline copy decides which side changed.
    """
    upstream = fork.upstream_files(co)
    target = fork.target_files()
    baseline = fork.baseline_files()
    has_base = fork.baseline.is_dir()

    def content(path: Path | None) -> bytes | None:
        if path is None or not path.is_file():
            return None
        return path.read_bytes()

    def without_baseline(u: bytes | None, t: bytes | None) -> str:
        if u is None:
            return "local-new"
        if t is None:
            return "upstream-new"
        return "unchanged" if u == t else "no-baseline"

    def with_baseline(u: bytes | None, t: bytes | None,
                      b: bytes | None) -> str:
        if b is None:
            if u is None:
                return "local-new"
            if t is None:
                return "upstream-new"
        else:
            if u is None:
                return "upstream-deleted"
            if t is None:
                return "local-deleted"
        # From here on both upstream and target hold the file.
        if t == b:
            return "unchanged" if u == b else "upstream-changed"
        if u == b:
            return "local-only"
        # Both sides differ from the baseline; identical results mean they
        # converged independently.
        return "unchanged" if u == t else "both-changed"

    result = []
    for rel in sorted(set(upstream) | set(target) | set(baseline)):
        u = content(upstream.get(rel))
        t = content(target.get(rel))
        b = content(baseline.get(rel))
        verdict = with_baseline(u, t, b) if has_base else without_baseline(u, t)
        result.append({"path": rel, "status": verdict})
    return result
+
+
def cmd_list(root: Path, _cache: Path, _args: argparse.Namespace) -> int:
    """Print one line per registered fork with its sync state.

    A fork is any ``upstreams/<name>/manifest.json`` below *root*. Each line
    shows the fork name, the first ten characters of ``last_synced_commit``
    (or "never synced") and the upstream URL and ref. The cache and parsed
    arguments are unused. Always returns 0.
    """
    updir = root / "upstreams"
    manifests = sorted(updir.glob("*/manifest.json")) if updir.is_dir() else []
    if not manifests:
        print("no forks registered (no upstreams/*/manifest.json)")
        return 0
    for mp in manifests:
        m = json.loads(mp.read_text(encoding="utf-8"))
        # The documented manifest schema has no "name" key; a fork is
        # identified by its directory under upstreams/. An explicit "name"
        # is still honoured when a manifest carries one.
        name = m.get("name", mp.parent.name)
        sha = m.get("last_synced_commit")
        state = f"last synced {sha[:10]}" if sha else "never synced"
        print(f"{name}: {state} ({m['url']} @ {m['ref']})")
    return 0
+
+
def cmd_bootstrap(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Snapshot the upstream at the manifest's ref as the fork's baseline.

    Clones the upstream into *cache*, replaces the baseline directory with
    the tracked upstream files (laid out by target-relative path) and records
    the upstream commit as ``last_synced_commit`` in the manifest.

    Returns 0 on success. Exits via ``fail`` when the upstream yields no
    tracked files, so that a wrong ``subpath`` or ``files`` map cannot wipe
    an existing baseline.
    """
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    up = fork.upstream_files(co)
    if not up:
        fail(f"no upstream files found for {fork.name} "
             f"(check subpath/files in the manifest); baseline left untouched")
    # Resolve the commit before touching the baseline: if this step fails the
    # old baseline and manifest stay consistent with each other.
    sha = fork.upstream_commit(cache)
    if fork.baseline.exists():
        shutil.rmtree(fork.baseline)
    for rel, src in up.items():
        dest = fork.baseline / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    print(f"bootstrapped {fork.name} at {sha} ({len(up)} files)")
    return 0
+
+
def cmd_check(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone the upstream into the cache and report every file's status.

    With ``--json`` the report is printed as a JSON object holding the fork
    name, upstream commit and classified rows; otherwise as a plain-text
    listing followed by a summary line. Always returns 0.
    """
    fork = Fork(root, args.name)
    checkout = fork.clone(cache)
    rows = classify(fork, checkout)
    commit = fork.upstream_commit(cache)

    if args.json:
        report = {"name": fork.name, "upstream_commit": commit, "files": rows}
        print(json.dumps(report, indent=2))
        return 0

    print(f"{fork.name} @ upstream {commit}")
    for row in rows:
        print(f" {row['status']:<17} {row['path']}")
    pending = len([row for row in rows if row["status"] != "unchanged"])
    if pending:
        print(f"{pending} file(s) need attention")
    else:
        print("everything in sync")
    return 0
+
+
def cmd_merge_file(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """3-way merge one tracked file and write the result to stdout.

    Merges the local target file with the cached upstream copy, using the
    baseline snapshot as the merge base (an empty temporary file when the
    baseline has no copy of this path). Needs the cached checkout left by a
    previous ``check``. No target, baseline or manifest file is modified.

    The merged bytes are passed through unchanged, so the file's line endings
    and encoding are preserved.

    Returns 0 when ``git merge-file`` reports no conflicts, 1 otherwise.
    Exits via ``fail`` when the checkout, the upstream file or the local file
    is missing, or when ``git merge-file`` reports a hard error.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    up = fork.upstream_files(co)
    if args.path not in up:
        fail(f"{args.path} not present upstream for {fork.name}")
    local = fork.target / args.path
    if not local.is_file():
        fail(f"{args.path} not present locally under {fork.target}")
    base = fork.baseline / args.path
    with tempfile.NamedTemporaryFile() as empty:
        base_arg = str(base) if base.is_file() else empty.name
        # Capture raw bytes: text mode would decode with the locale encoding
        # and rewrite every line ending to "\n", altering the merged file.
        r = subprocess.run(
            ["git", "merge-file", "--stdout",
             "-L", "local", "-L", "baseline", "-L", "upstream",
             str(local), base_arg, str(up[args.path])],
            capture_output=True,
        )
    # git merge-file exits with the conflict count, or a negative value on
    # hard error — which subprocess reports as >=128 (255 observed for -1).
    if r.returncode >= 128 or r.returncode < 0:
        detail = r.stderr.decode("utf-8", errors="replace").strip()
        fail(f"git merge-file failed on {args.path}: {detail}")
    raw_out = getattr(sys.stdout, "buffer", None)
    if raw_out is None:
        # stdout was replaced by a text-only stream; fall back to text.
        sys.stdout.write(r.stdout.decode("utf-8", errors="replace"))
    else:
        # Flush pending text first so byte and text output keep their order.
        sys.stdout.flush()
        raw_out.write(r.stdout)
        raw_out.flush()
    return 0 if r.returncode == 0 else 1
+
+
def cmd_mark_synced(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Refresh the baseline and manifest from the cached upstream checkout.

    Uses the checkout already present in *cache* (it does not clone),
    replaces the baseline directory with the tracked upstream files and
    records the checkout's HEAD as ``last_synced_commit``.

    Returns 0 on success. Exits via ``fail`` when there is no cached checkout
    or when the checkout yields no tracked files, so an existing baseline is
    never replaced by an empty one.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    up = fork.upstream_files(co)
    if not up:
        fail(f"no upstream files found for {fork.name} in the cached "
             f"checkout; baseline left untouched")
    # Resolve the commit before touching the baseline: if this step fails the
    # old baseline and manifest stay consistent with each other.
    sha = fork.upstream_commit(cache)
    if fork.baseline.exists():
        shutil.rmtree(fork.baseline)
    for rel, src in up.items():
        dest = fork.baseline / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    print(f"{fork.name} synced at {sha}")
    return 0
+
+
def main() -> int:
    """Parse the command line and run the selected sub-command.

    Returns the sub-command handler's exit status.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).resolve().parents[1],
        help="repo root (default: this script's repo)",
    )
    parser.add_argument(
        "--cache",
        type=Path,
        default=Path(tempfile.gettempdir()) / "update-skills",
        help="dir for upstream checkouts",
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    commands.add_parser("list")

    bootstrap = commands.add_parser("bootstrap")
    bootstrap.add_argument("name")

    check = commands.add_parser("check")
    check.add_argument("name")
    check.add_argument("--json", action="store_true")

    merge_file = commands.add_parser("merge-file")
    merge_file.add_argument("name")
    merge_file.add_argument("path")

    mark_synced = commands.add_parser("mark-synced")
    mark_synced.add_argument("name")

    parsed = parser.parse_args()
    dispatch = {
        "list": cmd_list,
        "bootstrap": cmd_bootstrap,
        "check": cmd_check,
        "merge-file": cmd_merge_file,
        "mark-synced": cmd_mark_synced,
    }
    handler = dispatch[parsed.cmd]
    # Only the root is resolved; the cache path is passed through as given.
    return handler(parsed.root.resolve(), parsed.cache, parsed)
+
+
# Script entry point: the handler's return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())