about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/tests/update-skills.bats 299
-rwxr-xr-x scripts/update-skills.py 295
2 files changed, 594 insertions, 0 deletions
diff --git a/scripts/tests/update-skills.bats b/scripts/tests/update-skills.bats
new file mode 100644
index 0000000..d74da1c
--- /dev/null
+++ b/scripts/tests/update-skills.bats
@@ -0,0 +1,299 @@
+#!/usr/bin/env bats
+# update-skills.py keeps forked skills/commands in sync with their upstreams
+# via per-fork manifests (upstreams/<name>/manifest.json), a committed baseline
+# snapshot (upstreams/<name>/baseline/), and 3-way merges against it. The
+# script is read-only against fork targets: check classifies, merge-file
+# merges to stdout; only bootstrap and mark-synced write (manifest + baseline).
+
# Runs before every test: locate the script under test and create a private
# scratch directory that holds the fake upstream, the fake repo root, and the
# clone cache.
setup() {
    REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../.." && pwd)"
    SCRIPT="$REPO_ROOT/scripts/update-skills.py"
    TMP="$(mktemp -d)"
    CACHE="$TMP/cache"
}
+
# Runs after every test: discard the scratch directory created by setup.
teardown() {
    rm -rf "$TMP"
}
+
+# --- fixture helpers -------------------------------------------------------
+
# Run a git command inside the fake upstream repo with a fixed identity, so
# commits succeed on machines that have no global user.name/user.email.
git_up() {
    git -C "$TMP/up" -c user.name=test -c user.email=test@test "$@"
}
+
# Create the fake upstream repo at $TMP/up with one commit on branch "main"
# containing skills/demo/SKILL.md (eight numbered lines) and
# skills/demo/helper.py.
make_upstream() {
    mkdir -p "$TMP/up/skills/demo"
    # `git init -b` only exists in git >= 2.28; pointing HEAD at the branch
    # before the first commit gives the same result on older gits too.
    git -C "$TMP/up" init -q
    git -C "$TMP/up" symbolic-ref HEAD refs/heads/main
    printf 'line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\n' \
        > "$TMP/up/skills/demo/SKILL.md"
    printf 'def helper():\n return 1\n' > "$TMP/up/skills/demo/helper.py"
    git_up add -A
    git_up commit -qm "initial"
}
+
# Create the fake repo root at $TMP/root: a fork of the upstream demo skill
# copied verbatim into demo-skill/, plus a never-synced manifest that points
# at the fake upstream through a file:// URL.
make_repo() {
    mkdir -p "$TMP/root/upstreams/demo" "$TMP/root/demo-skill"
    cp "$TMP/up/skills/demo/SKILL.md" "$TMP/up/skills/demo/helper.py" \
        "$TMP/root/demo-skill/"
    # Unquoted delimiter on purpose: $TMP must expand inside the JSON.
    cat > "$TMP/root/upstreams/demo/manifest.json" <<EOF
{
  "name": "demo",
  "url": "file://$TMP/up",
  "ref": "main",
  "subpath": "skills/demo",
  "target": "demo-skill",
  "license": "MIT",
  "last_synced_commit": null
}
EOF
}
+
# Invoke the script under test through bats `run`, so $status and $output are
# captured, always against the scratch root and cache.
run_us() {
    run python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" "$@"
}
+
# Bootstrap the demo fork directly (not via `run`), so a failure here aborts
# the calling test immediately.
bootstrap_demo() {
    python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" bootstrap demo
}
+
+# --- list ------------------------------------------------------------------
+
+@test "list shows fork with never-synced marker" {
+ make_upstream
+ make_repo
+ run_us list
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"demo"* ]]
+ [[ "$output" == *"never synced"* ]]
+}
+
+# --- bootstrap -------------------------------------------------------------
+
+@test "bootstrap snapshots baseline and records upstream commit" {
+ make_upstream
+ make_repo
+ run_us bootstrap demo
+ [ "$status" -eq 0 ]
+ [ -f "$TMP/root/upstreams/demo/baseline/SKILL.md" ]
+ [ -f "$TMP/root/upstreams/demo/baseline/helper.py" ]
+ sha=$(git_up rev-parse HEAD)
+ grep -q "$sha" "$TMP/root/upstreams/demo/manifest.json"
+}
+
+# --- check classification --------------------------------------------------
+
+@test "check classifies identical tree as unchanged" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"unchanged"* ]]
+ [[ "$output" != *"both-changed"* ]]
+ [[ "$output" != *"local-only"* ]]
+ [[ "$output" != *"upstream-changed"* ]]
+}
+
+@test "check classifies upstream-changed after upstream edit" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "upstream edit"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"upstream-changed"*"SKILL.md"* ]]
+}
+
+@test "check classifies local-only after target edit" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"local-only"*"SKILL.md"* ]]
+}
+
+@test "check classifies both-changed when both sides edited" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "upstream edit"
+ sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"both-changed"*"SKILL.md"* ]]
+}
+
+@test "check classifies upstream-new, local-new, and upstream-deleted" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ echo "new upstream file" > "$TMP/up/skills/demo/new-up.md"
+ git_up rm -q skills/demo/helper.py
+ git_up add -A
+ git_up commit -qm "add one, delete one"
+ echo "new local file" > "$TMP/root/demo-skill/new-local.md"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"upstream-new"*"new-up.md"* ]]
+ [[ "$output" == *"local-new"*"new-local.md"* ]]
+ [[ "$output" == *"upstream-deleted"*"helper.py"* ]]
+}
+
+@test "check --json emits valid JSON with upstream commit and files" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ run_us check demo --json
+ [ "$status" -eq 0 ]
+ echo "$output" | python3 -c '
+import json, sys
+d = json.load(sys.stdin)
+assert d["name"] == "demo"
+assert len(d["upstream_commit"]) == 40
+assert any(f["path"] == "SKILL.md" for f in d["files"])
+'
+}
+
+@test "check without baseline degrades to no-baseline statuses" {
+ make_upstream
+ make_repo
+ sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"no-baseline"* ]]
+}
+
+# --- merge-file ------------------------------------------------------------
+
+@test "merge-file merges non-overlapping edits cleanly" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "upstream edit"
+ sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md"
+ python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null
+ run_us merge-file demo SKILL.md
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"line1 upstream edit"* ]]
+ [[ "$output" == *"line8 local edit"* ]]
+ [[ "$output" != *"<<<<<<<"* ]]
+}
+
+@test "merge-file emits conflict markers on overlapping edits" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line4/line4 upstream edit/' "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "upstream edit"
+ sed -i 's/line4/line4 local edit/' "$TMP/root/demo-skill/SKILL.md"
+ python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null
+ run_us merge-file demo SKILL.md
+ [ "$status" -eq 1 ]
+ [[ "$output" == *"<<<<<<<"* ]]
+ [[ "$output" == *"line4 upstream edit"* ]]
+ [[ "$output" == *"line4 local edit"* ]]
+}
+
+@test "merge-file reports a hard git error instead of masking it as a conflict" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ printf 'up\x00stream' > "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "binary upstream"
+ printf 'lo\x00cal' > "$TMP/root/demo-skill/SKILL.md"
+ python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null
+ run_us merge-file demo SKILL.md
+ [ "$status" -eq 2 ]
+ [[ "$output" == *"merge-file failed"* ]]
+}
+
+# --- mark-synced -----------------------------------------------------------
+
+@test "mark-synced refreshes baseline and last_synced_commit" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md"
+ git_up commit -qam "upstream edit"
+ sha=$(git_up rev-parse HEAD)
+ python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null
+ run_us mark-synced demo
+ [ "$status" -eq 0 ]
+ grep -q "line1 upstream edit" "$TMP/root/upstreams/demo/baseline/SKILL.md"
+ grep -q "$sha" "$TMP/root/upstreams/demo/manifest.json"
+}
+
+# --- files map (the arch-decide shape) --------------------------------------
+
+@test "files map restricts tracking to mapped files under target paths" {
+ make_upstream
+ make_repo
+ mkdir -p "$TMP/root/commands"
+ cp "$TMP/up/skills/demo/SKILL.md" "$TMP/root/commands/demo.md"
+ cat > "$TMP/root/upstreams/demo/manifest.json" <<EOF
+{
+ "name": "demo",
+ "url": "file://$TMP/up",
+ "ref": "main",
+ "subpath": "skills/demo",
+ "target": "commands",
+ "files": {"SKILL.md": "demo.md"},
+ "license": "MIT",
+ "last_synced_commit": null
+}
+EOF
+ bootstrap_demo
+ [ -f "$TMP/root/upstreams/demo/baseline/demo.md" ]
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" == *"demo.md"* ]]
+ # helper.py exists upstream but is unmapped — must not be tracked
+ [[ "$output" != *"helper.py"* ]]
+}
+
+# --- exclusions --------------------------------------------------------------
+
+@test "dependency and cache dirs excluded from classification" {
+ make_upstream
+ make_repo
+ bootstrap_demo
+ mkdir -p "$TMP/root/demo-skill/node_modules/x" "$TMP/root/demo-skill/__pycache__"
+ echo "x" > "$TMP/root/demo-skill/node_modules/x/x.js"
+ echo "x" > "$TMP/root/demo-skill/__pycache__/y.pyc"
+ run_us check demo
+ [ "$status" -eq 0 ]
+ [[ "$output" != *"node_modules"* ]]
+ [[ "$output" != *"__pycache__"* ]]
+}
+
+# --- errors ------------------------------------------------------------------
+
+@test "unknown fork errors and names it" {
+ make_upstream
+ make_repo
+ run_us check nosuchfork
+ [ "$status" -ne 0 ]
+ [[ "$output" == *"nosuchfork"* ]]
+}
+
+@test "unreachable upstream degrades with a clear error" {
+ make_upstream
+ make_repo
+ python3 - "$TMP/root/upstreams/demo/manifest.json" <<'EOF'
+import json, sys
+p = sys.argv[1]
+d = json.load(open(p))
+d["url"] = "file:///nonexistent/upstream/repo"
+json.dump(d, open(p, "w"), indent=2)
+EOF
+ run_us check demo
+ [ "$status" -ne 0 ]
+ [[ "$output" == *"demo"* ]]
+ [[ "$output" == *"clone"* ]]
+}
diff --git a/scripts/update-skills.py b/scripts/update-skills.py
new file mode 100755
index 0000000..3645596
--- /dev/null
+++ b/scripts/update-skills.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""Keep forked skills and commands in sync with their upstreams.
+
+Each fork has a manifest at upstreams/<name>/manifest.json:
+
+ url upstream GitHub (or any git) URL
+ ref branch or tag to track
+ subpath path inside the upstream repo ("" = repo root)
+ target repo-relative path the fork lives at
+ files optional map of upstream-relative -> target-relative
+ paths; when present only mapped files are tracked
+ (a key starting with "/" is upstream-repo-root-relative)
+ license upstream license identifier (informational)
+ last_synced_commit upstream commit of the last completed sync
+
+The committed baseline snapshot at upstreams/<name>/baseline/ mirrors the
+*target* layout and is the 3-way merge base. The script never writes a fork's
+target files: check classifies, merge-file merges to stdout; only bootstrap
+and mark-synced write, and only to the manifest and baseline.
+
+Commands:
+ list show forks and sync state
+ bootstrap NAME snapshot upstream@ref as the baseline
+ check NAME [--json] clone upstream to cache, classify every file
+ merge-file NAME PATH 3-way merge one file to stdout (exit 1 = conflict)
+ mark-synced NAME refresh baseline + manifest from the checked cache
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
# Directory names that are never descended into when walking a tree.
EXCLUDE_DIRS = {
    ".git",
    "node_modules",
    "__pycache__",
    ".pytest_cache",
}
# File suffixes that are never tracked.
EXCLUDE_SUFFIXES = {
    ".pyc",
}
+
+
def fail(msg: str, code: int = 2) -> None:
    """Write ``error: <msg>`` to stderr and terminate the process.

    Never returns normally: raises ``SystemExit`` carrying ``code``
    (default 2).
    """
    sys.stderr.write(f"error: {msg}\n")
    raise SystemExit(code)
+
+
def walk_files(base: Path) -> list[str]:
    """Return the sorted relative paths of all tracked files under ``base``.

    Directories named in ``EXCLUDE_DIRS`` are not descended into and files
    whose suffix is in ``EXCLUDE_SUFFIXES`` are skipped. A ``base`` that is
    not a directory yields an empty list.

    Paths are returned in POSIX form (forward slashes) on every platform so
    they compare equal to the target-relative paths written in a manifest's
    ``files`` map and to each other across upstream, target, and baseline.
    """
    if not base.is_dir():
        return []
    found: list[str] = []
    pending = [base]
    while pending:
        current = pending.pop()
        # No per-directory sort needed: the result is sorted once at the end.
        for entry in current.iterdir():
            if entry.is_dir():
                if entry.name not in EXCLUDE_DIRS:
                    pending.append(entry)
            elif entry.is_file() and entry.suffix not in EXCLUDE_SUFFIXES:
                found.append(entry.relative_to(base).as_posix())
    return sorted(found)
+
+
class Fork:
    """One tracked fork, loaded from ``<root>/upstreams/<name>/manifest.json``.

    Attributes set by ``__init__``:
        root, name      the arguments as given
        dir             ``<root>/upstreams/<name>``
        manifest_path   ``<dir>/manifest.json``
        manifest        the parsed manifest dict (mutated by callers, then
                        persisted with ``save_manifest``)
        url, ref        upstream repository URL and the branch/tag to clone
        subpath         path inside the upstream repo ("" = repo root)
        target          ``<root>/<manifest target>``, where the fork lives
        files           optional upstream-relative -> target-relative map
        baseline        ``<dir>/baseline``, the 3-way merge base snapshot
    """

    # Keys __init__ cannot work without; everything else is optional.
    _REQUIRED_KEYS = ("url", "ref", "target")

    def __init__(self, root: Path, name: str):
        """Load and validate the manifest; exits via ``fail`` on any problem."""
        self.root = root
        self.name = name
        self.dir = root / "upstreams" / name
        self.manifest_path = self.dir / "manifest.json"
        if not self.manifest_path.is_file():
            fail(f"no manifest for fork '{name}' at {self.manifest_path}")
        try:
            # Explicit UTF-8: the locale default would misread (or reject)
            # non-ASCII manifests on some platforms.
            m = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            fail(f"cannot read manifest for fork '{name}' at "
                 f"{self.manifest_path}: {exc}")
        if not isinstance(m, dict):
            fail(f"manifest for fork '{name}' at {self.manifest_path} "
                 f"must be a JSON object")
        missing = [key for key in self._REQUIRED_KEYS if key not in m]
        if missing:
            fail(f"manifest for fork '{name}' at {self.manifest_path} "
                 f"is missing required key(s): {', '.join(missing)}")
        self.url: str = m["url"]
        self.ref: str = m["ref"]
        # Treat an explicit null like an absent key.
        self.subpath: str = m.get("subpath") or ""
        self.target: Path = root / m["target"]
        self.files: dict[str, str] | None = m.get("files")
        self.manifest = m
        self.baseline = self.dir / "baseline"

    def checkout(self, cache: Path) -> Path:
        """Return where this fork's upstream checkout lives inside ``cache``."""
        return cache / self.name

    def clone(self, cache: Path) -> Path:
        """Replace the cached checkout with a fresh shallow clone of ``ref``.

        Returns the checkout path; exits via ``fail`` if git cannot clone.
        """
        co = self.checkout(cache)
        if co.exists():
            shutil.rmtree(co)
        co.parent.mkdir(parents=True, exist_ok=True)
        r = subprocess.run(
            ["git", "clone", "--quiet", "--depth", "1",
             "--branch", self.ref,
             # "--" ends option parsing, so a manifest URL that starts with
             # "-" cannot be interpreted as a git option.
             "--", self.url, str(co)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            fail(f"could not clone {self.name} upstream from {self.url}: "
                 f"{r.stderr.strip()}")
        return co

    def upstream_commit(self, cache: Path) -> str:
        """Return the HEAD commit hash of the cached checkout.

        Raises ``subprocess.CalledProcessError`` if ``git rev-parse`` fails.
        """
        r = subprocess.run(["git", "-C", str(self.checkout(cache)),
                            "rev-parse", "HEAD"],
                           capture_output=True, text=True, check=True)
        return r.stdout.strip()

    def upstream_files(self, co: Path) -> dict[str, Path]:
        """Map of target-relative path -> absolute upstream source path.

        With a ``files`` map only mapped files that exist upstream are
        returned; a key starting with "/" is resolved from the checkout root
        instead of from ``subpath``. Without a map, every tracked file under
        ``subpath`` is returned under its own relative path.
        """
        src = co / self.subpath if self.subpath else co
        if self.files is None:
            return {rel: src / rel for rel in walk_files(src)}
        out: dict[str, Path] = {}
        for ukey, trel in self.files.items():
            if ukey.startswith("/"):
                # Strip every leading slash: joining an absolute remainder
                # (from "//x") would escape the checkout.
                upath = co / ukey.lstrip("/")
            else:
                upath = src / ukey
            if upath.is_file():
                out[trel] = upath
        return out

    def target_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path of existing fork files."""
        if self.files is not None:
            return {trel: self.target / trel for trel in self.files.values()
                    if (self.target / trel).is_file()}
        return {rel: self.target / rel for rel in walk_files(self.target)}

    def baseline_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path inside the baseline."""
        return {rel: self.baseline / rel for rel in walk_files(self.baseline)}

    def save_manifest(self) -> None:
        """Write ``self.manifest`` back to disk as indented JSON (UTF-8)."""
        self.manifest_path.write_text(
            json.dumps(self.manifest, indent=2) + "\n", encoding="utf-8")
+
+
def _read_or_none(path: Path | None) -> bytes | None:
    """Return the file's bytes, or None when there is no such file."""
    return path.read_bytes() if path is not None and path.is_file() else None


def _status_without_baseline(u: bytes | None, t: bytes | None) -> str:
    """Two-way status used when no baseline snapshot exists at all."""
    if u is not None and t is not None:
        # Without a merge base a difference cannot be attributed to a side.
        return "unchanged" if u == t else "no-baseline"
    return "upstream-new" if u is not None else "local-new"


def _status_with_baseline(u: bytes | None, t: bytes | None,
                          b: bytes | None) -> str:
    """Three-way status of one path given upstream, target, and baseline."""
    if b is None:
        # The path did not exist at the last sync.
        if u is None:
            return "local-new"
        if t is None:
            return "upstream-new"
        return "unchanged" if u == t else "both-changed"
    if u is None and t is None:
        # Deleted on both sides: converged independently, nothing to do.
        return "unchanged"
    if u is None:
        return "upstream-deleted"
    if t is None:
        return "local-deleted"
    if u == b:
        return "unchanged" if t == b else "local-only"
    if t == b:
        return "upstream-changed"
    # Both sides differ from the baseline.
    return "unchanged" if u == t else "both-changed"  # equal = converged


def classify(fork: Fork, co: Path) -> list[dict[str, str]]:
    """Classify every path known to upstream, target, or baseline.

    Args:
        fork: the fork to inspect.
        co: the upstream checkout to compare against.

    Returns:
        One ``{"path": ..., "status": ...}`` row per path, sorted by path.
        With a baseline directory the status is one of ``unchanged``,
        ``upstream-changed``, ``local-only``, ``both-changed``,
        ``upstream-new``, ``local-new``, ``upstream-deleted``,
        ``local-deleted``; without one it is ``unchanged``, ``no-baseline``,
        ``upstream-new`` or ``local-new``. A path removed on both sides is
        reported as ``unchanged``, like any other change both sides made
        identically.
    """
    up = fork.upstream_files(co)
    tg = fork.target_files()
    bl = fork.baseline_files()
    have_baseline = fork.baseline.is_dir()

    rows = []
    for rel in sorted(set(up) | set(tg) | set(bl)):
        u = _read_or_none(up.get(rel))
        t = _read_or_none(tg.get(rel))
        b = _read_or_none(bl.get(rel))
        if have_baseline:
            status = _status_with_baseline(u, t, b)
        else:
            status = _status_without_baseline(u, t)
        rows.append({"path": rel, "status": status})
    return rows
+
+
def cmd_list(root: Path, _cache: Path, _args: argparse.Namespace) -> int:
    """Print one line per registered fork with its sync state; return 0.

    A fork is identified by its directory name under ``upstreams/`` — the
    same NAME every other command takes — not by a ``name`` key inside the
    manifest, which the documented schema does not require.
    """
    updir = root / "upstreams"
    manifests = sorted(updir.glob("*/manifest.json")) if updir.is_dir() else []
    if not manifests:
        print("no forks registered (no upstreams/*/manifest.json)")
        return 0
    for mp in manifests:
        m = json.loads(mp.read_text(encoding="utf-8"))
        sha = m.get("last_synced_commit")
        state = f"last synced {sha[:10]}" if sha else "never synced"
        print(f"{mp.parent.name}: {state} ({m['url']} @ {m['ref']})")
    return 0
+
+
def cmd_bootstrap(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Snapshot upstream@ref as the fork's baseline and record its commit.

    Clones the upstream into ``cache``, replaces ``upstreams/<name>/baseline``
    with the tracked upstream files (laid out by target-relative path), and
    stores the cloned commit as ``last_synced_commit``. Returns 0; exits via
    ``fail`` when the clone fails or when no upstream file is selected.
    """
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    up = fork.upstream_files(co)
    if not up:
        # An empty selection almost always means a wrong 'subpath' or 'files'
        # map; recording a sync with no baseline would hide that mistake.
        fail(f"no upstream files found for {fork.name} — check 'subpath' "
             f"and 'files' in {fork.manifest_path}")
    # Resolve the commit before touching anything on disk.
    sha = fork.upstream_commit(cache)
    if fork.baseline.exists():
        shutil.rmtree(fork.baseline)
    for rel, src in up.items():
        dest = fork.baseline / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    print(f"bootstrapped {fork.name} at {sha} ({len(up)} files)")
    return 0
+
+
def cmd_check(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone the upstream into ``cache`` and report every file's status.

    With ``--json`` the report is a JSON object holding ``name``,
    ``upstream_commit`` and ``files``; otherwise it is a text listing
    followed by a summary line. Always returns 0.
    """
    fork = Fork(root, args.name)
    checkout = fork.clone(cache)
    rows = classify(fork, checkout)
    sha = fork.upstream_commit(cache)

    if args.json:
        report = {"name": fork.name, "upstream_commit": sha, "files": rows}
        print(json.dumps(report, indent=2))
        return 0

    print(f"{fork.name} @ upstream {sha}")
    for row in rows:
        print(f" {row['status']:<17} {row['path']}")
    # Anything not "unchanged" still needs a human decision.
    pending = len([row for row in rows if row["status"] != "unchanged"])
    if pending:
        print(f"{pending} file(s) need attention")
    else:
        print("everything in sync")
    return 0
+
+
def cmd_merge_file(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """3-way merge one tracked file and write the result to stdout.

    Merges the local copy of ``args.path`` with the upstream copy from the
    cached checkout, using the baseline copy as the merge base (an empty file
    when the baseline has none). Nothing on disk is modified.

    Returns 0 for a clean merge and 1 when the output contains conflict
    markers. Exits via ``fail`` when there is no cached checkout, the file is
    missing upstream or locally, or git reports a hard error.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    up = fork.upstream_files(co)
    if args.path not in up:
        fail(f"{args.path} not present upstream for {fork.name}")
    local = fork.target / args.path
    if not local.is_file():
        fail(f"{args.path} not present locally under {fork.target}")
    base = fork.baseline / args.path
    with tempfile.NamedTemporaryFile() as empty:
        base_arg = str(base) if base.is_file() else empty.name
        # Capture raw bytes: decoding as text would crash on files that are
        # not valid in the locale encoding and could rewrite line endings.
        r = subprocess.run(
            ["git", "merge-file", "--stdout",
             "-L", "local", "-L", "baseline", "-L", "upstream",
             str(local), base_arg, str(up[args.path])],
            capture_output=True,
        )
    # git merge-file exits with the conflict count, or a negative value on
    # hard error — which subprocess reports as >=128 (255 observed for -1).
    if r.returncode >= 128 or r.returncode < 0:
        detail = r.stderr.decode(errors="replace").strip()
        fail(f"git merge-file failed on {args.path}: {detail}")
    sys.stdout.buffer.write(r.stdout)
    sys.stdout.buffer.flush()
    return 0 if r.returncode == 0 else 1
+
+
def cmd_mark_synced(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Refresh the baseline and ``last_synced_commit`` from the cached checkout.

    Uses the checkout an earlier ``check`` left in ``cache`` (nothing is
    re-cloned), replaces ``upstreams/<name>/baseline`` with the tracked
    upstream files, and records the checkout's HEAD commit in the manifest.
    Returns 0; exits via ``fail`` when the cached checkout is missing or its
    commit cannot be read.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    # Resolve the commit before rewriting the baseline: if the cache is not a
    # usable git checkout, the baseline and manifest must stay consistent.
    try:
        sha = fork.upstream_commit(cache)
    except (subprocess.CalledProcessError, OSError) as exc:
        fail(f"cached checkout for {fork.name} at {co} is unusable "
             f"({exc}) — run check first")
    up = fork.upstream_files(co)
    if fork.baseline.exists():
        shutil.rmtree(fork.baseline)
    for rel, src in up.items():
        dest = fork.baseline / rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    print(f"{fork.name} synced at {sha}")
    return 0
+
+
def main() -> int:
    """Parse the command line, dispatch to the matching ``cmd_*`` handler,
    and return that handler's exit code.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=Path(__file__).resolve().parents[1],
        help="repo root (default: this script's repo)",
    )
    parser.add_argument(
        "--cache",
        type=Path,
        default=Path(tempfile.gettempdir()) / "update-skills",
        help="dir for upstream checkouts",
    )

    commands = parser.add_subparsers(dest="cmd", required=True)
    commands.add_parser("list")
    # Every other command takes the fork NAME first, then its own extras.
    for cmd_name in ("bootstrap", "check", "merge-file", "mark-synced"):
        sub_parser = commands.add_parser(cmd_name)
        sub_parser.add_argument("name")
        if cmd_name == "check":
            sub_parser.add_argument("--json", action="store_true")
        elif cmd_name == "merge-file":
            sub_parser.add_argument("path")

    opts = parser.parse_args()
    dispatch = {
        "list": cmd_list,
        "bootstrap": cmd_bootstrap,
        "check": cmd_check,
        "merge-file": cmd_merge_file,
        "mark-synced": cmd_mark_synced,
    }
    handler = dispatch[opts.cmd]
    # Only the root is resolved; the cache path is passed through as given.
    return handler(opts.root.resolve(), opts.cache, opts)


if __name__ == "__main__":
    sys.exit(main())