diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/tests/update-skills.bats | 299 | ||||
| -rwxr-xr-x | scripts/update-skills.py | 295 |
2 files changed, 594 insertions, 0 deletions
diff --git a/scripts/tests/update-skills.bats b/scripts/tests/update-skills.bats new file mode 100644 index 0000000..d74da1c --- /dev/null +++ b/scripts/tests/update-skills.bats @@ -0,0 +1,299 @@ +#!/usr/bin/env bats +# update-skills.py keeps forked skills/commands in sync with their upstreams +# via per-fork manifests (upstreams/<name>/manifest.json), a committed baseline +# snapshot (upstreams/<name>/baseline/), and 3-way merges against it. The +# script is read-only against fork targets: check classifies, merge-file +# merges to stdout; only bootstrap and mark-synced write (manifest + baseline). + +setup() { + REPO_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../.." && pwd)" + SCRIPT="$REPO_ROOT/scripts/update-skills.py" + TMP="$(mktemp -d)" + CACHE="$TMP/cache" +} + +teardown() { + rm -rf "$TMP" +} + +# --- fixture helpers ------------------------------------------------------- + +git_up() { + git -C "$TMP/up" -c user.name=test -c user.email=test@test "$@" +} + +make_upstream() { + mkdir -p "$TMP/up/skills/demo" + git -C "$TMP/up" init -q -b main + printf 'line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\n' \ + > "$TMP/up/skills/demo/SKILL.md" + printf 'def helper():\n return 1\n' > "$TMP/up/skills/demo/helper.py" + git_up add -A + git_up commit -qm "initial" +} + +make_repo() { + mkdir -p "$TMP/root/upstreams/demo" "$TMP/root/demo-skill" + cp "$TMP/up/skills/demo/SKILL.md" "$TMP/up/skills/demo/helper.py" \ + "$TMP/root/demo-skill/" + cat > "$TMP/root/upstreams/demo/manifest.json" <<EOF +{ + "name": "demo", + "url": "file://$TMP/up", + "ref": "main", + "subpath": "skills/demo", + "target": "demo-skill", + "license": "MIT", + "last_synced_commit": null +} +EOF +} + +run_us() { + run python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" "$@" +} + +bootstrap_demo() { + python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" bootstrap demo +} + +# --- list ------------------------------------------------------------------ + +@test "list shows fork with 
never-synced marker" { + make_upstream + make_repo + run_us list + [ "$status" -eq 0 ] + [[ "$output" == *"demo"* ]] + [[ "$output" == *"never synced"* ]] +} + +# --- bootstrap ------------------------------------------------------------- + +@test "bootstrap snapshots baseline and records upstream commit" { + make_upstream + make_repo + run_us bootstrap demo + [ "$status" -eq 0 ] + [ -f "$TMP/root/upstreams/demo/baseline/SKILL.md" ] + [ -f "$TMP/root/upstreams/demo/baseline/helper.py" ] + sha=$(git_up rev-parse HEAD) + grep -q "$sha" "$TMP/root/upstreams/demo/manifest.json" +} + +# --- check classification -------------------------------------------------- + +@test "check classifies identical tree as unchanged" { + make_upstream + make_repo + bootstrap_demo + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"unchanged"* ]] + [[ "$output" != *"both-changed"* ]] + [[ "$output" != *"local-only"* ]] + [[ "$output" != *"upstream-changed"* ]] +} + +@test "check classifies upstream-changed after upstream edit" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "upstream edit" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"upstream-changed"*"SKILL.md"* ]] +} + +@test "check classifies local-only after target edit" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"local-only"*"SKILL.md"* ]] +} + +@test "check classifies both-changed when both sides edited" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "upstream edit" + sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"both-changed"*"SKILL.md"* ]] +} + +@test "check classifies upstream-new, local-new, and 
upstream-deleted" { + make_upstream + make_repo + bootstrap_demo + echo "new upstream file" > "$TMP/up/skills/demo/new-up.md" + git_up rm -q skills/demo/helper.py + git_up add -A + git_up commit -qm "add one, delete one" + echo "new local file" > "$TMP/root/demo-skill/new-local.md" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"upstream-new"*"new-up.md"* ]] + [[ "$output" == *"local-new"*"new-local.md"* ]] + [[ "$output" == *"upstream-deleted"*"helper.py"* ]] +} + +@test "check --json emits valid JSON with upstream commit and files" { + make_upstream + make_repo + bootstrap_demo + run_us check demo --json + [ "$status" -eq 0 ] + echo "$output" | python3 -c ' +import json, sys +d = json.load(sys.stdin) +assert d["name"] == "demo" +assert len(d["upstream_commit"]) == 40 +assert any(f["path"] == "SKILL.md" for f in d["files"]) +' +} + +@test "check without baseline degrades to no-baseline statuses" { + make_upstream + make_repo + sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" == *"no-baseline"* ]] +} + +# --- merge-file ------------------------------------------------------------ + +@test "merge-file merges non-overlapping edits cleanly" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "upstream edit" + sed -i 's/line8/line8 local edit/' "$TMP/root/demo-skill/SKILL.md" + python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null + run_us merge-file demo SKILL.md + [ "$status" -eq 0 ] + [[ "$output" == *"line1 upstream edit"* ]] + [[ "$output" == *"line8 local edit"* ]] + [[ "$output" != *"<<<<<<<"* ]] +} + +@test "merge-file emits conflict markers on overlapping edits" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line4/line4 upstream edit/' "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "upstream edit" + sed -i 's/line4/line4 local edit/' 
"$TMP/root/demo-skill/SKILL.md" + python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null + run_us merge-file demo SKILL.md + [ "$status" -eq 1 ] + [[ "$output" == *"<<<<<<<"* ]] + [[ "$output" == *"line4 upstream edit"* ]] + [[ "$output" == *"line4 local edit"* ]] +} + +@test "merge-file reports a hard git error instead of masking it as a conflict" { + make_upstream + make_repo + bootstrap_demo + printf 'up\x00stream' > "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "binary upstream" + printf 'lo\x00cal' > "$TMP/root/demo-skill/SKILL.md" + python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null + run_us merge-file demo SKILL.md + [ "$status" -eq 2 ] + [[ "$output" == *"merge-file failed"* ]] +} + +# --- mark-synced ----------------------------------------------------------- + +@test "mark-synced refreshes baseline and last_synced_commit" { + make_upstream + make_repo + bootstrap_demo + sed -i 's/line1/line1 upstream edit/' "$TMP/up/skills/demo/SKILL.md" + git_up commit -qam "upstream edit" + sha=$(git_up rev-parse HEAD) + python3 "$SCRIPT" --root "$TMP/root" --cache "$CACHE" check demo > /dev/null + run_us mark-synced demo + [ "$status" -eq 0 ] + grep -q "line1 upstream edit" "$TMP/root/upstreams/demo/baseline/SKILL.md" + grep -q "$sha" "$TMP/root/upstreams/demo/manifest.json" +} + +# --- files map (the arch-decide shape) -------------------------------------- + +@test "files map restricts tracking to mapped files under target paths" { + make_upstream + make_repo + mkdir -p "$TMP/root/commands" + cp "$TMP/up/skills/demo/SKILL.md" "$TMP/root/commands/demo.md" + cat > "$TMP/root/upstreams/demo/manifest.json" <<EOF +{ + "name": "demo", + "url": "file://$TMP/up", + "ref": "main", + "subpath": "skills/demo", + "target": "commands", + "files": {"SKILL.md": "demo.md"}, + "license": "MIT", + "last_synced_commit": null +} +EOF + bootstrap_demo + [ -f "$TMP/root/upstreams/demo/baseline/demo.md" ] + run_us check demo + [ 
"$status" -eq 0 ] + [[ "$output" == *"demo.md"* ]] + # helper.py exists upstream but is unmapped — must not be tracked + [[ "$output" != *"helper.py"* ]] +} + +# --- exclusions -------------------------------------------------------------- + +@test "dependency and cache dirs excluded from classification" { + make_upstream + make_repo + bootstrap_demo + mkdir -p "$TMP/root/demo-skill/node_modules/x" "$TMP/root/demo-skill/__pycache__" + echo "x" > "$TMP/root/demo-skill/node_modules/x/x.js" + echo "x" > "$TMP/root/demo-skill/__pycache__/y.pyc" + run_us check demo + [ "$status" -eq 0 ] + [[ "$output" != *"node_modules"* ]] + [[ "$output" != *"__pycache__"* ]] +} + +# --- errors ------------------------------------------------------------------ + +@test "unknown fork errors and names it" { + make_upstream + make_repo + run_us check nosuchfork + [ "$status" -ne 0 ] + [[ "$output" == *"nosuchfork"* ]] +} + +@test "unreachable upstream degrades with a clear error" { + make_upstream + make_repo + python3 - "$TMP/root/upstreams/demo/manifest.json" <<'EOF' +import json, sys +p = sys.argv[1] +d = json.load(open(p)) +d["url"] = "file:///nonexistent/upstream/repo" +json.dump(d, open(p, "w"), indent=2) +EOF + run_us check demo + [ "$status" -ne 0 ] + [[ "$output" == *"demo"* ]] + [[ "$output" == *"clone"* ]] +} diff --git a/scripts/update-skills.py b/scripts/update-skills.py new file mode 100755 index 0000000..3645596 --- /dev/null +++ b/scripts/update-skills.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 +"""Keep forked skills and commands in sync with their upstreams. 
#!/usr/bin/env python3
"""Keep forked skills and commands in sync with their upstreams.

Each fork has a manifest at upstreams/<name>/manifest.json:

  url                 upstream GitHub (or any git) URL
  ref                 branch or tag to track
  subpath             path inside the upstream repo ("" = repo root)
  target              repo-relative path the fork lives at
  files               optional map of upstream-relative -> target-relative
                      paths; when present only mapped files are tracked
                      (a key starting with "/" is upstream-repo-root-relative)
  license             upstream license identifier (informational)
  last_synced_commit  upstream commit of the last completed sync

The committed baseline snapshot at upstreams/<name>/baseline/ mirrors the
*target* layout and is the 3-way merge base. The script never writes a fork's
target files: check classifies, merge-file merges to stdout; only bootstrap
and mark-synced write, and only to the manifest and baseline.

Commands:
  list                  show forks and sync state
  bootstrap NAME        snapshot upstream@ref as the baseline
  check NAME [--json]   clone upstream to cache, classify every file
  merge-file NAME PATH  3-way merge one file to stdout (exit 1 = conflict)
  mark-synced NAME      refresh baseline + manifest from the checked cache
"""
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import NoReturn

# Directory names never descended into, and file suffixes never tracked.
EXCLUDE_DIRS = {".git", "node_modules", "__pycache__", ".pytest_cache"}
EXCLUDE_SUFFIXES = {".pyc"}

# Manifest keys the script cannot work without.
_REQUIRED_KEYS = ("url", "ref", "target")


def fail(msg: str, code: int = 2) -> NoReturn:
    """Print ``error: <msg>`` to stderr and exit the process with ``code``."""
    print(f"error: {msg}", file=sys.stderr)
    sys.exit(code)


def _git(*args: str, text: bool = True) -> subprocess.CompletedProcess:
    """Run ``git <args>`` capturing output; exit cleanly if git is missing.

    The return code is NOT checked here: callers interpret it themselves
    (``merge-file`` in particular uses it as a conflict count).
    """
    try:
        return subprocess.run(["git", *args], capture_output=True, text=text)
    except FileNotFoundError:
        fail("git executable not found on PATH")


def walk_files(base: Path) -> list[str]:
    """Return sorted POSIX-style relative paths of all files under ``base``.

    Directories named in EXCLUDE_DIRS and files whose suffix is in
    EXCLUDE_SUFFIXES are skipped. Symlinked directories are not followed, so
    a link cycle cannot produce duplicate or runaway paths and the walk
    stays inside ``base``. A missing or non-directory ``base`` yields ``[]``.
    """
    found: list[str] = []
    if not base.is_dir():
        return found
    pending = [base]
    while pending:
        current = pending.pop()
        for entry in current.iterdir():
            if entry.is_dir():
                if entry.name not in EXCLUDE_DIRS and not entry.is_symlink():
                    pending.append(entry)
            elif entry.is_file() and entry.suffix not in EXCLUDE_SUFFIXES:
                # as_posix() keeps keys comparable with manifest paths on
                # every platform (manifests use "/" separators).
                found.append(entry.relative_to(base).as_posix())
    return sorted(found)


class Fork:
    """One registered fork: its manifest, target tree and baseline snapshot."""

    def __init__(self, root: Path, name: str):
        """Load ``upstreams/<name>/manifest.json`` below ``root``.

        Exits via ``fail`` when the manifest is missing, is not valid JSON,
        is not a JSON object, or lacks one of ``url``/``ref``/``target``.
        """
        self.root = root
        self.name = name
        self.dir = root / "upstreams" / name
        self.manifest_path = self.dir / "manifest.json"
        if not self.manifest_path.is_file():
            fail(f"no manifest for fork '{name}' at {self.manifest_path}")
        try:
            m = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            fail(f"cannot read manifest for fork '{name}' at "
                 f"{self.manifest_path}: {exc}")
        if not isinstance(m, dict):
            fail(f"manifest for fork '{name}' must be a JSON object")
        missing = [key for key in _REQUIRED_KEYS if key not in m]
        if missing:
            fail(f"manifest for fork '{name}' is missing "
                 f"{', '.join(missing)}")
        files = m.get("files")
        if files is not None and not isinstance(files, dict):
            fail(f"manifest for fork '{name}': 'files' must be an object")
        self.url: str = m["url"]
        self.ref: str = m["ref"]
        self.subpath: str = m.get("subpath", "")
        self.target: Path = root / m["target"]
        self.files: dict[str, str] | None = files
        self.manifest = m
        self.baseline = self.dir / "baseline"

    def checkout(self, cache: Path) -> Path:
        """Return where this fork's upstream checkout lives inside ``cache``."""
        return cache / self.name

    def clone(self, cache: Path) -> Path:
        """Shallow-clone upstream@ref into the cache, replacing any old copy.

        Exits via ``fail`` (naming the fork and URL) when the clone fails.
        """
        co = self.checkout(cache)
        if co.exists():
            shutil.rmtree(co)
        co.parent.mkdir(parents=True, exist_ok=True)
        # "--" stops a manifest URL from ever being parsed as a git option.
        r = _git("clone", "--quiet", "--depth", "1",
                 "--branch", self.ref, "--", self.url, str(co))
        if r.returncode != 0:
            fail(f"could not clone {self.name} upstream from {self.url}: "
                 f"{r.stderr.strip()}")
        return co

    def upstream_commit(self, cache: Path) -> str:
        """Return the HEAD commit hash of the cached upstream checkout."""
        r = _git("-C", str(self.checkout(cache)), "rev-parse", "HEAD")
        if r.returncode != 0:
            fail(f"could not resolve upstream commit for {self.name}: "
                 f"{r.stderr.strip()}")
        return r.stdout.strip()

    def upstream_files(self, co: Path) -> dict[str, Path]:
        """Map of target-relative path -> absolute upstream source path."""
        src = co / self.subpath if self.subpath else co
        if self.files is None:
            return {rel: src / rel for rel in walk_files(src)}
        mapped: dict[str, Path] = {}
        for ukey, trel in self.files.items():
            # A leading "/" anchors the key at the upstream repo root
            # instead of at ``subpath``.
            upath = co / ukey[1:] if ukey.startswith("/") else src / ukey
            if upath.is_file():
                mapped[trel] = upath
        return mapped

    def target_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path in the fork target.

        With a ``files`` map only mapped files that exist are returned.
        """
        if self.files is None:
            return {rel: self.target / rel
                    for rel in walk_files(self.target)}
        return {trel: self.target / trel for trel in self.files.values()
                if (self.target / trel).is_file()}

    def baseline_files(self) -> dict[str, Path]:
        """Map of target-relative path -> absolute path in the baseline."""
        return {rel: self.baseline / rel
                for rel in walk_files(self.baseline)}

    def save_manifest(self) -> None:
        """Write the in-memory manifest back to disk (2-space indent)."""
        self.manifest_path.write_text(
            json.dumps(self.manifest, indent=2) + "\n", encoding="utf-8")


def _status(u: bytes | None, t: bytes | None, b: bytes | None,
            have_baseline: bool) -> str:
    """Classify one path from its upstream/target/baseline contents.

    ``None`` means "file absent on that side". Branch order matters: the
    absence checks run before any content comparison.
    """
    if not have_baseline:
        # Without a merge base only equality can be judged.
        if u is not None and t is not None:
            return "unchanged" if u == t else "no-baseline"
        return "upstream-new" if u is not None else "local-new"
    if u is None and b is None:
        return "local-new"
    if t is None and b is None:
        return "upstream-new"
    if u is None:
        return "upstream-deleted"
    if t is None:
        return "local-deleted"
    if u == b and t == b:
        return "unchanged"
    if t == b:
        return "upstream-changed"
    if u == b:
        return "local-only"
    if u == t:
        return "unchanged"  # converged independently
    return "both-changed"


def classify(fork: Fork, co: Path) -> list[dict[str, str]]:
    """Return ``{"path", "status"}`` rows for every tracked file, sorted.

    The path set is the union of upstream (from checkout ``co``), target and
    baseline files; contents are compared byte-for-byte.
    """
    up = fork.upstream_files(co)
    tg = fork.target_files()
    bl = fork.baseline_files()
    have_baseline = fork.baseline.is_dir()

    def read(p: Path | None) -> bytes | None:
        return p.read_bytes() if p is not None and p.is_file() else None

    return [
        {"path": rel,
         "status": _status(read(up.get(rel)), read(tg.get(rel)),
                           read(bl.get(rel)), have_baseline)}
        for rel in sorted(set(up) | set(tg) | set(bl))
    ]


def _snapshot_baseline(fork: Fork, cache: Path, co: Path,
                       *, allow_empty: bool) -> tuple[str, int]:
    """Replace the baseline with upstream files from ``co``; update manifest.

    The snapshot is staged in a sibling directory and swapped in only after
    every file has been copied, so a failed copy leaves the previous
    baseline intact. Returns ``(upstream_commit, file_count)``.
    """
    up = fork.upstream_files(co)
    if not up and not allow_empty:
        fail(f"no upstream files found for {fork.name} "
             f"(subpath '{fork.subpath}') — refusing to write an empty "
             f"baseline")
    sha = fork.upstream_commit(cache)  # resolve before touching the disk
    staging = fork.dir / "baseline.tmp"
    if staging.exists():
        shutil.rmtree(staging)  # leftover from an interrupted run
    try:
        staging.mkdir(parents=True)
        for rel, src in up.items():
            dest = staging / rel
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dest)
        if fork.baseline.exists():
            shutil.rmtree(fork.baseline)
        staging.rename(fork.baseline)
    except BaseException:
        shutil.rmtree(staging, ignore_errors=True)
        raise
    fork.manifest["last_synced_commit"] = sha
    fork.save_manifest()
    return sha, len(up)


def cmd_list(root: Path, _cache: Path, _args: argparse.Namespace) -> int:
    """Print one line per registered fork with its sync state.

    Returns 0, or 1 when at least one manifest could not be read (the
    remaining forks are still listed).
    """
    updir = root / "upstreams"
    manifests = sorted(updir.glob("*/manifest.json")) if updir.is_dir() else []
    if not manifests:
        print("no forks registered (no upstreams/*/manifest.json)")
        return 0
    status = 0
    for mp in manifests:
        dirname = mp.parent.name
        try:
            m = json.loads(mp.read_text(encoding="utf-8"))
            if not isinstance(m, dict):
                raise ValueError("manifest is not a JSON object")
        except (OSError, ValueError) as exc:
            print(f"error: {dirname}: unreadable manifest: {exc}",
                  file=sys.stderr)
            status = 1
            continue
        sha = m.get("last_synced_commit")
        state = f"last synced {str(sha)[:10]}" if sha else "never synced"
        print(f"{m.get('name', dirname)}: {state} "
              f"({m.get('url', '?')} @ {m.get('ref', '?')})")
    return status


def cmd_bootstrap(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone upstream@ref and record it as the fork's first baseline."""
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    # An empty first snapshot almost certainly means a wrong subpath/files
    # map, so it is rejected rather than silently recorded.
    sha, count = _snapshot_baseline(fork, cache, co, allow_empty=False)
    print(f"bootstrapped {fork.name} at {sha} ({count} files)")
    return 0


def cmd_check(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Clone upstream into the cache and report every file's status.

    Always returns 0; pending work is reported in the output, not the
    exit code.
    """
    fork = Fork(root, args.name)
    co = fork.clone(cache)
    rows = classify(fork, co)
    sha = fork.upstream_commit(cache)
    if args.json:
        print(json.dumps({"name": fork.name, "upstream_commit": sha,
                          "files": rows}, indent=2))
        return 0
    print(f"{fork.name} @ upstream {sha}")
    for r in rows:
        print(f" {r['status']:<17} {r['path']}")
    pending = sum(r["status"] != "unchanged" for r in rows)
    print(f"{pending} file(s) need attention" if pending
          else "everything in sync")
    return 0


def cmd_merge_file(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """3-way merge one file (local / baseline / upstream) to stdout.

    Uses the checkout cached by a previous ``check``. Returns 0 on a clean
    merge and 1 when conflict markers were emitted; exits with 2 via
    ``fail`` on any hard error. A file with no baseline copy is merged
    against an empty base.
    """
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    up = fork.upstream_files(co)
    if args.path not in up:
        fail(f"{args.path} not present upstream for {fork.name}")
    local = fork.target / args.path
    if not local.is_file():
        fail(f"{args.path} not present locally under {fork.target}")
    base = fork.baseline / args.path
    with tempfile.NamedTemporaryFile() as empty:
        base_arg = str(base) if base.is_file() else empty.name
        # Bytes mode: text mode would rewrite CRLF as LF and could raise on
        # content that is not valid UTF-8.
        r = _git("merge-file", "--stdout",
                 "-L", "local", "-L", "baseline", "-L", "upstream",
                 str(local), base_arg, str(up[args.path]), text=False)
    # git merge-file exits with the conflict count, or a negative value on
    # hard error — which subprocess reports as >=128 (255 observed for -1).
    if r.returncode >= 128 or r.returncode < 0:
        detail = r.stderr.decode("utf-8", errors="replace").strip()
        fail(f"git merge-file failed on {args.path}: {detail}")
    raw = getattr(sys.stdout, "buffer", None)
    if raw is not None:
        sys.stdout.flush()
        raw.write(r.stdout)
        raw.flush()
    else:  # stdout replaced by a text-only stream
        sys.stdout.write(r.stdout.decode("utf-8", errors="replace"))
    return 0 if r.returncode == 0 else 1


def cmd_mark_synced(root: Path, cache: Path, args: argparse.Namespace) -> int:
    """Refresh baseline and ``last_synced_commit`` from the cached checkout."""
    fork = Fork(root, args.name)
    co = fork.checkout(cache)
    if not co.is_dir():
        fail(f"no cached checkout for {fork.name} — run check first")
    # Empty is allowed here: upstream may legitimately have deleted
    # everything that was tracked.
    sha, _count = _snapshot_baseline(fork, cache, co, allow_empty=True)
    print(f"{fork.name} synced at {sha}")
    return 0


def main() -> int:
    """Parse the command line and dispatch to the matching ``cmd_*``."""
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the manifest/command tables of the module docstring aligned.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--root", type=Path,
                    default=Path(__file__).resolve().parents[1],
                    help="repo root (default: this script's repo)")
    ap.add_argument("--cache", type=Path,
                    default=Path(tempfile.gettempdir()) / "update-skills",
                    help="dir for upstream checkouts")
    sub = ap.add_subparsers(dest="cmd", required=True)
    sub.add_parser("list")
    p = sub.add_parser("bootstrap")
    p.add_argument("name")
    p = sub.add_parser("check")
    p.add_argument("name")
    p.add_argument("--json", action="store_true")
    p = sub.add_parser("merge-file")
    p.add_argument("name")
    p.add_argument("path")
    p = sub.add_parser("mark-synced")
    p.add_argument("name")
    args = ap.parse_args()
    handlers = {"list": cmd_list, "bootstrap": cmd_bootstrap,
                "check": cmd_check, "merge-file": cmd_merge_file,
                "mark-synced": cmd_mark_synced}
    return handlers[args.cmd](args.root.resolve(), args.cache, args)


if __name__ == "__main__":
    sys.exit(main())
