5 files changed, 419 insertions, 0 deletions
diff --git a/hooks/tests/conftest.py b/hooks/tests/conftest.py
new file mode 100644
index 0000000..72c7920
--- /dev/null
+++ b/hooks/tests/conftest.py
@@ -0,0 +1,41 @@
+"""Pytest harness for the hook scripts under hooks/.
+
+Hook filenames are hyphenated (git-commit-confirm.py, etc.), so they
+cannot be imported by module name. `load_hook(filename)` loads them by
+path via importlib, and inserts hooks/ onto sys.path first so each hook's
+own `from _common import ...` resolves.
+"""
+
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+
+
+def load_hook(filename: str):
+    """Load a hook script by filename and return its module object.
+
+    Inserts hooks/ onto sys.path (idempotently) so the hook's
+    `from _common import ...` works, then loads the file by path under a
+    sanitized module name.
+    """
+    hooks_dir = str(HOOKS_DIR)
+    if hooks_dir not in sys.path:
+        sys.path.insert(0, hooks_dir)
+
+    path = HOOKS_DIR / filename
+    mod_name = "hook_" + filename.replace("-", "_").replace(".py", "")
+    spec = importlib.util.spec_from_file_location(mod_name, path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"could not load hook from {path}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+@pytest.fixture
+def load_hook_fixture():
+    return load_hook
diff --git a/hooks/tests/test_common.py b/hooks/tests/test_common.py
new file mode 100644
index 0000000..10e6097
--- /dev/null
+++ b/hooks/tests/test_common.py
@@ -0,0 +1,74 @@
+"""Tests for hooks/_common.py — read_referenced_file and scan_attribution."""
+
+import sys
+from pathlib import Path
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+if str(HOOKS_DIR) not in sys.path:
+    sys.path.insert(0, str(HOOKS_DIR))
+
+import _common  # noqa: E402
+
+
+# --- read_referenced_file: normal cases ------------------------------------
+
+def test_read_referenced_file_returns_content(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("hello world\n")
+    assert _common.read_referenced_file(str(f)) == "hello world\n"
+
+
+def test_read_referenced_file_expands_user(tmp_path, monkeypatch):
+    # Point HOME at tmp_path so ~/msg.txt resolves under it.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path, raising=False)
+    f = tmp_path / "msg.txt"
+    f.write_text("tilde body")
+    assert _common.read_referenced_file("~/msg.txt") == "tilde body"
+
+
+# --- read_referenced_file: error / boundary cases --------------------------
+
+def test_read_referenced_file_missing_returns_none(tmp_path):
+    assert _common.read_referenced_file(str(tmp_path / "nope.txt")) is None
+
+
+def test_read_referenced_file_directory_returns_none(tmp_path):
+    # A directory is not a regular file.
+    assert _common.read_referenced_file(str(tmp_path)) is None
+
+
+def test_read_referenced_file_oversized_returns_none(tmp_path):
+    f = tmp_path / "big.txt"
+    f.write_text("x" * 50)
+    assert _common.read_referenced_file(str(f), max_bytes=10) is None
+
+
+def test_read_referenced_file_at_limit_is_read(tmp_path):
+    f = tmp_path / "edge.txt"
+    f.write_text("12345")  # exactly 5 bytes
+    assert _common.read_referenced_file(str(f), max_bytes=5) == "12345"
+
+
+def test_read_referenced_file_invalid_utf8_returns_none(tmp_path):
+    f = tmp_path / "bin.dat"
+    f.write_bytes(b"\xff\xfe\x00bad")
+    assert _common.read_referenced_file(str(f)) is None
+
+
+def test_read_referenced_file_empty_string_path_returns_none():
+    assert _common.read_referenced_file("") is None
+
+
+# --- scan_attribution sanity (relied on by the file-backed tests) ----------
+
+def test_scan_attribution_catches_coauthor():
+    assert _common.scan_attribution("Co-Authored-By: Claude")
+
+
+def test_scan_attribution_catches_robot_emoji():
+    assert _common.scan_attribution("nice work \U0001F916")
+
+
+def test_scan_attribution_clean_text_empty():
+    assert _common.scan_attribution("fix: tidy up the parser") == []
diff --git a/hooks/tests/test_destructive_bash_confirm.py b/hooks/tests/test_destructive_bash_confirm.py
new file mode 100644
index 0000000..50302e6
--- /dev/null
+++ b/hooks/tests/test_destructive_bash_confirm.py
@@ -0,0 +1,137 @@
+"""Tests for hooks/destructive-bash-confirm.py — shlex-based rm -rf parsing."""
+
+from conftest import load_hook
+
+hook = load_hook("destructive-bash-confirm.py")
+
+SENTINEL = "(unparsed — shell too complex to inspect safely)"
+
+
+# --- detection of flag forms (combined / separate / reordered) -------------
+
+def test_rf_combined_detected():
+    assert hook.detect_rm_rf("rm -rf build") == ["build"]
+
+
+def test_r_f_separate_detected():
+    assert hook.detect_rm_rf("rm -r -f build") == ["build"]
+
+
+def test_fr_reordered_detected():
+    assert hook.detect_rm_rf("rm -fr build") == ["build"]
+
+
+def test_capital_R_detected():
+    assert hook.detect_rm_rf("rm -Rf build") == ["build"]
+
+
+def test_long_flags_detected():
+    targets = hook.detect_rm_rf("rm --recursive --force build")
+    assert targets == ["build"]
+
+
+# --- quoted / spaced paths now parse correctly -----------------------------
+
+def test_quoted_path_with_space_parsed():
+    assert hook.detect_rm_rf('rm -rf "my dir"') == ["my dir"]
+
+
+def test_multiple_targets():
+    assert hook.detect_rm_rf("rm -rf a b c") == ["a", "b", "c"]
+
+
+def test_double_dash_separates_flags_from_paths():
+    assert hook.detect_rm_rf("rm -rf -- -weird-name") == ["-weird-name"]
+
+
+# --- not-a-match cases: no modal -------------------------------------------
+
+def test_no_r_returns_none():
+    assert hook.detect_rm_rf("rm -f file") is None
+
+
+def test_no_f_returns_none():
+    assert hook.detect_rm_rf("rm -r dir") is None
+
+
+def test_not_rm_returns_none():
+    assert hook.detect_rm_rf("rmdir foo") is None
+
+
+def test_plain_rm_returns_none():
+    assert hook.detect_rm_rf("rm file.txt") is None
+
+
+# --- dangerous path banner still fires on parsed targets -------------------
+
+def test_home_var_target_flags_dangerous():
+    detection = hook.detect_destructive('rm -rf "$HOME/x"')
+    assert detection is not None
+    kind, ctx = detection
+    assert kind == "rm -rf"
+    assert "_banner" in ctx
+    assert "$HOME/x" in ctx["_banner"]
+
+
+def test_root_path_flags_dangerous():
+    detection = hook.detect_destructive("rm -rf /etc/foo")
+    assert detection is not None
+    _, ctx = detection
+    assert "_banner" in ctx
+
+
+def test_safe_relative_target_no_banner():
+    detection = hook.detect_destructive("rm -rf build/cache")
+    assert detection is not None
+    _, ctx = detection
+    assert "_banner" not in ctx
+
+
+# --- fail-toward-asking on ambiguity ---------------------------------------
+
+def test_compound_command_returns_sentinel():
+    # `ls && rm -rf foo` — the naive parser missed this; now we ask.
+    assert hook.detect_rm_rf("ls && rm -rf foo") == [SENTINEL]
+
+
+def test_pipeline_returns_sentinel():
+    assert hook.detect_rm_rf("find . -type d | xargs rm -rf") == [SENTINEL]
+
+
+def test_semicolon_returns_sentinel():
+    assert hook.detect_rm_rf("cd /tmp; rm -rf junk") == [SENTINEL]
+
+
+def test_command_substitution_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf $(echo target)") == [SENTINEL]
+
+
+def test_backtick_substitution_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf `echo target`") == [SENTINEL]
+
+
+def test_redirect_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf foo > /dev/null") == [SENTINEL]
+
+
+def test_unbalanced_quotes_returns_sentinel():
+    # shlex.split raises ValueError → ask anyway rather than silently pass.
+    assert hook.detect_rm_rf('rm -rf "unterminated') == [SENTINEL]
+
+
+def test_compound_without_rm_rf_returns_none():
+    # Compound construct but no dangerous rm — should not fire.
+    assert hook.detect_rm_rf("ls && echo done") is None
+
+
+def test_compound_with_rm_but_no_force_returns_none():
+    # `&&` present but the rm has no -f, so nothing to flag.
+    assert hook.detect_rm_rf("ls && rm -r dir") is None
+
+
+def test_sentinel_fires_modal_via_detect_destructive():
+    detection = hook.detect_destructive("ls && rm -rf foo")
+    assert detection is not None
+    kind, ctx = detection
+    assert kind == "rm -rf"
+    assert ctx["targets"] == [SENTINEL]
diff --git a/hooks/tests/test_gh_pr_create_confirm.py b/hooks/tests/test_gh_pr_create_confirm.py
new file mode 100644
index 0000000..19dde2e
--- /dev/null
+++ b/hooks/tests/test_gh_pr_create_confirm.py
@@ -0,0 +1,70 @@
+"""Tests for hooks/gh-pr-create-confirm.py — --body-file reads real content."""
+
+from conftest import load_hook
+
+hook = load_hook("gh-pr-create-confirm.py")
+
+
+# --- existing parsing still works (regression guard) -----------------------
+
+def test_parse_title_and_inline_body():
+    cmd = 'gh pr create --title "feat: thing" --body "does the thing"'
+    fields = hook.parse_pr_create(cmd)
+    assert fields["title"] == "feat: thing"
+    assert fields["body"] == "does the thing"
+
+
+def test_parse_reviewers():
+    cmd = 'gh pr create --title "x" --reviewer alice,bob'
+    fields = hook.parse_pr_create(cmd)
+    assert fields["reviewers"] == ["alice", "bob"]
+
+
+# --- new: --body-file reads the real content -------------------------------
+
+def test_body_file_reads_real_content(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Problem\nthings broke\n\n## Fix\nfixed them\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+    assert "things broke" in fields["body"]
+    assert "fixed them" in fields["body"]
+    # No longer the old placeholder.
+    assert not fields["body"].startswith("(body read from file")
+
+
+def test_body_file_attribution_is_caught(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Summary\nshipped a feature \U0001F916 generated with Claude\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "feat: x" --body-file {f}')
+    scan_text = "\n".join(
+        filter(None, [fields.get("title"), fields.get("body")])
+    )
+    hits = hook.scan_attribution(scan_text)
+    assert hits  # robot emoji + 'Generated with Claude' both leak
+
+
+def test_body_file_clean_content_no_hits(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Summary\nfixed the off-by-one in the pager\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "fix: pager" --body-file {f}')
+    scan_text = "\n".join(
+        filter(None, [fields.get("title"), fields.get("body")])
+    )
+    assert hook.scan_attribution(scan_text) == []
+
+
+# --- unreadable file keeps an informative could-not-inspect placeholder ----
+
+def test_body_file_missing_keeps_could_not_inspect_placeholder(tmp_path):
+    missing = tmp_path / "nope.md"
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {missing}')
+    assert "could not inspect" in fields["body"]
+    assert str(missing) in fields["body"]
+
+
+def test_body_file_oversized_keeps_placeholder(tmp_path, monkeypatch):
+    f = tmp_path / "big.md"
+    f.write_text("x" * 5000)
+    monkeypatch.setattr(hook, "read_referenced_file", lambda p: None)
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+    assert "could not inspect" in fields["body"]
diff --git a/hooks/tests/test_git_commit_confirm.py b/hooks/tests/test_git_commit_confirm.py
new file mode 100644
index 0000000..83519ad
--- /dev/null
+++ b/hooks/tests/test_git_commit_confirm.py
@@ -0,0 +1,97 @@
+"""Tests for hooks/git-commit-confirm.py — file-backed commit messages."""
+
+from conftest import load_hook
+
+hook = load_hook("git-commit-confirm.py")
+
+
+# --- existing forms still parse (regression guard) -------------------------
+
+def test_extract_dash_m_simple():
+    msg = hook.extract_commit_message('git commit -m "fix: tidy parser"')
+    assert msg == "fix: tidy parser"
+
+
+def test_extract_heredoc():
+    cmd = (
+        "git commit -m \"$(cat <<'EOF'\n"
+        "feat: add thing\n"
+        "\n"
+        "body line\n"
+        "EOF\n"
+        ")\""
+    )
+    msg = hook.extract_commit_message(cmd)
+    assert msg.startswith("feat: add thing")
+    assert "body line" in msg
+
+
+def test_extract_unparseable_falls_through():
+    # Bare `git commit` would drop into $EDITOR.
+    assert hook.extract_commit_message("git commit") == hook.UNPARSEABLE_MESSAGE
+
+
+# --- new: -F / --file / --file= forms read the file ------------------------
+
+def test_extract_dash_F_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("fix: from a file\n\nsome body\n")
+    assert hook.extract_commit_message(f"git commit -F {f}") == "fix: from a file\n\nsome body"
+
+
+def test_extract_long_file_flag_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("docs: long form file flag\n")
+    assert hook.extract_commit_message(f"git commit --file {f}") == "docs: long form file flag"
+
+
+def test_extract_file_equals_form_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("chore: equals form\n")
+    assert hook.extract_commit_message(f"git commit --file={f}") == "chore: equals form"
+
+
+def test_extract_F_strips_quotes_around_path(tmp_path):
+    f = tmp_path / "my msg.txt"
+    f.write_text("feat: quoted path\n")
+    assert hook.extract_commit_message(f'git commit -F "{f}"') == "feat: quoted path"
+
+
+# --- the audit-item bug: attribution in a file-backed message is now caught -
+
+def test_file_backed_attribution_is_caught(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("feat: add widget\n\nCo-Authored-By: Claude <noreply@anthropic.com>\n")
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+    assert any(i.startswith("AI-attribution") for i in issues)
+
+
+def test_inline_message_without_attribution_is_clean(tmp_path):
+    # Sanity: a clean file-backed message produces no attribution issue.
+    f = tmp_path / "msg.txt"
+    f.write_text("fix: handle empty input\n")
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+    assert not any(i.startswith("AI-attribution") for i in issues)
+
+
+# --- unreadable file falls through to UNPARSEABLE (fail-safe: ask) ---------
+
+def test_missing_file_falls_through_to_unparseable(tmp_path):
+    missing = tmp_path / "nope.txt"
+    assert hook.extract_commit_message(f"git commit -F {missing}") == hook.UNPARSEABLE_MESSAGE
+
+
+def test_oversized_file_falls_through_and_hook_asks(tmp_path, monkeypatch):
+    f = tmp_path / "big.txt"
+    f.write_text("x" * 5000)
+    # Force the read to refuse via a tiny limit (simulates oversize).
+    monkeypatch.setattr(
+        hook, "read_referenced_file", lambda p, max_bytes=10: None
+    )
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    assert msg == hook.UNPARSEABLE_MESSAGE
+    # And the hook would ask, because UNPARSEABLE_MESSAGE is a flagged issue.
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Dev <d@e.com>")
+    assert any("not parseable" in i for i in issues)