about summary refs log tree commit diff
path: root/hooks
diff options
context:
space:
mode:
Diffstat (limited to 'hooks')
-rw-r--r-- hooks/_common.py 20
-rwxr-xr-x hooks/destructive-bash-confirm.py 86
-rwxr-xr-x hooks/gh-pr-create-confirm.py 18
-rwxr-xr-x hooks/git-commit-confirm.py 19
-rw-r--r-- hooks/tests/conftest.py 41
-rw-r--r-- hooks/tests/test_common.py 74
-rw-r--r-- hooks/tests/test_destructive_bash_confirm.py 137
-rw-r--r-- hooks/tests/test_gh_pr_create_confirm.py 70
-rw-r--r-- hooks/tests/test_git_commit_confirm.py 97
9 files changed, 549 insertions, 13 deletions
diff --git a/hooks/_common.py b/hooks/_common.py
index d4bf520..e82f7ed 100644
--- a/hooks/_common.py
+++ b/hooks/_common.py
@@ -16,6 +16,7 @@ the tools themselves.
"""
import json
+import os
import re
import sys
from typing import Optional
@@ -64,6 +65,25 @@ def respond_ask(reason: str, system_message: Optional[str] = None) -> None:
print(json.dumps(output))
+def read_referenced_file(path: str, max_bytes: int = 1_000_000) -> Optional[str]:
+ """Read a local file referenced by -F/--file/--body-file so its text can be
+ attribution-scanned. Return the text, or None if it can't be safely read
+ (missing, not a regular file, larger than max_bytes, or not valid UTF-8).
+ None means 'could not inspect', never 'clean'."""
+ if not path:
+ return None
+ try:
+ expanded = os.path.expanduser(path)
+ if not os.path.isfile(expanded):
+ return None
+ if os.path.getsize(expanded) > max_bytes:
+ return None
+ with open(expanded, "r", encoding="utf-8", errors="strict") as fh:
+ return fh.read()
+ except (OSError, UnicodeDecodeError):
+ return None
+
+
def scan_attribution(text: str) -> list[str]:
"""Return human-readable descriptions of any AI-attribution hits."""
if not text:
diff --git a/hooks/destructive-bash-confirm.py b/hooks/destructive-bash-confirm.py
index c1cf5f9..be8b491 100755
--- a/hooks/destructive-bash-confirm.py
+++ b/hooks/destructive-bash-confirm.py
@@ -14,9 +14,20 @@ banner via systemMessage. First match wins — a command with multiple
destructive patterns fires on the first detected.
Non-destructive Bash calls exit 0 silent.
+
+Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the
+command with `shlex.split`, so it handles a single simple command and its
+quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`,
+quoted/spaced paths). It does NOT model pipelines, compound commands
+(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks),
+redirects, aliases, or variable/glob expansion. When any of those appear
+alongside a forced recursive `rm` — or when the quoting is unbalanced —
+target attribution is unreliable, so the function returns a sentinel and
+the modal fires anyway (fail toward asking) rather than silently passing.
"""
import re
+import shlex
import subprocess
import sys
from typing import Optional
@@ -26,6 +37,13 @@ from _common import read_payload, respond_ask
PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"}
+# Returned as the sole target when the command is a forced recursive rm but
+# the shell is too complex to attribute targets safely. Forces the modal.
+UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)"
+
+# Constructs that make single-command target attribution unreliable.
+_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<")
+
def main() -> int:
payload = read_payload()
@@ -90,6 +108,8 @@ def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]:
if t in ("/", "~", "$HOME", ".", "..", "*")
or t.startswith("/")
or t.startswith("~")
+ or t.startswith("$HOME")
+ or "*" in t
]
if dangerous:
ctx["_banner"] = (
@@ -114,25 +134,73 @@ def is_force_push(cmd: str) -> bool:
def detect_rm_rf(cmd: str) -> Optional[list[str]]:
- """If cmd invokes `rm` with both -r/-R and -f flags, return its targets."""
- m = re.search(r"(?:^|[\s;&|()])rm\s+(.+)$", cmd)
- if not m:
+ """If cmd invokes `rm` with both -r/-R and -f flags, return its targets.
+
+ Tokenizes with `shlex.split`, so quoted/spaced paths and combined
+ (`-rf`), separate (`-r -f`), or reordered (`-fr`) flags all parse. The
+ long forms `--recursive` and `--force` count too. Returns:
+
+ - the target list (without flags) for a forced recursive rm,
+ - None when the command is not a forced recursive rm (no -r, no -f,
+ or not an `rm` at all) — no modal,
+ - [UNPARSEABLE_RM_TARGET] when a forced recursive rm is present but
+ the shell is too complex to attribute targets safely (compound
+ command, pipeline, command substitution, redirect) or the quoting
+ is unbalanced — fail toward asking.
+
+ shlex models a single simple command and its quoting only. It does not
+ model pipelines, compound commands, aliases, or variable/glob
+ expansion — those fall to the [UNPARSEABLE_RM_TARGET] ask path.
+ """
+ # Quick reject: no `rm` word at all.
+ if not re.search(r"(?:^|[\s;&|()])rm\b", cmd):
+ return None
+
+ complex_shell = any(tok in cmd for tok in _COMPLEX_SHELL)
+
+ try:
+ tokens = shlex.split(cmd)
+ except ValueError:
+ # Unbalanced quotes — can't tokenize. If an rm appears at all, ask.
+ return [UNPARSEABLE_RM_TARGET]
+
+ # Walk tokens; find the first `rm` and parse the flags/targets that
+ # immediately follow it within the same simple command.
+ try:
+ rm_idx = tokens.index("rm")
+ except ValueError:
return None
- rest = m.group(1).split()
- flag_chars = ""
+ rest = tokens[rm_idx + 1:]
+ has_r = False
+ has_f = False
i = 0
while i < len(rest) and rest[i].startswith("-") and rest[i] != "--":
- flag_chars += rest[i][1:]
+ tok = rest[i]
+ if tok in ("--recursive",):
+ has_r = True
+ elif tok in ("--force",):
+ has_f = True
+ elif tok.startswith("--"):
+ pass # some other long flag — ignore
+ else:
+ short = tok[1:]
+ if re.search(r"[rR]", short):
+ has_r = True
+ if "f" in short:
+ has_f = True
i += 1
- if rest[i:i+1] == ["--"]:
+ if rest[i:i + 1] == ["--"]:
i += 1
- has_r = bool(re.search(r"[rR]", flag_chars))
- has_f = "f" in flag_chars
if not (has_r and has_f):
return None
+ # Forced recursive rm confirmed. If the surrounding shell is too complex
+ # to trust target attribution, ask anyway rather than guess.
+ if complex_shell:
+ return [UNPARSEABLE_RM_TARGET]
+
return rest[i:]
diff --git a/hooks/gh-pr-create-confirm.py b/hooks/gh-pr-create-confirm.py
index e3c2f13..f539551 100755
--- a/hooks/gh-pr-create-confirm.py
+++ b/hooks/gh-pr-create-confirm.py
@@ -27,7 +27,12 @@ Wire in ~/.claude/settings.json alongside git-commit-confirm.py:
import re
import sys
-from _common import read_payload, respond_ask, scan_attribution
+from _common import (
+ read_payload,
+ read_referenced_file,
+ respond_ask,
+ scan_attribution,
+)
MAX_BODY_LINES = 20
@@ -91,9 +96,16 @@ def parse_pr_create(cmd: str) -> dict:
if b:
fields["body"] = b.group(2).strip()
else:
- bf = re.search(r"--body-file\s+(\S+)", cmd)
+ bf = re.search(r"--body-file\s+(\"[^\"]+\"|'[^']+'|\S+)", cmd)
if bf:
- fields["body"] = f"(body read from file: {bf.group(1)})"
+ path = bf.group(1).strip("\"'")
+ text = read_referenced_file(path)
+ if text is not None:
+ fields["body"] = text.strip()
+ else:
+ fields["body"] = (
+ f"(body read from file: {path} — could not inspect)"
+ )
# Base / head
base = re.search(r"--base\s+(\S+)", cmd)
diff --git a/hooks/git-commit-confirm.py b/hooks/git-commit-confirm.py
index 2441d23..618ac20 100755
--- a/hooks/git-commit-confirm.py
+++ b/hooks/git-commit-confirm.py
@@ -42,7 +42,12 @@ import re
import subprocess
import sys
-from _common import read_payload, respond_ask, scan_attribution
+from _common import (
+ read_payload,
+ read_referenced_file,
+ respond_ask,
+ scan_attribution,
+)
MAX_FILES_SHOWN = 25
@@ -140,6 +145,18 @@ def extract_commit_message(cmd: str) -> str:
if long_form:
return "\n\n".join(msg for _, msg in long_form).strip()
+ # File-backed message: -F <file> / --file <file> / --file=<file>.
+ # Read the file so its text is attribution-scanned. If it can't be read
+ # safely, fall through to UNPARSEABLE_MESSAGE so the hook asks (fail-safe).
+ file_flag = re.search(
+ r"(?:^|\s)(?:-F|--file)[=\s]+(\"[^\"]+\"|'[^']+'|\S+)", cmd
+ )
+ if file_flag:
+ path = file_flag.group(1).strip("\"'")
+ text = read_referenced_file(path)
+ if text is not None:
+ return text.strip()
+
return UNPARSEABLE_MESSAGE
diff --git a/hooks/tests/conftest.py b/hooks/tests/conftest.py
new file mode 100644
index 0000000..72c7920
--- /dev/null
+++ b/hooks/tests/conftest.py
@@ -0,0 +1,41 @@
+"""Pytest harness for the hook scripts under hooks/.
+
+Hook filenames are hyphenated (git-commit-confirm.py, etc.), so they
+cannot be imported by module name. `load_hook(filename)` loads them by
+path via importlib, and inserts hooks/ onto sys.path first so each hook's
+own `from _common import ...` resolves.
+"""
+
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+
+
+def load_hook(filename: str):
+ """Load a hook script by filename and return its module object.
+
+ Inserts hooks/ onto sys.path (idempotently) so the hook's
+ `from _common import ...` works, then loads the file by path under a
+ sanitized module name.
+ """
+ hooks_dir = str(HOOKS_DIR)
+ if hooks_dir not in sys.path:
+ sys.path.insert(0, hooks_dir)
+
+ path = HOOKS_DIR / filename
+ mod_name = "hook_" + filename.replace("-", "_").replace(".py", "")
+ spec = importlib.util.spec_from_file_location(mod_name, path)
+ if spec is None or spec.loader is None:
+ raise ImportError(f"could not load hook from {path}")
+ module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(module)
+ return module
+
+
+@pytest.fixture
+def load_hook_fixture():
+ return load_hook
diff --git a/hooks/tests/test_common.py b/hooks/tests/test_common.py
new file mode 100644
index 0000000..10e6097
--- /dev/null
+++ b/hooks/tests/test_common.py
@@ -0,0 +1,74 @@
+"""Tests for hooks/_common.py — read_referenced_file and scan_attribution."""
+
+import sys
+from pathlib import Path
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+if str(HOOKS_DIR) not in sys.path:
+ sys.path.insert(0, str(HOOKS_DIR))
+
+import _common # noqa: E402
+
+
+# --- read_referenced_file: normal cases ------------------------------------
+
+def test_read_referenced_file_returns_content(tmp_path):
+ f = tmp_path / "msg.txt"
+ f.write_text("hello world\n")
+ assert _common.read_referenced_file(str(f)) == "hello world\n"
+
+
+def test_read_referenced_file_expands_user(tmp_path, monkeypatch):
+ # Point HOME at tmp_path so ~/msg.txt resolves under it.
+ monkeypatch.setenv("HOME", str(tmp_path))
+ monkeypatch.setattr(Path, "home", lambda: tmp_path, raising=False)
+ f = tmp_path / "msg.txt"
+ f.write_text("tilde body")
+ assert _common.read_referenced_file("~/msg.txt") == "tilde body"
+
+
+# --- read_referenced_file: error / boundary cases --------------------------
+
+def test_read_referenced_file_missing_returns_none(tmp_path):
+ assert _common.read_referenced_file(str(tmp_path / "nope.txt")) is None
+
+
+def test_read_referenced_file_directory_returns_none(tmp_path):
+ # A directory is not a regular file.
+ assert _common.read_referenced_file(str(tmp_path)) is None
+
+
+def test_read_referenced_file_oversized_returns_none(tmp_path):
+ f = tmp_path / "big.txt"
+ f.write_text("x" * 50)
+ assert _common.read_referenced_file(str(f), max_bytes=10) is None
+
+
+def test_read_referenced_file_at_limit_is_read(tmp_path):
+ f = tmp_path / "edge.txt"
+ f.write_text("12345") # exactly 5 bytes
+ assert _common.read_referenced_file(str(f), max_bytes=5) == "12345"
+
+
+def test_read_referenced_file_invalid_utf8_returns_none(tmp_path):
+ f = tmp_path / "bin.dat"
+ f.write_bytes(b"\xff\xfe\x00bad")
+ assert _common.read_referenced_file(str(f)) is None
+
+
+def test_read_referenced_file_empty_string_path_returns_none():
+ assert _common.read_referenced_file("") is None
+
+
+# --- scan_attribution sanity (relied on by the file-backed tests) ----------
+
+def test_scan_attribution_catches_coauthor():
+ assert _common.scan_attribution("Co-Authored-By: Claude")
+
+
+def test_scan_attribution_catches_robot_emoji():
+ assert _common.scan_attribution("nice work \U0001F916")
+
+
+def test_scan_attribution_clean_text_empty():
+ assert _common.scan_attribution("fix: tidy up the parser") == []
diff --git a/hooks/tests/test_destructive_bash_confirm.py b/hooks/tests/test_destructive_bash_confirm.py
new file mode 100644
index 0000000..50302e6
--- /dev/null
+++ b/hooks/tests/test_destructive_bash_confirm.py
@@ -0,0 +1,137 @@
+"""Tests for hooks/destructive-bash-confirm.py — shlex-based rm -rf parsing."""
+
+from conftest import load_hook
+
+hook = load_hook("destructive-bash-confirm.py")
+
+SENTINEL = "(unparsed — shell too complex to inspect safely)"
+
+
+# --- detection of flag forms (combined / separate / reordered) -------------
+
+def test_rf_combined_detected():
+ assert hook.detect_rm_rf("rm -rf build") == ["build"]
+
+
+def test_r_f_separate_detected():
+ assert hook.detect_rm_rf("rm -r -f build") == ["build"]
+
+
+def test_fr_reordered_detected():
+ assert hook.detect_rm_rf("rm -fr build") == ["build"]
+
+
+def test_capital_R_detected():
+ assert hook.detect_rm_rf("rm -Rf build") == ["build"]
+
+
+def test_long_flags_detected():
+ targets = hook.detect_rm_rf("rm --recursive --force build")
+ assert targets == ["build"]
+
+
+# --- quoted / spaced paths now parse correctly -----------------------------
+
+def test_quoted_path_with_space_parsed():
+ assert hook.detect_rm_rf('rm -rf "my dir"') == ["my dir"]
+
+
+def test_multiple_targets():
+ assert hook.detect_rm_rf("rm -rf a b c") == ["a", "b", "c"]
+
+
+def test_double_dash_separates_flags_from_paths():
+ assert hook.detect_rm_rf("rm -rf -- -weird-name") == ["-weird-name"]
+
+
+# --- not-a-match cases: no modal -------------------------------------------
+
+def test_no_r_returns_none():
+ assert hook.detect_rm_rf("rm -f file") is None
+
+
+def test_no_f_returns_none():
+ assert hook.detect_rm_rf("rm -r dir") is None
+
+
+def test_not_rm_returns_none():
+ assert hook.detect_rm_rf("rmdir foo") is None
+
+
+def test_plain_rm_returns_none():
+ assert hook.detect_rm_rf("rm file.txt") is None
+
+
+# --- dangerous path banner still fires on parsed targets -------------------
+
+def test_home_var_target_flags_dangerous():
+ detection = hook.detect_destructive('rm -rf "$HOME/x"')
+ assert detection is not None
+ kind, ctx = detection
+ assert kind == "rm -rf"
+ assert "_banner" in ctx
+ assert "$HOME/x" in ctx["_banner"]
+
+
+def test_root_path_flags_dangerous():
+ detection = hook.detect_destructive("rm -rf /etc/foo")
+ assert detection is not None
+ _, ctx = detection
+ assert "_banner" in ctx
+
+
+def test_safe_relative_target_no_banner():
+ detection = hook.detect_destructive("rm -rf build/cache")
+ assert detection is not None
+ _, ctx = detection
+ assert "_banner" not in ctx
+
+
+# --- fail-toward-asking on ambiguity ---------------------------------------
+
+def test_compound_command_returns_sentinel():
+ # `ls && rm -rf foo` — the naive parser missed this; now we ask.
+ assert hook.detect_rm_rf("ls && rm -rf foo") == [SENTINEL]
+
+
+def test_pipeline_returns_sentinel():
+ assert hook.detect_rm_rf("find . -type d | xargs rm -rf") == [SENTINEL]
+
+
+def test_semicolon_returns_sentinel():
+ assert hook.detect_rm_rf("cd /tmp; rm -rf junk") == [SENTINEL]
+
+
+def test_command_substitution_returns_sentinel():
+ assert hook.detect_rm_rf("rm -rf $(echo target)") == [SENTINEL]
+
+
+def test_backtick_substitution_returns_sentinel():
+ assert hook.detect_rm_rf("rm -rf `echo target`") == [SENTINEL]
+
+
+def test_redirect_returns_sentinel():
+ assert hook.detect_rm_rf("rm -rf foo > /dev/null") == [SENTINEL]
+
+
+def test_unbalanced_quotes_returns_sentinel():
+ # shlex.split raises ValueError → ask anyway rather than silently pass.
+ assert hook.detect_rm_rf('rm -rf "unterminated') == [SENTINEL]
+
+
+def test_compound_without_rm_rf_returns_none():
+ # Compound construct but no dangerous rm — should not fire.
+ assert hook.detect_rm_rf("ls && echo done") is None
+
+
+def test_compound_with_rm_but_no_force_returns_none():
+ # `&&` present but the rm has no -f, so nothing to flag.
+ assert hook.detect_rm_rf("ls && rm -r dir") is None
+
+
+def test_sentinel_fires_modal_via_detect_destructive():
+ detection = hook.detect_destructive("ls && rm -rf foo")
+ assert detection is not None
+ kind, ctx = detection
+ assert kind == "rm -rf"
+ assert ctx["targets"] == [SENTINEL]
diff --git a/hooks/tests/test_gh_pr_create_confirm.py b/hooks/tests/test_gh_pr_create_confirm.py
new file mode 100644
index 0000000..19dde2e
--- /dev/null
+++ b/hooks/tests/test_gh_pr_create_confirm.py
@@ -0,0 +1,70 @@
+"""Tests for hooks/gh-pr-create-confirm.py — --body-file reads real content."""
+
+from conftest import load_hook
+
+hook = load_hook("gh-pr-create-confirm.py")
+
+
+# --- existing parsing still works (regression guard) -----------------------
+
+def test_parse_title_and_inline_body():
+ cmd = 'gh pr create --title "feat: thing" --body "does the thing"'
+ fields = hook.parse_pr_create(cmd)
+ assert fields["title"] == "feat: thing"
+ assert fields["body"] == "does the thing"
+
+
+def test_parse_reviewers():
+ cmd = 'gh pr create --title "x" --reviewer alice,bob'
+ fields = hook.parse_pr_create(cmd)
+ assert fields["reviewers"] == ["alice", "bob"]
+
+
+# --- new: --body-file reads the real content -------------------------------
+
+def test_body_file_reads_real_content(tmp_path):
+ f = tmp_path / "body.md"
+ f.write_text("## Problem\nthings broke\n\n## Fix\nfixed them\n")
+ fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+ assert "things broke" in fields["body"]
+ assert "fixed them" in fields["body"]
+ # No longer the old placeholder.
+ assert not fields["body"].startswith("(body read from file")
+
+
+def test_body_file_attribution_is_caught(tmp_path):
+ f = tmp_path / "body.md"
+ f.write_text("## Summary\nshipped a feature \U0001F916 generated with Claude\n")
+ fields = hook.parse_pr_create(f'gh pr create --title "feat: x" --body-file {f}')
+ scan_text = "\n".join(
+ filter(None, [fields.get("title"), fields.get("body")])
+ )
+ hits = hook.scan_attribution(scan_text)
+ assert hits # robot emoji + 'Generated with Claude' both leak
+
+
+def test_body_file_clean_content_no_hits(tmp_path):
+ f = tmp_path / "body.md"
+ f.write_text("## Summary\nfixed the off-by-one in the pager\n")
+ fields = hook.parse_pr_create(f'gh pr create --title "fix: pager" --body-file {f}')
+ scan_text = "\n".join(
+ filter(None, [fields.get("title"), fields.get("body")])
+ )
+ assert hook.scan_attribution(scan_text) == []
+
+
+# --- unreadable file keeps an informative could-not-inspect placeholder ----
+
+def test_body_file_missing_keeps_could_not_inspect_placeholder(tmp_path):
+ missing = tmp_path / "nope.md"
+ fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {missing}')
+ assert "could not inspect" in fields["body"]
+ assert str(missing) in fields["body"]
+
+
+def test_body_file_oversized_keeps_placeholder(tmp_path, monkeypatch):
+ f = tmp_path / "big.md"
+ f.write_text("x" * 5000)
+ monkeypatch.setattr(hook, "read_referenced_file", lambda p: None)
+ fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+ assert "could not inspect" in fields["body"]
diff --git a/hooks/tests/test_git_commit_confirm.py b/hooks/tests/test_git_commit_confirm.py
new file mode 100644
index 0000000..83519ad
--- /dev/null
+++ b/hooks/tests/test_git_commit_confirm.py
@@ -0,0 +1,97 @@
+"""Tests for hooks/git-commit-confirm.py — file-backed commit messages."""
+
+from conftest import load_hook
+
+hook = load_hook("git-commit-confirm.py")
+
+
+# --- existing forms still parse (regression guard) -------------------------
+
+def test_extract_dash_m_simple():
+ msg = hook.extract_commit_message('git commit -m "fix: tidy parser"')
+ assert msg == "fix: tidy parser"
+
+
+def test_extract_heredoc():
+ cmd = (
+ "git commit -m \"$(cat <<'EOF'\n"
+ "feat: add thing\n"
+ "\n"
+ "body line\n"
+ "EOF\n"
+ ")\""
+ )
+ msg = hook.extract_commit_message(cmd)
+ assert msg.startswith("feat: add thing")
+ assert "body line" in msg
+
+
+def test_extract_unparseable_falls_through():
+ # Bare `git commit` would drop into $EDITOR.
+ assert hook.extract_commit_message("git commit") == hook.UNPARSEABLE_MESSAGE
+
+
+# --- new: -F / --file / --file= forms read the file ------------------------
+
+def test_extract_dash_F_reads_file(tmp_path):
+ f = tmp_path / "msg.txt"
+ f.write_text("fix: from a file\n\nsome body\n")
+ assert hook.extract_commit_message(f"git commit -F {f}") == "fix: from a file\n\nsome body"
+
+
+def test_extract_long_file_flag_reads_file(tmp_path):
+ f = tmp_path / "msg.txt"
+ f.write_text("docs: long form file flag\n")
+ assert hook.extract_commit_message(f"git commit --file {f}") == "docs: long form file flag"
+
+
+def test_extract_file_equals_form_reads_file(tmp_path):
+ f = tmp_path / "msg.txt"
+ f.write_text("chore: equals form\n")
+ assert hook.extract_commit_message(f"git commit --file={f}") == "chore: equals form"
+
+
+def test_extract_F_strips_quotes_around_path(tmp_path):
+ f = tmp_path / "my msg.txt"
+ f.write_text("feat: quoted path\n")
+ assert hook.extract_commit_message(f'git commit -F "{f}"') == "feat: quoted path"
+
+
+# --- the audit-item bug: attribution in a file-backed message is now caught -
+
+def test_file_backed_attribution_is_caught(tmp_path):
+ f = tmp_path / "msg.txt"
+ f.write_text("feat: add widget\n\nCo-Authored-By: Claude <noreply@anthropic.com>\n")
+ msg = hook.extract_commit_message(f"git commit -F {f}")
+ issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+ assert any(i.startswith("AI-attribution") for i in issues)
+
+
+def test_inline_message_without_attribution_is_clean(tmp_path):
+ # Sanity: a clean file-backed message produces no attribution issue.
+ f = tmp_path / "msg.txt"
+ f.write_text("fix: handle empty input\n")
+ msg = hook.extract_commit_message(f"git commit -F {f}")
+ issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+ assert not any(i.startswith("AI-attribution") for i in issues)
+
+
+# --- unreadable file falls through to UNPARSEABLE (fail-safe: ask) ---------
+
+def test_missing_file_falls_through_to_unparseable(tmp_path):
+ missing = tmp_path / "nope.txt"
+ assert hook.extract_commit_message(f"git commit -F {missing}") == hook.UNPARSEABLE_MESSAGE
+
+
+def test_oversized_file_falls_through_and_hook_asks(tmp_path, monkeypatch):
+ f = tmp_path / "big.txt"
+ f.write_text("x" * 5000)
+ # Force the read to refuse via a tiny limit (simulates oversize).
+ monkeypatch.setattr(
+ hook, "read_referenced_file", lambda p, max_bytes=10: None
+ )
+ msg = hook.extract_commit_message(f"git commit -F {f}")
+ assert msg == hook.UNPARSEABLE_MESSAGE
+ # And the hook would ask, because UNPARSEABLE_MESSAGE is a flagged issue.
+ issues = hook.collect_issues(msg, staged=["a.py"], author="Dev <d@e.com>")
+ assert any("not parseable" in i for i in issues)