10 files changed, 550 insertions, 13 deletions
diff --git a/Makefile b/Makefile
index 8d433cf..3508510 100644
--- a/Makefile
+++ b/Makefile
@@ -418,6 +418,7 @@ catchup-machine: ## Pull rulesets, refresh install, sync .ai/ across projects, v
 
 test: ## Run all test suites (pytest + ERT + bats)
 	@cd .ai/scripts/tests && python3 -m pytest
+	@cd hooks/tests && python3 -m pytest
 	@set -e; for f in .ai/scripts/tests/test-*.el; do \
 		[ -e "$$f" ] || continue; \
 		echo "ert: $$(basename "$$f")"; \
diff --git a/hooks/_common.py b/hooks/_common.py
index d4bf520..e82f7ed 100644
--- a/hooks/_common.py
+++ b/hooks/_common.py
@@ -16,6 +16,7 @@ the tools themselves.
 """
 
 import json
+import os
 import re
 import sys
 from typing import Optional
@@ -64,6 +65,35 @@ def respond_ask(reason: str, system_message: Optional[str] = None) -> None:
     print(json.dumps(output))
 
 
+def read_referenced_file(path: str, max_bytes: int = 1_000_000) -> Optional[str]:
+    """Read a local file referenced by -F/--file/--body-file so its text can be
+    attribution-scanned.
+
+    A leading `~` in path is expanded. Return the decoded text with newlines
+    normalized to LF, or None if the file can't be safely read (empty path,
+    missing, not a regular file, larger than max_bytes, unreadable, or not
+    valid UTF-8). None means 'could not inspect', never 'clean'.
+    """
+    if not path:
+        return None
+    try:
+        expanded = os.path.expanduser(path)
+        if not os.path.isfile(expanded):
+            return None
+        # Enforce the limit on the bytes actually read rather than on a
+        # separate stat(): the file can grow between the two calls, and some
+        # files (e.g. under /proc) report a size of 0.
+        with open(expanded, "rb") as fh:
+            raw = fh.read(max_bytes + 1)
+        if len(raw) > max_bytes:
+            return None
+        text = raw.decode("utf-8")
+    except (OSError, UnicodeDecodeError):
+        return None
+    # Same universal-newline translation a text-mode open() would apply.
+    return text.replace("\r\n", "\n").replace("\r", "\n")
+
+
 def scan_attribution(text: str) -> list[str]:
     """Return human-readable descriptions of any AI-attribution hits."""
     if not text:
diff --git a/hooks/destructive-bash-confirm.py b/hooks/destructive-bash-confirm.py
index c1cf5f9..be8b491 100755
--- a/hooks/destructive-bash-confirm.py
+++ b/hooks/destructive-bash-confirm.py
@@ -14,9 +14,20 @@ banner via systemMessage. First match wins — a command with multiple
 destructive patterns fires on the first detected.
 
 Non-destructive Bash calls exit 0 silent.
+
+Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the
+command with `shlex.split`, so it handles a single simple command and its
+quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`,
+quoted/spaced paths). It does NOT model pipelines, compound commands
+(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks),
+redirects, aliases, or variable/glob expansion. When any of those appear
+alongside a forced recursive `rm` — or when the quoting is unbalanced —
+target attribution is unreliable, so the function returns a sentinel and
+the modal fires anyway (fail toward asking) rather than silently passing.
 """
 
 import re
+import shlex
 import subprocess
 import sys
 from typing import Optional
@@ -26,6 +37,13 @@ from _common import read_payload, respond_ask
 
 PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"}
 
+# Returned as the sole target when the command is a forced recursive rm but
+# the shell is too complex to attribute targets safely. Forces the modal.
+UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)"
+
+# Constructs that make single-command target attribution unreliable.
+_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<")
+
 
 def main() -> int:
     payload = read_payload()
@@ -90,6 +108,8 @@ def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]:
             if t in ("/", "~", "$HOME", ".", "..", "*")
             or t.startswith("/")
             or t.startswith("~")
+            or t.startswith("$HOME")
+            or "*" in t
         ]
         if dangerous:
             ctx["_banner"] = (
@@ -114,25 +134,81 @@ def is_force_push(cmd: str) -> bool:
 
 
 def detect_rm_rf(cmd: str) -> Optional[list[str]]:
-    """If cmd invokes `rm` with both -r/-R and -f flags, return its targets."""
-    m = re.search(r"(?:^|[\s;&|()])rm\s+(.+)$", cmd)
-    if not m:
-        return None
-
-    rest = m.group(1).split()
-    flag_chars = ""
-    i = 0
-    while i < len(rest) and rest[i].startswith("-") and rest[i] != "--":
-        flag_chars += rest[i][1:]
-        i += 1
-    if rest[i:i+1] == ["--"]:
-        i += 1
-
-    has_r = bool(re.search(r"[rR]", flag_chars))
-    has_f = "f" in flag_chars
-    if not (has_r and has_f):
-        return None
-
-    return rest[i:]
+    """If cmd invokes `rm` with both -r/-R and -f flags, return its targets.
+
+    The command is tokenized with a POSIX `shlex` lexer that also splits
+    shell operators (`;`, `&&`, `|`, `(`, `>`, ...) into their own tokens, so
+    quoted/spaced paths parse and an `rm` glued to an operator
+    (`ls;rm -rf x`, `(rm -rf x)`) is still seen. Every `rm` word is
+    inspected — including path-qualified ones such as `/bin/rm` — not just
+    the first. Flags may be combined (`-rf`), separate (`-r -f`), reordered
+    (`-fr`), long (`--recursive`, `--force`, or a prefix such as `--rec`),
+    and may follow the operands, as GNU rm permits. Returns:
+
+      - the de-duplicated target list (without flags) for a forced
+        recursive rm,
+      - None when no `rm` invocation is both recursive and forced — no
+        modal,
+      - [UNPARSEABLE_RM_TARGET] when a forced recursive rm is present but
+        the shell is too complex to attribute targets safely (compound
+        command, pipeline, command substitution, redirect) or the quoting
+        is unbalanced — fail toward asking.
+
+    Aliases and variable/glob expansion are not modelled; such targets are
+    returned exactly as written.
+    """
+    # Quick reject: no standalone `rm` word (bare, quoted, backslash-escaped
+    # or path-qualified) anywhere in the command.
+    if not re.search(r"(?<![\w.-])rm(?![\w.-])", cmd):
+        return None
+
+    complex_shell = any(tok in cmd for tok in _COMPLEX_SHELL)
+
+    try:
+        lexer = shlex.shlex(cmd, posix=True, punctuation_chars=True)
+        lexer.whitespace_split = True
+        lexer.commenters = ""  # `#` is ordinary text here, as in shlex.split
+        tokens = list(lexer)
+    except ValueError:
+        # Unbalanced quotes — can't tokenize. If an rm appears at all, ask.
+        return [UNPARSEABLE_RM_TARGET]
+
+    operator_chars = set("();<>|&")
+    forced_recursive = False
+    targets: list[str] = []
+    for idx, word in enumerate(tokens):
+        if word.rsplit("/", 1)[-1] != "rm":
+            continue
+        has_r = has_f = options_done = False
+        operands = []
+        for arg in tokens[idx + 1:]:
+            if arg and set(arg) <= operator_chars:
+                break  # shell operator: this simple command ends here
+            if options_done or arg == "-" or not arg.startswith("-"):
+                operands.append(arg)
+            elif arg == "--":
+                options_done = True
+            elif arg.startswith("--"):
+                # getopt_long accepts unambiguous prefixes of long options.
+                has_r = has_r or "--recursive".startswith(arg)
+                has_f = has_f or "--force".startswith(arg)
+            else:
+                has_r = has_r or "r" in arg or "R" in arg
+                has_f = has_f or "f" in arg
+        if has_r and has_f:
+            forced_recursive = True
+            for operand in operands:
+                if operand not in targets:
+                    targets.append(operand)
+
+    if not forced_recursive:
+        return None
+
+    # Forced recursive rm confirmed. If the surrounding shell is too complex
+    # to trust target attribution, ask anyway rather than guess.
+    if complex_shell:
+        return [UNPARSEABLE_RM_TARGET]
+
+    return targets
 
 
diff --git a/hooks/gh-pr-create-confirm.py b/hooks/gh-pr-create-confirm.py
index e3c2f13..f539551 100755
--- a/hooks/gh-pr-create-confirm.py
+++ b/hooks/gh-pr-create-confirm.py
@@ -27,7 +27,12 @@ Wire in ~/.claude/settings.json alongside git-commit-confirm.py:
 import re
 import sys
 
-from _common import read_payload, respond_ask, scan_attribution
+from _common import (
+    read_payload,
+    read_referenced_file,
+    respond_ask,
+    scan_attribution,
+)
 
 
 MAX_BODY_LINES = 20
@@ -91,9 +96,25 @@ def parse_pr_create(cmd: str) -> dict:
         if b:
             fields["body"] = b.group(2).strip()
         else:
-            bf = re.search(r"--body-file\s+(\S+)", cmd)
+            # gh accepts `--body-file <f>`, `--body-file=<f>` and the short
+            # form `-F <f>`; match all of them so a file-backed body is never
+            # skipped by the attribution scan. The path may be quoted.
+            bf = re.search(
+                r"(?:^|\s)(?:--body-file|-F)(?:\s+|=)"
+                r"(\"[^\"]+\"|'[^']+'|\S+)",
+                cmd,
+            )
             if bf:
-                fields["body"] = f"(body read from file: {bf.group(1)})"
+                path = bf.group(1).strip("\"'")
+                # None means the file could not be inspected (missing, too
+                # large, not UTF-8, or `-` for stdin) — say so explicitly.
+                text = read_referenced_file(path)
+                if text is not None:
+                    fields["body"] = text.strip()
+                else:
+                    fields["body"] = (
+                        f"(body read from file: {path} — could not inspect)"
+                    )
 
     # Base / head
     base = re.search(r"--base\s+(\S+)", cmd)
diff --git a/hooks/git-commit-confirm.py b/hooks/git-commit-confirm.py
index 2441d23..618ac20 100755
--- a/hooks/git-commit-confirm.py
+++ b/hooks/git-commit-confirm.py
@@ -42,7 +42,12 @@ import re
 import subprocess
 import sys
 
-from _common import read_payload, respond_ask, scan_attribution
+from _common import (
+    read_payload,
+    read_referenced_file,
+    respond_ask,
+    scan_attribution,
+)
 
 
 MAX_FILES_SHOWN = 25
@@ -140,6 +145,18 @@ def extract_commit_message(cmd: str) -> str:
     if long_form:
         return "\n\n".join(msg for _, msg in long_form).strip()
 
+    # File-backed message: -F <file> / --file <file> / --file=<file>.
+    # Read the file so its text is attribution-scanned. If it can't be read
+    # safely, fall through to UNPARSEABLE_MESSAGE so the hook asks (fail-safe).
+    file_flag = re.search(
+        r"(?:^|\s)(?:-F|--file)[=\s]+(\"[^\"]+\"|'[^']+'|\S+)", cmd
+    )
+    if file_flag:
+        path = file_flag.group(1).strip("\"'")
+        text = read_referenced_file(path)
+        if text is not None:
+            return text.strip()
+
     return UNPARSEABLE_MESSAGE
 
 
diff --git a/hooks/tests/conftest.py b/hooks/tests/conftest.py
new file mode 100644
index 0000000..72c7920
--- /dev/null
+++ b/hooks/tests/conftest.py
@@ -0,0 +1,41 @@
+"""Pytest harness for the hook scripts under hooks/.
+
+Hook filenames are hyphenated (git-commit-confirm.py, etc.), so they
+cannot be imported by module name. `load_hook(filename)` loads them by
+path via importlib, and inserts hooks/ onto sys.path first so each hook's
+own `from _common import ...` resolves.
+"""
+
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+
+
+def load_hook(filename: str):
+    """Load a hook script by filename and return its module object.
+
+    Puts hooks/ on sys.path (once) so the hook's `from _common import ...`
+    resolves, then executes the file under a sanitized `hook_<stem>` name.
+    The module is registered in sys.modules before it runs (importlib recipe).
+    """
+    hooks_dir = str(HOOKS_DIR)
+    if hooks_dir not in sys.path:
+        sys.path.insert(0, hooks_dir)
+    path = HOOKS_DIR / filename
+    mod_name = "hook_" + Path(filename).stem.replace("-", "_")
+    spec = importlib.util.spec_from_file_location(mod_name, path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"could not load hook from {path}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[mod_name] = module  # dataclasses/pickle look it up by name
+    spec.loader.exec_module(module)
+    return module
+
+
+@pytest.fixture
+def load_hook_fixture():
+    return load_hook
diff --git a/hooks/tests/test_common.py b/hooks/tests/test_common.py
new file mode 100644
index 0000000..10e6097
--- /dev/null
+++ b/hooks/tests/test_common.py
@@ -0,0 +1,74 @@
+"""Tests for hooks/_common.py — read_referenced_file and scan_attribution."""
+
+import sys
+from pathlib import Path
+
+HOOKS_DIR = Path(__file__).resolve().parent.parent
+if str(HOOKS_DIR) not in sys.path:
+    sys.path.insert(0, str(HOOKS_DIR))
+
+import _common  # noqa: E402
+
+
+# --- read_referenced_file: normal cases ------------------------------------
+
+def test_read_referenced_file_returns_content(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("hello world\n")
+    assert _common.read_referenced_file(str(f)) == "hello world\n"
+
+
+def test_read_referenced_file_expands_user(tmp_path, monkeypatch):
+    # os.path.expanduser reads HOME on POSIX and USERPROFILE on Windows.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
+    f = tmp_path / "msg.txt"
+    f.write_text("tilde body")
+    assert _common.read_referenced_file("~/msg.txt") == "tilde body"
+
+
+# --- read_referenced_file: error / boundary cases --------------------------
+
+def test_read_referenced_file_missing_returns_none(tmp_path):
+    assert _common.read_referenced_file(str(tmp_path / "nope.txt")) is None
+
+
+def test_read_referenced_file_directory_returns_none(tmp_path):
+    # A directory is not a regular file.
+    assert _common.read_referenced_file(str(tmp_path)) is None
+
+
+def test_read_referenced_file_oversized_returns_none(tmp_path):
+    f = tmp_path / "big.txt"
+    f.write_text("x" * 50)
+    assert _common.read_referenced_file(str(f), max_bytes=10) is None
+
+
+def test_read_referenced_file_at_limit_is_read(tmp_path):
+    f = tmp_path / "edge.txt"
+    f.write_text("12345")  # exactly 5 bytes
+    assert _common.read_referenced_file(str(f), max_bytes=5) == "12345"
+
+
+def test_read_referenced_file_invalid_utf8_returns_none(tmp_path):
+    f = tmp_path / "bin.dat"
+    f.write_bytes(b"\xff\xfe\x00bad")
+    assert _common.read_referenced_file(str(f)) is None
+
+
+def test_read_referenced_file_empty_string_path_returns_none():
+    assert _common.read_referenced_file("") is None
+
+
+# --- scan_attribution sanity (relied on by the file-backed tests) ----------
+
+def test_scan_attribution_catches_coauthor():
+    assert _common.scan_attribution("Co-Authored-By: Claude")
+
+
+def test_scan_attribution_catches_robot_emoji():
+    assert _common.scan_attribution("nice work \U0001F916")
+
+
+def test_scan_attribution_clean_text_empty():
+    assert _common.scan_attribution("fix: tidy up the parser") == []
diff --git a/hooks/tests/test_destructive_bash_confirm.py b/hooks/tests/test_destructive_bash_confirm.py
new file mode 100644
index 0000000..50302e6
--- /dev/null
+++ b/hooks/tests/test_destructive_bash_confirm.py
@@ -0,0 +1,137 @@
+"""Tests for hooks/destructive-bash-confirm.py — shlex-based rm -rf parsing."""
+
+from conftest import load_hook
+
+hook = load_hook("destructive-bash-confirm.py")
+
+SENTINEL = "(unparsed — shell too complex to inspect safely)"
+
+
+# --- detection of flag forms (combined / separate / reordered) -------------
+
+def test_rf_combined_detected():
+    assert hook.detect_rm_rf("rm -rf build") == ["build"]
+
+
+def test_r_f_separate_detected():
+    assert hook.detect_rm_rf("rm -r -f build") == ["build"]
+
+
+def test_fr_reordered_detected():
+    assert hook.detect_rm_rf("rm -fr build") == ["build"]
+
+
+def test_capital_R_detected():
+    assert hook.detect_rm_rf("rm -Rf build") == ["build"]
+
+
+def test_long_flags_detected():
+    targets = hook.detect_rm_rf("rm --recursive --force build")
+    assert targets == ["build"]
+
+
+# --- quoted / spaced paths now parse correctly -----------------------------
+
+def test_quoted_path_with_space_parsed():
+    assert hook.detect_rm_rf('rm -rf "my dir"') == ["my dir"]
+
+
+def test_multiple_targets():
+    assert hook.detect_rm_rf("rm -rf a b c") == ["a", "b", "c"]
+
+
+def test_double_dash_separates_flags_from_paths():
+    assert hook.detect_rm_rf("rm -rf -- -weird-name") == ["-weird-name"]
+
+
+# --- not-a-match cases: no modal -------------------------------------------
+
+def test_no_r_returns_none():
+    assert hook.detect_rm_rf("rm -f file") is None
+
+
+def test_no_f_returns_none():
+    assert hook.detect_rm_rf("rm -r dir") is None
+
+
+def test_not_rm_returns_none():
+    assert hook.detect_rm_rf("rmdir foo") is None
+
+
+def test_plain_rm_returns_none():
+    assert hook.detect_rm_rf("rm file.txt") is None
+
+
+# --- dangerous path banner still fires on parsed targets -------------------
+
+def test_home_var_target_flags_dangerous():
+    detection = hook.detect_destructive('rm -rf "$HOME/x"')
+    assert detection is not None
+    kind, ctx = detection
+    assert kind == "rm -rf"
+    assert "_banner" in ctx
+    assert "$HOME/x" in ctx["_banner"]
+
+
+def test_root_path_flags_dangerous():
+    detection = hook.detect_destructive("rm -rf /etc/foo")
+    assert detection is not None
+    _, ctx = detection
+    assert "_banner" in ctx
+
+
+def test_safe_relative_target_no_banner():
+    detection = hook.detect_destructive("rm -rf build/cache")
+    assert detection is not None
+    _, ctx = detection
+    assert "_banner" not in ctx
+
+
+# --- fail-toward-asking on ambiguity ---------------------------------------
+
+def test_compound_command_returns_sentinel():
+    # Compound command: targets are not attributed, so the sentinel is returned.
+    assert hook.detect_rm_rf("ls && rm -rf foo") == [SENTINEL]
+
+
+def test_pipeline_returns_sentinel():
+    assert hook.detect_rm_rf("find . -type d | xargs rm -rf") == [SENTINEL]
+
+
+def test_semicolon_returns_sentinel():
+    assert hook.detect_rm_rf("cd /tmp; rm -rf junk") == [SENTINEL]
+
+
+def test_command_substitution_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf $(echo target)") == [SENTINEL]
+
+
+def test_backtick_substitution_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf `echo target`") == [SENTINEL]
+
+
+def test_redirect_returns_sentinel():
+    assert hook.detect_rm_rf("rm -rf foo > /dev/null") == [SENTINEL]
+
+
+def test_unbalanced_quotes_returns_sentinel():
+    # shlex.split raises ValueError → ask anyway rather than silently pass.
+    assert hook.detect_rm_rf('rm -rf "unterminated') == [SENTINEL]
+
+
+def test_compound_without_rm_rf_returns_none():
+    # Compound construct but no dangerous rm — should not fire.
+    assert hook.detect_rm_rf("ls && echo done") is None
+
+
+def test_compound_with_rm_but_no_force_returns_none():
+    # `&&` present but the rm has no -f, so nothing to flag.
+    assert hook.detect_rm_rf("ls && rm -r dir") is None
+
+
+def test_sentinel_fires_modal_via_detect_destructive():
+    detection = hook.detect_destructive("ls && rm -rf foo")
+    assert detection is not None
+    kind, ctx = detection
+    assert kind == "rm -rf"
+    assert ctx["targets"] == [SENTINEL]
diff --git a/hooks/tests/test_gh_pr_create_confirm.py b/hooks/tests/test_gh_pr_create_confirm.py
new file mode 100644
index 0000000..19dde2e
--- /dev/null
+++ b/hooks/tests/test_gh_pr_create_confirm.py
@@ -0,0 +1,70 @@
+"""Tests for hooks/gh-pr-create-confirm.py — --body-file reads real content."""
+
+from conftest import load_hook
+
+hook = load_hook("gh-pr-create-confirm.py")
+
+
+# --- existing parsing still works (regression guard) -----------------------
+
+def test_parse_title_and_inline_body():
+    cmd = 'gh pr create --title "feat: thing" --body "does the thing"'
+    fields = hook.parse_pr_create(cmd)
+    assert fields["title"] == "feat: thing"
+    assert fields["body"] == "does the thing"
+
+
+def test_parse_reviewers():
+    cmd = 'gh pr create --title "x" --reviewer alice,bob'
+    fields = hook.parse_pr_create(cmd)
+    assert fields["reviewers"] == ["alice", "bob"]
+
+
+# --- new: --body-file reads the real content -------------------------------
+
+def test_body_file_reads_real_content(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Problem\nthings broke\n\n## Fix\nfixed them\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+    assert "things broke" in fields["body"]
+    assert "fixed them" in fields["body"]
+    # No longer the old placeholder.
+    assert not fields["body"].startswith("(body read from file")
+
+
+def test_body_file_attribution_is_caught(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Summary\nshipped a feature \U0001F916 generated with Claude\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "feat: x" --body-file {f}')
+    scan_text = "\n".join(
+        filter(None, [fields.get("title"), fields.get("body")])
+    )
+    hits = hook.scan_attribution(scan_text)
+    assert hits  # robot emoji + 'Generated with Claude' both leak
+
+
+def test_body_file_clean_content_no_hits(tmp_path):
+    f = tmp_path / "body.md"
+    f.write_text("## Summary\nfixed the off-by-one in the pager\n")
+    fields = hook.parse_pr_create(f'gh pr create --title "fix: pager" --body-file {f}')
+    scan_text = "\n".join(
+        filter(None, [fields.get("title"), fields.get("body")])
+    )
+    assert hook.scan_attribution(scan_text) == []
+
+
+# --- unreadable file keeps an informative could-not-inspect placeholder ----
+
+def test_body_file_missing_keeps_could_not_inspect_placeholder(tmp_path):
+    missing = tmp_path / "nope.md"
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {missing}')
+    assert "could not inspect" in fields["body"]
+    assert str(missing) in fields["body"]
+
+
+def test_body_file_oversized_keeps_placeholder(tmp_path):
+    f = tmp_path / "big.md"
+    # One byte over read_referenced_file's default 1_000_000-byte limit.
+    f.write_text("x" * 1_000_001)
+    fields = hook.parse_pr_create(f'gh pr create --title "x" --body-file {f}')
+    assert "could not inspect" in fields["body"]
diff --git a/hooks/tests/test_git_commit_confirm.py b/hooks/tests/test_git_commit_confirm.py
new file mode 100644
index 0000000..83519ad
--- /dev/null
+++ b/hooks/tests/test_git_commit_confirm.py
@@ -0,0 +1,97 @@
+"""Tests for hooks/git-commit-confirm.py — file-backed commit messages."""
+
+from conftest import load_hook
+
+hook = load_hook("git-commit-confirm.py")
+
+
+# --- existing forms still parse (regression guard) -------------------------
+
+def test_extract_dash_m_simple():
+    msg = hook.extract_commit_message('git commit -m "fix: tidy parser"')
+    assert msg == "fix: tidy parser"
+
+
+def test_extract_heredoc():
+    cmd = (
+        "git commit -m \"$(cat <<'EOF'\n"
+        "feat: add thing\n"
+        "\n"
+        "body line\n"
+        "EOF\n"
+        ")\""
+    )
+    msg = hook.extract_commit_message(cmd)
+    assert msg.startswith("feat: add thing")
+    assert "body line" in msg
+
+
+def test_extract_unparseable_falls_through():
+    # Bare `git commit` would drop into $EDITOR.
+    assert hook.extract_commit_message("git commit") == hook.UNPARSEABLE_MESSAGE
+
+
+# --- new: -F / --file / --file= forms read the file ------------------------
+
+def test_extract_dash_F_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("fix: from a file\n\nsome body\n")
+    assert hook.extract_commit_message(f"git commit -F {f}") == "fix: from a file\n\nsome body"
+
+
+def test_extract_long_file_flag_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("docs: long form file flag\n")
+    assert hook.extract_commit_message(f"git commit --file {f}") == "docs: long form file flag"
+
+
+def test_extract_file_equals_form_reads_file(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("chore: equals form\n")
+    assert hook.extract_commit_message(f"git commit --file={f}") == "chore: equals form"
+
+
+def test_extract_F_strips_quotes_around_path(tmp_path):
+    f = tmp_path / "my msg.txt"
+    f.write_text("feat: quoted path\n")
+    assert hook.extract_commit_message(f'git commit -F "{f}"') == "feat: quoted path"
+
+
+# --- the audit-item bug: attribution in a file-backed message is now caught -
+
+def test_file_backed_attribution_is_caught(tmp_path):
+    f = tmp_path / "msg.txt"
+    f.write_text("feat: add widget\n\nCo-Authored-By: Claude <noreply@anthropic.com>\n")
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+    assert any(i.startswith("AI-attribution") for i in issues)
+
+
+def test_inline_message_without_attribution_is_clean(tmp_path):
+    # Sanity: a clean file-backed message produces no attribution issue.
+    f = tmp_path / "msg.txt"
+    f.write_text("fix: handle empty input\n")
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Real Dev <dev@example.com>")
+    assert not any(i.startswith("AI-attribution") for i in issues)
+
+
+# --- unreadable file falls through to UNPARSEABLE (fail-safe: ask) ---------
+
+def test_missing_file_falls_through_to_unparseable(tmp_path):
+    missing = tmp_path / "nope.txt"
+    assert hook.extract_commit_message(f"git commit -F {missing}") == hook.UNPARSEABLE_MESSAGE
+
+
+def test_oversized_file_falls_through_and_hook_asks(tmp_path):
+    f = tmp_path / "big.txt"
+    # One byte over read_referenced_file's default 1_000_000-byte limit, so
+    # the real size check — not a stub — is what refuses the read.
+    f.write_text("x" * 1_000_001)
+
+    msg = hook.extract_commit_message(f"git commit -F {f}")
+    assert msg == hook.UNPARSEABLE_MESSAGE
+
+    # And the hook would ask, because UNPARSEABLE_MESSAGE is a flagged issue.
+    issues = hook.collect_issues(msg, staged=["a.py"], author="Dev <d@e.com>")
+    assert any("not parseable" in i for i in issues)