1 files changed, 77 insertions, 9 deletions
diff --git a/hooks/destructive-bash-confirm.py b/hooks/destructive-bash-confirm.py
index c1cf5f9..be8b491 100755
--- a/hooks/destructive-bash-confirm.py
+++ b/hooks/destructive-bash-confirm.py
@@ -14,9 +14,20 @@ banner via systemMessage. First match wins — a command with multiple
 destructive patterns fires on the first detected.
 
 Non-destructive Bash calls exit 0 silent.
+
+Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the
+command with `shlex.split`, so it handles a single simple command and its
+quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`,
+quoted/spaced paths). It does NOT model pipelines, compound commands
+(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks),
+redirects, aliases, or variable/glob expansion. When any of those appear
+alongside a forced recursive `rm` — or when the quoting is unbalanced —
+target attribution is unreliable, so the function returns a sentinel and
+the modal fires anyway (fail toward asking) rather than silently passing.
 """
 
 import re
+import shlex
 import subprocess
 import sys
 from typing import Optional
@@ -26,6 +37,13 @@ from _common import read_payload, respond_ask
 
 PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"}
 
+# Returned as the sole target when the command is a forced recursive rm but
+# the shell is too complex to attribute targets safely. Forces the modal.
+UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)"
+
+# Constructs that make single-command target attribution unreliable.
+_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<")
+
 
 def main() -> int:
     payload = read_payload()
@@ -90,6 +108,8 @@ def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]:
             if t in ("/", "~", "$HOME", ".", "..", "*")
             or t.startswith("/")
             or t.startswith("~")
+            or t.startswith("$HOME")
+            or "*" in t
         ]
         if dangerous:
             ctx["_banner"] = (
@@ -114,25 +134,73 @@ def is_force_push(cmd: str) -> bool:
 
 
 def detect_rm_rf(cmd: str) -> Optional[list[str]]:
-    """If cmd invokes `rm` with both -r/-R and -f flags, return its targets."""
-    m = re.search(r"(?:^|[\s;&|()])rm\s+(.+)$", cmd)
-    if not m:
+    """If cmd invokes `rm` with both -r/-R and -f flags, return its targets.
+
+    Tokenizes with `shlex.split`, so quoted/spaced paths and combined
+    (`-rf`), separate (`-r -f`), or reordered (`-fr`) flags all parse. The
+    long forms `--recursive` and `--force` count too. Returns:
+
+      - the target list (without flags) for a forced recursive rm,
+      - None when the command is not a forced recursive rm (no -r, no -f,
+        or not an `rm` at all) — no modal,
+      - [UNPARSEABLE_RM_TARGET] when a forced recursive rm is present but
+        the shell is too complex to attribute targets safely (compound
+        command, pipeline, command substitution, redirect) or the quoting
+        is unbalanced — fail toward asking.
+
+    shlex models a single simple command and its quoting only. It does not
+    model pipelines, compound commands, aliases, or variable/glob
+    expansion — those fall to the [UNPARSEABLE_RM_TARGET] ask path.
+    """
+    # Quick reject: no `rm` word at all.
+    if not re.search(r"(?:^|[\s;&|()])rm\b", cmd):
+        return None
+
+    complex_shell = any(tok in cmd for tok in _COMPLEX_SHELL)
+
+    try:
+        tokens = shlex.split(cmd)
+    except ValueError:
+        # Unbalanced quotes — can't tokenize. If an rm appears at all, ask.
+        return [UNPARSEABLE_RM_TARGET]
+
+    # Walk tokens; find the first `rm` and parse the flags/targets that
+    # immediately follow it within the same simple command.
+    try:
+        rm_idx = tokens.index("rm")
+    except ValueError:
         return None
 
-    rest = m.group(1).split()
-    flag_chars = ""
+    rest = tokens[rm_idx + 1:]
+    has_r = False
+    has_f = False
     i = 0
     while i < len(rest) and rest[i].startswith("-") and rest[i] != "--":
-        flag_chars += rest[i][1:]
+        tok = rest[i]
+        if tok in ("--recursive",):
+            has_r = True
+        elif tok in ("--force",):
+            has_f = True
+        elif tok.startswith("--"):
+            pass  # some other long flag — ignore
+        else:
+            short = tok[1:]
+            if re.search(r"[rR]", short):
+                has_r = True
+            if "f" in short:
+                has_f = True
         i += 1
-    if rest[i:i+1] == ["--"]:
+    if rest[i:i + 1] == ["--"]:
         i += 1
 
-    has_r = bool(re.search(r"[rR]", flag_chars))
-    has_f = "f" in flag_chars
     if not (has_r and has_f):
         return None
 
+    # Forced recursive rm confirmed. If the surrounding shell is too complex
+    # to trust target attribution, ask anyway rather than guess.
+    if complex_shell:
+        return [UNPARSEABLE_RM_TARGET]
+
     return rest[i:]