aboutsummaryrefslogtreecommitdiff
path: root/hooks/destructive-bash-confirm.py
diff options
context:
space:
mode:
Diffstat (limited to 'hooks/destructive-bash-confirm.py')
-rwxr-xr-xhooks/destructive-bash-confirm.py86
1 files changed, 77 insertions, 9 deletions
diff --git a/hooks/destructive-bash-confirm.py b/hooks/destructive-bash-confirm.py
index c1cf5f9..be8b491 100755
--- a/hooks/destructive-bash-confirm.py
+++ b/hooks/destructive-bash-confirm.py
@@ -14,9 +14,20 @@ banner via systemMessage. First match wins — a command with multiple
destructive patterns fires on the first detected.
Non-destructive Bash calls exit 0 silent.
+
+Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the
+command with `shlex.split`, so it handles a single simple command and its
+quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`,
+quoted/spaced paths). It does NOT model pipelines, compound commands
+(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks),
+redirects, aliases, or variable/glob expansion. When a pipeline, compound
+command, command substitution, or redirect appears alongside a forced
+recursive `rm` — or when the quoting is unbalanced — target attribution is
+unreliable, so the function returns a sentinel and the modal fires anyway
+(fail toward asking) rather than silently passing. Aliases and
+variable/glob expansion are not detected at all: such targets are reported
+exactly as written (e.g. `$HOME/x`, `*.log`), never expanded.
"""
import re
+import shlex
import subprocess
import sys
from typing import Optional
@@ -26,6 +37,13 @@ from _common import read_payload, respond_ask
PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"}
+# Returned as the sole target when the command is a forced recursive rm but
+# the shell is too complex to attribute targets safely. Forces the modal.
+UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)"
+
+# Constructs that make single-command target attribution unreliable.
+_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<")
+
def main() -> int:
payload = read_payload()
@@ -90,6 +108,8 @@ def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]:
if t in ("/", "~", "$HOME", ".", "..", "*")
or t.startswith("/")
or t.startswith("~")
+ or t.startswith("$HOME")
+ or "*" in t
]
if dangerous:
ctx["_banner"] = (
@@ -114,25 +134,73 @@ def is_force_push(cmd: str) -> bool:
def detect_rm_rf(cmd: str) -> Optional[list[str]]:
    """Return the targets of a forced recursive `rm` found in *cmd*.

    The command is tokenized with `shlex` in POSIX mode with shell
    punctuation (`;`, `&`, `|`, `(`, `)`, `<`, `>`) split into separate
    tokens, so quoted/spaced paths parse correctly and an `rm` glued to an
    operator (`true;rm -rf /`) is still seen as its own word. Every `rm`
    word in the command is inspected, not just the first one, so a harmless
    leading `rm foo` cannot mask a later `rm -rf /`.

    Flags are recognised when they immediately follow the `rm` word:
    combined (`-rf`), separate (`-r -f`), reordered (`-fr`), `-R`, and the
    long forms `--recursive` / `--force`. A literal `--` ends flag parsing.

    Returns:
        - the target list (flags and `--` removed) of the first forced
          recursive `rm`, when the command is a plain simple command. Any
          tokens after those targets (e.g. a second command on a following
          line) are included, which errs toward reporting more;
        - None when no `rm` in the command carries both a recursive and a
          force flag — no modal;
        - [UNPARSEABLE_RM_TARGET] when a forced recursive `rm` is present
          but the command contains shell operators or any `_COMPLEX_SHELL`
          construct (compound command, pipeline, command substitution,
          redirect), or when the quoting is unbalanced and the command
          cannot be tokenized at all — fail toward asking.

    Aliases and variable/glob expansion are not modelled; such targets are
    returned exactly as written.
    """
    # Quick reject: no `rm` word at all. The character class also admits
    # quotes, backticks, `/` and `\` so that `"rm"`, `` `rm ``, `/bin/rm`
    # and `\rm` reach the tokenizer instead of being waved through here.
    if not re.search(r"(?:^|[\s;&|()`'\"/\\])rm\b", cmd):
        return None

    lexer = shlex.shlex(cmd, posix=True, punctuation_chars=True)
    lexer.whitespace_split = True
    # Match shlex.split(): `#` is ordinary text, not a comment introducer.
    lexer.commenters = ""
    try:
        tokens = list(lexer)
    except ValueError:
        # Unbalanced quotes or dangling escape — can't tokenize. An `rm`
        # word is known to be present, so ask rather than guess.
        return [UNPARSEABLE_RM_TARGET]

    # Inspect every `rm` word; stop at the first forced recursive one.
    targets: Optional[list[str]] = None
    for idx, tok in enumerate(tokens):
        if not _is_rm_word(tok):
            continue
        targets = _forced_recursive_targets(tokens[idx + 1:])
        if targets is not None:
            break
    if targets is None:
        return None

    # Forced recursive rm confirmed. If the surrounding shell is too complex
    # to trust target attribution, ask anyway rather than guess. A token
    # made only of punctuation characters is a shell operator (or a quoted
    # look-alike, which is treated the same way to stay conservative).
    operator_chars = "();<>|&"
    has_operator = any(tok and not tok.strip(operator_chars) for tok in tokens)
    if has_operator or any(marker in cmd for marker in _COMPLEX_SHELL):
        return [UNPARSEABLE_RM_TARGET]

    return targets


def _is_rm_word(token: str) -> bool:
    """Return True if *token* names `rm`, bare, path-qualified or backticked."""
    return token.strip("`").rsplit("/", 1)[-1] == "rm"


def _forced_recursive_targets(args: list[str]) -> Optional[list[str]]:
    """Parse the tokens following an `rm` word.

    Returns the tokens after the leading flags (and an optional `--`) when
    both a recursive and a force flag are present, otherwise None.
    """
    has_r = False
    has_f = False
    i = 0
    while i < len(args) and args[i].startswith("-") and args[i] != "--":
        flag = args[i]
        if flag == "--recursive":
            has_r = True
        elif flag == "--force":
            has_f = True
        elif not flag.startswith("--"):
            # Bundle of short flags, e.g. `-rf`, `-fR`, `-rvf`.
            short = flag[1:]
            if "r" in short or "R" in short:
                has_r = True
            if "f" in short:
                has_f = True
        # Any other long flag is irrelevant to the decision and is skipped.
        i += 1
    if args[i:i + 1] == ["--"]:
        i += 1
    if not (has_r and has_f):
        return None
    return args[i:]