diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-22 15:38:58 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-22 15:38:58 -0500 |
| commit | 53d33d9cdd5c6d7fd7c4dc7315b04d225add94d6 (patch) | |
| tree | a2cfe5331eed9e22cc9880c3b75e246d74e2239e /hooks/destructive-bash-confirm.py | |
| parent | efcc8e5ffdd09f538fd2d83824f4c632264ad96c (diff) | |
| download | rulesets-53d33d9cdd5c6d7fd7c4dc7315b04d225add94d6.tar.gz rulesets-53d33d9cdd5c6d7fd7c4dc7315b04d225add94d6.zip | |
feat(hooks): scan file-backed messages and harden rm parsing
Two audit gaps in the confirmation hooks, plus the test harness they were missing.
The git-commit and gh-pr-create hooks scanned for AI attribution but only saw inline messages. A commit made with -F/--file or a PR made with --body-file slipped through, since the hook stored a placeholder instead of the file's text, and the publish flow uses -F constantly. A new read_referenced_file helper in _common.py reads the referenced local file (missing, oversized, or non-UTF-8 returns None, which means "couldn't inspect" and never "clean"), so attribution scanning now sees the real committed and posted text. An unreadable file falls through to the existing ask-anyway path.
destructive-bash-confirm.py parsed rm flags by splitting on whitespace, which mangled quoted paths and missed flag variants. detect_rm_rf now tokenizes with shlex, so quoted or spaced paths and combined, separate, or reordered flags all parse. It fails toward asking (a sentinel that still fires the modal) on unbalanced quotes, or when a forced recursive rm sits alongside a pipeline, compound command, substitution, or redirect, since target attribution isn't trustworthy there. The supported and unsupported shell constructs are documented in the docstrings.
These hooks had no tests and weren't in make test. Added a pytest harness under hooks/tests (an importlib-by-path loader, since the hook filenames are hyphenated) with 54 tests across the three hooks and the shared helper, and wired hooks/tests into make test. Full suite green.
Diffstat (limited to 'hooks/destructive-bash-confirm.py')
| -rwxr-xr-x | hooks/destructive-bash-confirm.py | 86 |
1 files changed, 77 insertions, 9 deletions
diff --git a/hooks/destructive-bash-confirm.py b/hooks/destructive-bash-confirm.py index c1cf5f9..be8b491 100755 --- a/hooks/destructive-bash-confirm.py +++ b/hooks/destructive-bash-confirm.py @@ -14,9 +14,20 @@ banner via systemMessage. First match wins — a command with multiple destructive patterns fires on the first detected. Non-destructive Bash calls exit 0 silent. + +Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the +command with `shlex.split`, so it handles a single simple command and its +quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`, +quoted/spaced paths). It does NOT model pipelines, compound commands +(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks), +redirects, aliases, or variable/glob expansion. When any of those appear +alongside a forced recursive `rm` — or when the quoting is unbalanced — +target attribution is unreliable, so the function returns a sentinel and +the modal fires anyway (fail toward asking) rather than silently passing. """ import re +import shlex import subprocess import sys from typing import Optional @@ -26,6 +37,13 @@ from _common import read_payload, respond_ask PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"} +# Returned as the sole target when the command is a forced recursive rm but +# the shell is too complex to attribute targets safely. Forces the modal. +UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)" + +# Constructs that make single-command target attribution unreliable. +_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<") + def main() -> int: payload = read_payload() @@ -90,6 +108,8 @@ def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]: if t in ("/", "~", "$HOME", ".", "..", "*") or t.startswith("/") or t.startswith("~") + or t.startswith("$HOME") + or "*" in t ] if dangerous: ctx["_banner"] = ( @@ -114,25 +134,73 @@ def is_force_push(cmd: str) -> bool: def detect_rm_rf(cmd: str) -> Optional[list[str]]: - """If cmd invokes `rm` with both -r/-R and -f flags, return its targets.""" - m = re.search(r"(?:^|[\s;&|()])rm\s+(.+)$", cmd) - if not m: + """If cmd invokes `rm` with both -r/-R and -f flags, return its targets. + + Tokenizes with `shlex.split`, so quoted/spaced paths and combined + (`-rf`), separate (`-r -f`), or reordered (`-fr`) flags all parse. The + long forms `--recursive` and `--force` count too. Returns: + + - the target list (without flags) for a forced recursive rm, + - None when the command is not a forced recursive rm (no -r, no -f, + or not an `rm` at all) — no modal, + - [UNPARSEABLE_RM_TARGET] when a forced recursive rm is present but + the shell is too complex to attribute targets safely (compound + command, pipeline, command substitution, redirect) or the quoting + is unbalanced — fail toward asking. + + shlex models a single simple command and its quoting only. It does not + model pipelines, compound commands, aliases, or variable/glob + expansion — those fall to the [UNPARSEABLE_RM_TARGET] ask path. + """ + # Quick reject: no `rm` word at all. + if not re.search(r"(?:^|[\s;&|()])rm\b", cmd): + return None + + complex_shell = any(tok in cmd for tok in _COMPLEX_SHELL) + + try: + tokens = shlex.split(cmd) + except ValueError: + # Unbalanced quotes — can't tokenize. If an rm appears at all, ask. + return [UNPARSEABLE_RM_TARGET] + + # Walk tokens; find the first `rm` and parse the flags/targets that + # immediately follow it within the same simple command. + try: + rm_idx = tokens.index("rm") + except ValueError: return None - rest = m.group(1).split() - flag_chars = "" + rest = tokens[rm_idx + 1:] + has_r = False + has_f = False i = 0 while i < len(rest) and rest[i].startswith("-") and rest[i] != "--": - flag_chars += rest[i][1:] + tok = rest[i] + if tok in ("--recursive",): + has_r = True + elif tok in ("--force",): + has_f = True + elif tok.startswith("--"): + pass # some other long flag — ignore + else: + short = tok[1:] + if re.search(r"[rR]", short): + has_r = True + if "f" in short: + has_f = True i += 1 - if rest[i:i+1] == ["--"]: + if rest[i:i + 1] == ["--"]: i += 1 - has_r = bool(re.search(r"[rR]", flag_chars)) - has_f = "f" in flag_chars if not (has_r and has_f): return None + # Forced recursive rm confirmed. If the surrounding shell is too complex + # to trust target attribution, ask anyway rather than guess. + if complex_shell: + return [UNPARSEABLE_RM_TARGET] + return rest[i:] |
