hooks/destructive-bash-confirm.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

#!/usr/bin/env python3
"""PreToolUse hook for Bash: gate destructive commands behind a modal.

Detects and asks for confirmation before:
  - git push --force / -f / --force-with-lease  (overwrites remote history)
  - git reset --hard                             (discards working-tree)
  - git clean -f                                 (deletes untracked files)
  - git branch -D                                (force-deletes branches)
  - rm -rf  (any flag combo containing both -r/-R and -f)

Each pattern emits a modal with the command, local context (current
branch, uncommitted line count, targeted paths, etc.), and a warning
banner via systemMessage. First match wins — a command with multiple
destructive patterns fires on the first detected.

Non-destructive Bash calls exit 0 silent.

Shell-parsing scope for `rm -rf` detection: `detect_rm_rf` tokenizes the
command with `shlex.split`, so it handles a single simple command and its
quoting/escaping (combined flags `-rf`, separate `-r -f`, reordered `-fr`,
quoted/spaced paths). It does NOT model pipelines, compound commands
(`;`, `&&`, `||`, `|`), command substitution (`$(...)`, backticks),
redirects, aliases, or variable/glob expansion. When any of those appear
alongside a forced recursive `rm` — or when the quoting is unbalanced —
target attribution is unreliable, so the function returns a sentinel and
the modal fires anyway (fail toward asking) rather than silently passing.
"""

import re
import shlex
import subprocess
import sys
from typing import Optional

from _common import read_payload, respond_ask


# Branch names that upgrade a force-push confirmation to the stronger
# "PROTECTED branch" banner. Compared against the currently checked-out
# branch name.
PROTECTED_BRANCHES = {"main", "master", "develop", "release", "prod", "production"}

# Returned as the sole target when the command is a forced recursive rm but
# the shell is too complex to attribute targets safely. Forces the modal.
UNPARSEABLE_RM_TARGET = "(unparsed — shell too complex to inspect safely)"

# Constructs that make single-command target attribution unreliable.
# These are tested as raw substrings of the command text, so an occurrence
# inside quotes counts as well (deliberately conservative).
_COMPLEX_SHELL = (";", "&&", "||", "|", "$(", "`", ">", "<")


def main() -> int:
    """Hook entry point: ask for confirmation when a Bash command is destructive.

    Reads the hook payload, ignores anything that is not a ``Bash`` tool
    call, and runs the command string through ``detect_destructive``. When a
    destructive pattern is found, the confirmation text and banner are handed
    to ``respond_ask``.

    Returns:
        Always 0; the decision is communicated through ``respond_ask``
        (presumably by writing the hook response — see ``_common``), not
        through the exit code.
    """
    payload = read_payload()
    if payload.get("tool_name") != "Bash":
        return 0

    # Tolerate an explicit null for either field instead of crashing the
    # hook with AttributeError/TypeError further down.
    tool_input = payload.get("tool_input") or {}
    cmd = tool_input.get("command") or ""

    detection = detect_destructive(cmd)
    if not detection:
        return 0

    kind, context = detection
    # Pull the internal banner override out BEFORE formatting, otherwise the
    # private "_banner" key is rendered as a user-visible context row.
    banner = context.pop("_banner", f"DESTRUCTIVE: {kind}")
    reason = format_confirmation(kind, cmd, context)

    respond_ask(reason, system_message=banner)
    return 0


def detect_destructive(cmd: str) -> Optional[tuple[str, dict]]:
    """Return (kind, context) for the first destructive pattern matched.

    Patterns are checked in a fixed order — force-push, ``git reset --hard``,
    ``git clean -f``/``--force``, ``git branch -D``, then forced recursive
    ``rm`` — and the first hit wins.

    Args:
        cmd: The raw Bash command string.

    Returns:
        ``None`` when nothing destructive is detected. Otherwise a tuple of
        a short label for the pattern and a context dict of local details to
        show the user. The dict may carry a private ``"_banner"`` key with a
        stronger banner text; the caller is expected to remove it before
        displaying the context.
    """

    if is_force_push(cmd):
        branch = run_git(["rev-parse", "--abbrev-ref", "HEAD"]).strip()
        # `rev-parse --abbrev-ref HEAD` prints the literal "HEAD" on a
        # detached checkout; an empty string means git gave us nothing.
        detached = branch in ("", "HEAD")
        ctx: dict = {"branch": "(detached)" if detached else branch}
        if branch in PROTECTED_BRANCHES:
            ctx["_banner"] = (
                f"DESTRUCTIVE: force-push to PROTECTED branch '{branch}' — "
                f"rewrites shared history."
            )
        return "git push --force", ctx

    if re.search(r"(?:^|[\s;&|()])git\s+reset\s+(?:\S+\s+)*--hard\b", cmd):
        staged = count_lines(run_git(["diff", "--cached", "--stat"]))
        unstaged = count_lines(run_git(["diff", "--stat"]))
        # `--stat` output ends with one summary line; drop it from the count
        # (clamped so an empty diff reports 0, not -1).
        return "git reset --hard", {
            "staged_files": max(staged - 1, 0),
            "unstaged_files": max(unstaged - 1, 0),
        }

    # Accept both a short flag cluster containing `f` (-f, -fd, -xdf) and the
    # long form `--force`, which the short-flag pattern alone cannot match.
    if re.search(
        r"(?:^|[\s;&|()])git\s+clean\s+(?:\S+\s+)*(?:-[a-zA-Z]*f|--force\b)",
        cmd,
    ):
        untracked = run_git(["ls-files", "--others", "--exclude-standard"])
        return "git clean -f", {
            "untracked_files": len(untracked.splitlines()),
        }

    if m := re.search(r"(?:^|[\s;&|()])git\s+branch\s+(?:\S+\s+)*-D\s+(\S+)", cmd):
        target = m.group(1)
        # NOTE(review): the unmerged-commit count is always measured against
        # a branch literally named "main"; repos with another default branch
        # simply get no count (run_git yields "" when the ref is unknown).
        unmerged = run_git(
            ["log", f"main..{target}", "--oneline"]
        ).strip() if target else ""
        ctx = {"branch_to_delete": target}
        if unmerged:
            ctx["unmerged_commits"] = len(unmerged.splitlines())
        return "git branch -D", ctx

    rm_targets = detect_rm_rf(cmd)
    if rm_targets is not None:
        ctx = {"targets": rm_targets or ["(none parsed)"]}
        # Absolute paths, home-relative paths, cwd/parent and anything with a
        # glob get the louder banner.
        dangerous = [
            t for t in rm_targets
            if t in ("/", "~", "$HOME", ".", "..", "*")
            or t.startswith("/")
            or t.startswith("~")
            or t.startswith("$HOME")
            or "*" in t
        ]
        if dangerous:
            ctx["_banner"] = (
                f"DESTRUCTIVE: rm -rf targeting root/home/wildcard paths: "
                f"{', '.join(dangerous)}"
            )
        return "rm -rf", ctx

    return None


def is_force_push(cmd: str) -> bool:
    """Match `git push` with any force variant.

    Recognized force variants:
      - ``--force`` / ``--force-with-lease`` anywhere in the command,
      - a short flag cluster containing ``f`` (``-f``, ``-uf``) after the
        first occurrence of ``push``,
      - a ``+refspec`` (``git push origin +main``), which tells git to
        update the remote ref even when it is not a fast-forward.

    The checks are textual and deliberately lean toward returning True: a
    stray ``-f`` or ``+word`` later in a compound command also counts.

    Args:
        cmd: The raw Bash command string.

    Returns:
        True when the command contains a ``git ... push`` and at least one
        force indicator, False otherwise.
    """
    if not re.search(r"(?:^|[\s;&|()])git\s+(?:\S+\s+)*push\b", cmd):
        return False

    # Short flags and +refspecs are only meaningful after the push word.
    after_push = cmd[cmd.find("push"):]

    # Look for --force / --force-with-lease / -f as a standalone flag
    # (avoid matching -f inside a longer token that isn't a flag chain),
    # or a refspec with a leading `+`.
    return bool(
        re.search(r"(?:\s|^)--force(?:-with-lease)?\b", cmd)
        or re.search(r"(?:\s|^)-[a-zA-Z]*f[a-zA-Z]*\b", after_push)
        or re.search(r"\s\+\S", after_push)
    )


def _rm_rf_targets(words: list[str]) -> Optional[list[str]]:
    """Return the operands of a forced recursive rm in one simple command.

    ``words`` is the token list of a single simple command. Returns None
    when it contains no ``rm`` word or the rm lacks either the recursive or
    the force flag.
    """
    # The command word may be path-qualified (/bin/rm) or carry a leading
    # backtick from an unquoted command substitution.
    rm_idx = next(
        (
            idx for idx, word in enumerate(words)
            if word.lstrip("`").rsplit("/", 1)[-1] == "rm"
        ),
        None,
    )
    if rm_idx is None:
        return None

    has_r = False
    has_f = False
    operands: list[str] = []
    options_done = False
    # GNU rm accepts options after operands (`rm dir -rf`), so flags are
    # collected across the whole command, up to a literal `--`.
    for word in words[rm_idx + 1:]:
        if options_done or word == "-" or not word.startswith("-"):
            operands.append(word)
        elif word == "--":
            options_done = True
        elif word.startswith("--"):
            has_r = has_r or word == "--recursive"
            has_f = has_f or word == "--force"
        else:
            cluster = word[1:]
            has_r = has_r or "r" in cluster or "R" in cluster
            has_f = has_f or "f" in cluster

    return operands if has_r and has_f else None


def detect_rm_rf(cmd: str) -> Optional[list[str]]:
    """If cmd invokes `rm` with both -r/-R and -f flags, return its targets.

    The command is tokenized with a POSIX-mode ``shlex`` lexer that also
    splits shell punctuation (``;``, ``&&``, ``|``, parentheses, redirects)
    into separate tokens. Every simple command between those separators is
    inspected, so a forced recursive rm is found even when it is not the
    first command, when ``rm`` is glued to a separator (``;rm``, ``(rm``),
    or when it is path-qualified (``/bin/rm``). Combined (``-rf``), separate
    (``-r -f``), reordered (``-fr``), trailing (``rm dir -rf``) and long
    (``--recursive --force``) flags all count.

    Returns:
      - the target list (without flags) for a forced recursive rm in a
        single simple command,
      - None when no simple command is a forced recursive rm — no modal,
      - [UNPARSEABLE_RM_TARGET] when a forced recursive rm is present but
        the shell is too complex to attribute targets safely (compound
        command, pipeline, command substitution, redirect, background
        `&`) or the quoting is unbalanced — fail toward asking.

    Not modelled: aliases, variable/glob expansion, and an rm hidden inside
    a quoted string (e.g. a double-quoted ``$(...)``).
    """
    # Quick reject: no `rm` command word at all. Also accepts a leading
    # `/`, backslash or backtick so /bin/rm, \rm and `rm are not skipped.
    if not re.search(r"(?:^|[\s;&|()`/\\])rm\b", cmd):
        return None

    complex_shell = any(tok in cmd for tok in _COMPLEX_SHELL)

    lexer = shlex.shlex(cmd, posix=True, punctuation_chars=True)
    lexer.whitespace_split = True
    lexer.commenters = ""  # same as shlex.split: `#` does not start a comment
    try:
        tokens = list(lexer)
    except ValueError:
        # Unbalanced quotes — can't tokenize. If an rm appears at all, ask.
        return [UNPARSEABLE_RM_TARGET]

    # Break the token stream into simple commands at punctuation tokens.
    separators = frozenset("();<>|&")
    simple_commands: list[list[str]] = [[]]
    saw_separator = False
    for tok in tokens:
        if tok and separators.issuperset(tok):
            saw_separator = True
            simple_commands.append([])
        else:
            simple_commands[-1].append(tok)

    for words in simple_commands:
        targets = _rm_rf_targets(words)
        if targets is None:
            continue
        # Forced recursive rm confirmed. If the surrounding shell is too
        # complex to trust target attribution, ask anyway rather than guess.
        if complex_shell or saw_separator:
            return [UNPARSEABLE_RM_TARGET]
        return targets

    return None


def run_git(args: list) -> str:
    """Run ``git <args>`` and return whatever it printed on stdout.

    The call is best-effort: it is capped at 3 seconds, and a timeout, a
    missing ``git`` binary or any other OS-level failure yields an empty
    string instead of an exception. The exit status is not inspected, so a
    failing git command returns its (usually empty) stdout.
    """
    git_cmd = ["git"] + args
    try:
        completed = subprocess.run(
            git_cmd,
            capture_output=True,
            text=True,
            timeout=3,
        )
    except (subprocess.SubprocessError, OSError):
        # FileNotFoundError is an OSError, so a missing binary lands here too.
        return ""
    return completed.stdout


def count_lines(text: str) -> int:
    """Count the lines of *text* that are not empty or whitespace-only."""
    return sum(1 for line in text.splitlines() if line.strip())


def format_confirmation(kind: str, cmd: str, context: dict) -> str:
    """Build the multi-line confirmation text shown to the user.

    Args:
        kind: Short label of the destructive pattern (e.g. ``"rm -rf"``).
        cmd: The command string, echoed back under "Command:".
        context: Details to list under "Context:". Keys starting with an
            underscore (such as ``"_banner"``) are internal and are not
            rendered.

    Returns:
        The confirmation message as a single newline-joined string.
    """
    lines = [f"Run destructive command — {kind}?", ""]
    lines.append("Command:")
    lines.append(f"  {cmd}")
    lines.append("")

    # Internal bookkeeping keys must never leak into the user-facing text.
    visible = {
        key: val for key, val in context.items()
        if not str(key).startswith("_")
    }
    if visible:
        lines.append("Context:")
        lines.extend(f"  {key}: {val}" for key, val in visible.items())
        lines.append("")

    lines.append("This operation is destructive and typically irreversible.")
    lines.append("Confirm before proceeding.")
    return "\n".join(lines)


# Script entry point: run the hook and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())