1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
|
#!/usr/bin/env python3
"""Capture a screenshot for visual verification (Wayland / Hyprland).
Claude can read PNG files, so this turns "does the UI look right?" into an
inspectable artifact: capture the screen, a window, or a freshly-launched app,
then read the path it prints.
Modes (pick one):
--full capture everything (all outputs) [default]
--active capture the currently focused window
--window REGEX capture the window whose class or title matches REGEX
--list list open windows (class / title / workspace) and exit
--launch CMD run CMD on a transient off-screen (headless) output,
capture it, and tear everything down — verifies an app's
UI without touching the visible workspace
Layout (with --launch):
--layout LAYOUT tiled (window fills output) | monocle (fills, gapless) |
floating (window floats, sized by --size, centered)
--size WxH tiled/monocle: headless output resolution;
floating: window size (e.g. 1600x1000)
--timeout SECONDS how long to wait for the launched window (default 10)
Common:
--output PATH PNG output path (default: timestamped file in /tmp);
the saved path is printed on stdout, parent dir created
--delay SECONDS wait before capturing (let a window settle / animate)
Requires grim and hyprctl. For window/region modes the window must be visible
on the active workspace — grim reads rendered outputs, so a hidden window
captures whatever is drawn at its region. The --launch mode sidesteps that by
rendering on a headless output that is real to the compositor but not displayed.
--launch forces the Wayland backend (DISPLAY unset, GDK/Qt steered to
wayland): an XWayland surface can race the headless-output teardown and
crash the compositor. X11-only apps (e.g. a GTK3/X11 emacs build) fail to
map under that default — pass --x11 to allow XWayland for them; teardown
then waits for the clients to unmap before removing the output, which
narrows the race but cannot provably eliminate it.
"""
import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import time
from datetime import datetime
from typing import NoReturn
def die(msg) -> NoReturn:
    """Print MSG to stderr behind a 'screenshot: ' prefix and exit with status 1."""
    line = f"screenshot: {msg}"
    print(line, file=sys.stderr)
    sys.exit(1)
# --------------------------- pure helpers (unit-tested) ----------------------
def parse_size(size):
    """Turn a 'WxH' string into a (width, height) tuple of ints.

    The whole string must be two digit runs joined by a lowercase 'x';
    anything else (including None or an empty string) is reported via die().
    """
    parsed = re.fullmatch(r"(\d+)x(\d+)", size or "")
    if parsed is None:
        die(f"--size expects WxH (e.g. 1600x1000), got {size!r}")
    width, height = parsed.groups()
    return int(width), int(height)
def geometry_str(client):
    """Format a client's 'at' and 'size' pairs as grim's -g argument: 'x,y wxh'."""
    origin, extent = client["at"], client["size"]
    left, top = origin
    width, height = extent
    return f"{left},{top} {width}x{height}"
def match_windows(clients, regex):
    """Filter CLIENTS to those whose class or title contains a match for REGEX.

    Matching is case-insensitive and unanchored (re.search). A missing or
    None class/title is treated as an empty string. The class is tested
    before the title.
    """
    pattern = re.compile(regex, re.IGNORECASE)

    def hit(client):
        for key in ("class", "title"):
            if pattern.search(client.get(key, "") or ""):
                return True
        return False

    return [client for client in clients if hit(client)]
def launch_rule(wsid, layout):
    """Build the Hyprland exec-rule body that puts a window on WSID silently.

    'monocle' appends a fullscreen rule and 'floating' a float rule; any
    other LAYOUT yields the bare workspace rule.
    """
    base = f"workspace {wsid} silent"
    extra = (";fullscreen 1" if layout == "monocle"
             else ";float" if layout == "floating"
             else "")
    return base + extra
def center_offset(ow, oh, w, h):
    """Return the (x, y) top-left offset that centers a w*h window in an ow*oh output.

    Each coordinate is floor-divided and never goes below zero, so a window
    larger than the output is pinned to the origin on that axis.
    """
    left = (ow - w) // 2
    top = (oh - h) // 2
    return (left if left > 0 else 0), (top if top > 0 else 0)
def wayland_cmd(cmd):
    """Prefix CMD so the launched app comes up as a native Wayland surface.

    Rationale: an XWayland client's configure request can race the
    headless-output teardown — Hyprland's damage path then dereferences the
    removed monitor and the compositor aborts (observed Hyprland 0.55.2,
    2026-06-10). With DISPLAY unset an XWayland surface is impossible, and
    the backend variables point GTK/Qt at Wayland explicitly. An X11-only
    app then simply fails to map, so the script dies with its normal
    no-window error instead of crashing the session.
    """
    env_prefix = "env -u DISPLAY GDK_BACKEND=wayland QT_QPA_PLATFORM=wayland"
    return f"{env_prefix} {cmd}"
def clients_on_workspace(clients, wsid):
    """Select the CLIENTS whose 'workspace' dict carries id WSID.

    A client without a 'workspace' entry is treated as having an empty one,
    so its id reads as None.
    """
    selected = []
    for client in clients:
        workspace = client.get("workspace", {})
        if workspace.get("id") == wsid:
            selected.append(client)
    return selected
# ------------------------------- I/O wrappers --------------------------------
def require_tools(*tools):
    """Die, naming every one of TOOLS that shutil.which cannot find on PATH."""
    absent = []
    for tool in tools:
        if shutil.which(tool) is None:
            absent.append(tool)
    if absent:
        names = ', '.join(absent)
        die(f"missing required tool(s): {names} "
            f"(this script targets Wayland/Hyprland)")
def hypr(*args):
    """Run `hyprctl -j <args>` and return the parsed JSON reply.

    Dies (via die) when hyprctl exits non-zero, or when its stdout is not
    valid JSON — in both cases with a one-line error instead of a traceback.
    """
    out = subprocess.run(["hyprctl", "-j", *args], capture_output=True, text=True)
    request = ' '.join(args)
    if out.returncode != 0:
        # Fall back to stdout so the message is not empty when the failure
        # text was not written to stderr.
        detail = (out.stderr or out.stdout).strip()
        die(f"hyprctl {request} failed: {detail}")
    try:
        return json.loads(out.stdout)
    except json.JSONDecodeError:
        # A zero exit status does not guarantee a JSON body (e.g. a
        # plain-text error reply or empty output); quote a bounded excerpt.
        die(f"hyprctl {request} returned non-JSON output: {out.stdout.strip()[:200]!r}")
def hypr_run(*args):
    """Send a hyprctl command (dispatch/keyword/output) without parsing a reply.

    Best-effort: output is captured and discarded, and the exit status is
    not checked.
    """
    argv = ["hyprctl"]
    argv.extend(args)
    subprocess.run(argv, capture_output=True, text=True)
def grim(output, geometry=None, output_name=None):
    """Invoke grim to write a PNG to OUTPUT; die with its stderr on failure.

    GEOMETRY, when truthy, is passed as `-g` (a region string); OUTPUT_NAME,
    when truthy, is passed as `-o` (a named output). `-g` precedes `-o` when
    both are given.
    """
    selectors = []
    if geometry:
        selectors.extend(("-g", geometry))
    if output_name:
        selectors.extend(("-o", output_name))
    proc = subprocess.run(["grim", *selectors, output],
                          capture_output=True, text=True)
    if proc.returncode != 0:
        die(f"grim failed: {proc.stderr.strip()}")
def list_windows():
    """Print one line per open window: workspace name, class and title.

    Windows are ordered by workspace id, then class. Prints a placeholder
    line when hyprctl reports no clients.
    """
    def order(client):
        return client.get("workspace", {}).get("id", 0), client.get("class", "")

    windows = sorted(hypr("clients"), key=order)
    if not windows:
        print("(no open windows)")
        return
    for win in windows:
        ws_name = win.get("workspace", {}).get("name", "?")
        cls = win.get('class', '')
        title = win.get('title', '')
        print(f" ws:{ws_name:<8} class={cls!r:30} title={title!r}")
def find_window(regex):
    """Return the single open window whose class or title matches REGEX.

    Dies when nothing matches, and dies with the list of candidates when
    more than one window matches.
    """
    hits = match_windows(hypr("clients"), regex)
    count = len(hits)
    if count == 0:
        die(f"no window matches {regex!r} — run --list to see open windows")
    elif count > 1:
        summary = "\n".join(
            f" class={c.get('class','')!r} title={c.get('title','')!r}"
            for c in hits)
        die(f"{count} windows match {regex!r}; narrow it:\n{summary}")
    return hits[0]
def capture_launched(cmd, out, settle, timeout, layout, size, allow_x11=False):
    """Run CMD on a transient headless output, capture it, then tear down.

    Creates a virtual (headless) Hyprland output that the compositor renders
    but does not display, launches CMD onto its workspace without switching
    the visible view, applies LAYOUT, captures the output, and removes
    everything.

    CMD        command line handed to `hyprctl dispatch exec`.
    OUT        PNG path passed to grim.
    SETTLE     seconds to sleep after placing the window, before capturing.
    TIMEOUT    seconds to poll for a newly mapped window before giving up.
    LAYOUT     'tiled'/'monocle' (window fills the output, sized by SIZE) or
               'floating' (window floats at SIZE, centered).
    SIZE       (w, h) or None.
    ALLOW_X11  skips the Wayland-backend forcing for X11-only apps.

    Dies when the headless output cannot be created, when no new window
    appears within TIMEOUT, or when the output is gone before the capture.
    The teardown in the `finally` clause runs in all of those cases once the
    output exists.
    """
    before_mons = {m["name"] for m in hypr("monitors")}
    hypr_run("output", "create", "headless")
    time.sleep(0.4)
    new_mons = [m for m in hypr("monitors") if m["name"] not in before_mons]
    if not new_mons:
        die("could not create a headless output")
    name = new_mons[0]["name"]
    wsid = new_mons[0]["activeWorkspace"]["id"]
    try:
        if size and layout in ("tiled", "monocle"):
            # For the filling layouts SIZE is the output's resolution.
            hypr_run("keyword", "monitor", f"{name},{size[0]}x{size[1]}@60,0x0,1")
            time.sleep(0.2)
        before_wins = {c["address"] for c in hypr("clients")}
        run_cmd = cmd if allow_x11 else wayland_cmd(cmd)
        hypr_run("dispatch", "exec", f"[{launch_rule(wsid, layout)}] {run_cmd}")
        # Monotonic clock: a wall-clock step (NTP, manual change) must not
        # stretch or cut the wait.
        deadline = time.monotonic() + timeout
        new_wins = []
        while time.monotonic() < deadline:
            # Any client address not seen before the launch counts as new.
            new_wins = [c for c in hypr("clients") if c["address"] not in before_wins]
            if new_wins:
                break
            time.sleep(0.3)
        if not new_wins:
            hint = "" if allow_x11 else \
                " (the Wayland backend is forced by default — if the app is X11-only, retry with --x11)"
            die(f"no window appeared within {timeout}s of launching: {cmd}{hint}")
        # Daemon-spawned frames (emacsclient) ignore the exec rule, so apply
        # placement and layout by address — robust for both spawn paths.
        mon = next((m for m in hypr("monitors") if m["name"] == name), None)
        if mon is None:
            # Report cleanly instead of leaking StopIteration.
            die(f"headless output {name} disappeared before capture")
        ow, oh = mon["width"], mon["height"]
        for c in new_wins:
            addr = c["address"]
            if c.get("workspace", {}).get("id") != wsid:
                hypr_run("dispatch", "movetoworkspacesilent", f"{wsid},address:{addr}")
            if layout == "floating":
                hypr_run("dispatch", "setfloating", f"address:{addr}")
                if size:
                    # For the floating layout SIZE is the window's size.
                    hypr_run("dispatch", "resizewindowpixel",
                             f"exact {size[0]} {size[1]},address:{addr}")
                    cx, cy = center_offset(ow, oh, size[0], size[1])
                    hypr_run("dispatch", "movewindowpixel", f"exact {cx} {cy},address:{addr}")
        time.sleep(settle)
        grim(out, output_name=name)
    finally:
        for c in clients_on_workspace(hypr("clients"), wsid):
            hypr_run("dispatch", "closewindow", f"address:{c['address']}")
        # Wait for the clients to actually unmap before removing the output:
        # a configure request still in flight against a just-removed monitor
        # crashes the compositor.
        unmap_deadline = time.monotonic() + 5
        while time.monotonic() < unmap_deadline:
            if not clients_on_workspace(hypr("clients"), wsid):
                break
            time.sleep(0.2)
        # Short grace period after the workspace empties (or the wait expires).
        time.sleep(0.2)
        hypr_run("output", "remove", name)
def resolve_output(path):
    """Pick the PNG destination and make sure its parent directory exists.

    A truthy PATH is returned as given; otherwise a timestamped file name
    under /tmp is generated. The parent directory of the absolute form is
    created if missing.
    """
    if path:
        target = path
    else:
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        target = f"/tmp/claude-screenshot-{stamp}.png"
    parent = os.path.dirname(os.path.abspath(target))
    os.makedirs(parent, exist_ok=True)
    return target
def main():
    """Parse the command line, then list windows or capture a screenshot.

    Exactly one mode runs: --list prints the open windows; --launch captures
    a freshly launched app on a headless output; otherwise grim captures the
    matched window (--window), the focused window (--active), or everything
    (--full / no mode flag). The saved PNG path is printed on stdout.

    Invalid argument values (negative or NaN --delay/--timeout, an empty
    --launch command, an uncompilable --window regex) are rejected through
    argparse's error path rather than surfacing later as a traceback.
    """
    p = argparse.ArgumentParser(add_help=True, description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    mode = p.add_mutually_exclusive_group()
    mode.add_argument("--full", action="store_true", help="capture all outputs (default)")
    mode.add_argument("--active", action="store_true", help="capture the focused window")
    mode.add_argument("--window", metavar="REGEX", help="capture window matching REGEX")
    mode.add_argument("--list", action="store_true", help="list windows and exit")
    mode.add_argument("--launch", metavar="CMD", help="launch CMD off-screen and capture it")
    p.add_argument("--layout", choices=("tiled", "monocle", "floating"), default="tiled",
                   help="window layout for --launch (default: tiled)")
    p.add_argument("--size", help="WxH: output res (tiled/monocle) or window size (floating)")
    p.add_argument("--timeout", type=float, default=10.0,
                   help="seconds to wait for the --launch window (default 10)")
    p.add_argument("--x11", action="store_true",
                   help="allow the --launch app to map via XWayland (X11-only "
                        "apps); default forces the Wayland backend to avoid a "
                        "compositor race on teardown")
    p.add_argument("--output", "-o", help="PNG output path")
    p.add_argument("--delay", type=float, default=0.0, help="seconds to wait before capture")
    args = p.parse_args()

    # `not x >= 0` also rejects NaN, which time.sleep() refuses just like a
    # negative value.
    if not args.delay >= 0:
        p.error("--delay must be a non-negative number of seconds")
    if not args.timeout >= 0:
        p.error("--timeout must be a non-negative number of seconds")
    # An empty command is falsy and would otherwise fall through to a
    # full-screen capture without any warning.
    if args.launch is not None and not args.launch.strip():
        p.error("--launch expects a non-empty command")
    if args.window:
        try:
            re.compile(args.window)
        except re.error as exc:
            p.error(f"--window: invalid regex {args.window!r}: {exc}")

    require_tools("grim", "hyprctl")
    if args.list:
        list_windows()
        return
    size = parse_size(args.size) if args.size else None
    if args.launch:
        out = resolve_output(args.output)
        # A zero/unset --delay means "use the 1.5 s settle default": a
        # just-mapped window needs time to draw before the capture.
        settle = args.delay if args.delay else 1.5
        capture_launched(args.launch, out, settle, args.timeout, args.layout, size,
                         allow_x11=args.x11)
        print(out)
        return
    if args.delay:
        time.sleep(args.delay)
    geometry = None
    if args.window:
        geometry = geometry_str(find_window(args.window))
    elif args.active:
        win = hypr("activewindow")
        # Treat a falsy reply or one lacking 'at' as "no active window".
        if not win or "at" not in win:
            die("no active window")
        geometry = geometry_str(win)
    # else: --full / default → whole screen
    out = resolve_output(args.output)
    grim(out, geometry)
    print(out)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|