3 files changed, 67 insertions, 15 deletions
diff --git a/.claude/hooks/validate-el.sh b/.claude/hooks/validate-el.sh
index db1770b..d6999ac 100755
--- a/.claude/hooks/validate-el.sh
+++ b/.claude/hooks/validate-el.sh
@@ -15,7 +15,11 @@ set -u
 # Emit a JSON failure payload and exit 2. Arguments:
 #   $1 — short failure type (e.g. "PAREN CHECK FAILED")
 #   $2 — file path
-#   $3 — emacs output (error body)
+#   $3 — emacs output (error body), always sent to Claude in additionalContext
+#   $4 — optional compact terminal echo; when set, the terminal shows this
+#        instead of the full $3 (Claude still gets the full $3). Used by the
+#        test runner so a failing suite prints a short summary to the pane
+#        rather than dumping every ERT backtrace.
 fail_json() {
     local ctx
     ctx="$(printf '%s: %s\n\n%s\n\nFix before proceeding.' "$1" "$2" "$3" \
@@ -23,7 +27,7 @@ fail_json() {
     cat <<EOF
 {"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": $ctx}}
 EOF
-    printf '%s: %s\n%s\n' "$1" "$2" "$3" >&2
+    printf '%s: %s\n%s\n' "$1" "$2" "${4:-$3}" >&2
     exit 2
 }
 
@@ -35,13 +39,6 @@ f="$(jq -r '.tool_input.file_path // .tool_response.filePath // empty')"
 [ -z "$f" ] && exit 0
 [ "${f##*.}" = "el" ] || exit 0
 
-# Skip files outside the project — the hook's -L paths only cover this repo,
-# so byte-compiling another project's .el will fail on its own requires.
-case "$f" in
-  "$PROJECT_ROOT"/*) ;;
-  *) exit 0 ;;
-esac
-
 MAX_AUTO_TEST_FILES=20  # skip if more matches than this (large test suites)
 
 # --- Phase 1: syntax + byte-compile ---
@@ -58,6 +55,7 @@ case "$f" in
                      -L "$PROJECT_ROOT" \
                      -L "$PROJECT_ROOT/modules" \
                      -L "$PROJECT_ROOT/tests" \
+                     -L "$PROJECT_ROOT/themes" \
                      --eval '(package-initialize)' \
                      "$f" \
                      --eval '(check-parens)' \
@@ -95,17 +93,21 @@ count="${#tests[@]}"
 if [ "$count" -ge 1 ] && [ "$count" -le "$MAX_AUTO_TEST_FILES" ]; then
   load_args=()
   for t in "${tests[@]}"; do load_args+=("-l" "$t"); done
-  # Run from tests/ so each file's `(require 'test-bootstrap (expand-file-name
-  # "test-bootstrap.el"))` resolves against the directory the bootstrap lives in,
-  # not the project root.
-  if ! output="$(cd "$PROJECT_ROOT/tests" && emacs --batch --no-site-file --no-site-lisp \
+  if ! output="$(emacs --batch --no-site-file --no-site-lisp \
                    -L "$PROJECT_ROOT" \
                    -L "$PROJECT_ROOT/modules" \
                    -L "$PROJECT_ROOT/tests" \
+                   -L "$PROJECT_ROOT/themes" \
                    --eval '(package-initialize)' \
                    -l ert "${load_args[@]}" \
                    --eval "(ert-run-tests-batch-and-exit '(not (tag :slow)))" 2>&1)"; then
-    fail_json "TESTS FAILED ($count test file(s))" "$f" "$output"
+    # Terminal gets a compact summary (the run tally + the failing test names);
+    # Claude still gets the full backtrace via additionalContext.  Keeps the
+    # pane from drowning in ERT stack frames on every red test.
+    summary="$(printf '%s\n' "$output" \
+        | grep -E '^Ran [0-9]+ tests|unexpected results:|^[[:space:]]+FAILED' || true)"
+    [ -n "$summary" ] && summary="${summary}"$'\n'"(full backtrace in Claude's context)"
+    fail_json "TESTS FAILED ($count test file(s))" "$f" "$output" "$summary"
   fi
 fi
 
diff --git a/.claude/rules/elisp-testing.md b/.claude/rules/elisp-testing.md
index b5def78..7c3a9ef 100644
--- a/.claude/rules/elisp-testing.md
+++ b/.claude/rules/elisp-testing.md
@@ -37,6 +37,12 @@ Every non-trivial function needs at least:
 
 Missing a category is a test gap. If three cases look near-identical, parametrize with a loop or `dolist` rather than copy-pasting.
 
+### Measuring it — `make coverage-summary`
+
+The bundle ships a coverage summary at `.claude/scripts/coverage-summary.el` and a Makefile fragment (`coverage-makefile.txt`) with `coverage` and `coverage-summary` targets. After `make coverage` writes an undercover SimpleCov report, `make coverage-summary` prints a per-file table and a unit-weighted project number.
+
+The number to watch is the missing-file count. A module no test loads never appears in the SimpleCov report, so a line-weighted total skips it silently — the suite looks healthier than it is. The summary counts every `modules/*.el` on disk that's absent from the report as 0%, so an untested module drags the project number down where you can see it. Copy the fragment's targets into your own Makefile to adopt it; the bundle never edits your Makefile.
+
 ## TDD Workflow
 
 Write the failing test first. A failing test proves you understand the change. Assume the bug is in production code until the test proves otherwise — never fix the test before proving the test is wrong.
@@ -99,6 +105,50 @@ make test-name TEST=pattern             # Match by test name pattern
 
 A PostToolUse hook runs matching tests automatically after edits to a module, when the match count is small enough to be fast.
 
+## Batch-Mode Reproducibility
+
+Tests must pass under `emacs --batch` — the headless, scriptable path that CI and the `make` targets use. `--batch` is the source of truth, not an interactive session.
+
+- Don't depend on interactive-session state: window configuration, frame parameters, `this-command`, minibuffer activity, or anything a running editor accumulates. A test that passes in a live Emacs but fails (or hangs) under `--batch` is broken.
+- Don't block on a prompt. `--batch` has no one to answer `y-or-n-p` or `read-string`, so an unmocked prompt either errors or stalls the run. Test the internal directly (see *Interactive vs Internal* above) or `cl-letf` the prompt.
+- Keep tests deterministic: no reliance on test execution order, wall-clock time (mock `current-time`), or environment that differs between the developer's machine and CI.
+
+## Isolating Emacs State
+
+A test must not read or mutate the developer's real Emacs config. Bind a throwaway environment so the run is hermetic regardless of who runs it.
+
+- Bind `user-emacs-directory` (and, when relevant, `user-init-file`) to a temp directory so package state, `custom-file` writes, caches, and auto-save files land in the sandbox rather than the developer's `~/.emacs.d`.
+- Control `load-path` explicitly. Add only the project's own directories; don't lean on whatever happens to be installed in the developer's session.
+- Depend only on the project's declared dependencies. A test that passes because some unrelated package is installed on this machine will fail on a clean checkout or in CI.
+
+```elisp
+(ert-deftest test-foo-writes-to-sandbox ()
+  "Normal: writes under an isolated user-emacs-directory."
+  (let* ((sandbox (make-temp-file "elisp-test-" t))
+         (user-emacs-directory (file-name-as-directory sandbox)))
+    (unwind-protect
+        (progn
+          (cj/--foo)
+          (should (file-exists-p (expand-file-name "foo.cache" user-emacs-directory))))
+      (delete-directory sandbox t))))
+```
+
+## Byte-Compile and Native-Comp Warnings
+
+A clean compile is part of green. Byte-compile warnings (free variables, wrong argument counts, unused lexical bindings, obsolete-function calls) flag real defects, so treat them as failures rather than noise.
+
+This can be enforced in the test run by binding `byte-compile-error-on-warn` to `t` and compiling the modules under test, optionally extending to native compilation where `native-comp-async-report-warnings-errors` is available.
+
+Keep the native-comp half conditional. Native compilation exists only on builds with the `native-compile` feature (Emacs 28+ compiled with it); older or non-native builds lack `native-comp-*` variables and `native-compile` entirely. Gate on the feature so the suite still runs everywhere:
+
+```elisp
+(when (and (fboundp 'native-comp-available-p) (native-comp-available-p))
+  ;; native-comp-specific checks here
+  )
+```
+
+Make the warnings-as-errors gate opt-in or version-aware rather than absolute — a warning that's clean on the project's pinned Emacs may differ across versions, and a hard failure on every build penalizes contributors on a different Emacs than the maintainer's.
+
 ## Anti-Patterns
 
 - Hardcoded timestamps — generate relative to `current-time` or mock
diff --git a/.claude/rules/elisp.md b/.claude/rules/elisp.md
index e641058..ea9bdc2 100644
--- a/.claude/rules/elisp.md
+++ b/.claude/rules/elisp.md
@@ -72,4 +72,4 @@ Then `(require 'foo-config)` in `init.el` (or a config aggregator).
 
 - A PostToolUse hook runs `check-parens` and `byte-compile-file` on every `.el` save
 - If it blocks, read the error — don't retry blindly
-- Prefer Write over repeated Edits for nontrivial new code; incremental edits accumulate subtle paren mismatches
+- Edit cohesively, then verify parens/byte-compile right away. For nontrivial Elisp, land a function as one complete, coherent change rather than dribbling it in over many tiny partial edits — incremental fragments accumulate subtle paren mismatches. Run the paren-balance and byte-compile checks immediately after editing, whatever editing mechanism the environment uses.