diff options
Diffstat (limited to '.claude')
-rwxr-xr-x  .claude/hooks/validate-el.sh    105
-rw-r--r--  .claude/rules/commits.md         66
-rw-r--r--  .claude/rules/elisp-testing.md  107
-rw-r--r--  .claude/rules/elisp.md           75
-rw-r--r--  .claude/rules/testing.md        277
-rw-r--r--  .claude/rules/verification.md    42
6 files changed, 672 insertions, 0 deletions
diff --git a/.claude/hooks/validate-el.sh b/.claude/hooks/validate-el.sh
new file mode 100755
index 0000000..0c3a46c
--- /dev/null
+++ b/.claude/hooks/validate-el.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Validate and test .el files after Edit/Write/MultiEdit.
+# PostToolUse hook: receives tool-call JSON on stdin.
+#
+# On success: exit 0 silent.
+# On failure: emit JSON with hookSpecificOutput.additionalContext so Claude
+# sees a structured error in its context, THEN exit 2 to block the tool
+# pipeline. stderr still echoes the error for terminal visibility.
+#
+# Phase 1: check-parens + byte-compile
+# Phase 2: for non-test .el files, run matching tests/test-<stem>*.el
+
+set -u
+
+# Emit a JSON failure payload and exit 2. Arguments:
+#   $1 — short failure type (e.g. "PAREN CHECK FAILED")
+#   $2 — file path
+#   $3 — emacs output (error body)
+fail_json() {
+  local ctx
+  ctx="$(printf '%s: %s\n\n%s\n\nFix before proceeding.' "$1" "$2" "$3" \
+    | jq -Rs .)"
+  cat <<EOF
+{"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": $ctx}}
+EOF
+  printf '%s: %s\n%s\n' "$1" "$2" "$3" >&2
+  exit 2
+}
+
+# Portable project root: prefer Claude Code's env var, fall back to deriving
+# from this script's location ($project/.claude/hooks/validate-el.sh).
+PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(cd "$(dirname "$0")/../.." && pwd)}"
+
+f="$(jq -r '.tool_input.file_path // .tool_response.filePath // empty')"
+[ -z "$f" ] && exit 0
+[ "${f##*.}" = "el" ] || exit 0
+
+MAX_AUTO_TEST_FILES=20  # skip if more matches than this (large test suites)
+
+# --- Phase 1: syntax + byte-compile ---
+case "$f" in
+  */init.el|*/early-init.el)
+    # Byte-compile here would load the full package graph. Parens only.
+    if ! output="$(emacs --batch --no-site-file --no-site-lisp "$f" \
+        --eval '(check-parens)' 2>&1)"; then
+      fail_json "PAREN CHECK FAILED" "$f" "$output"
+    fi
+    ;;
+  *.el)
+    if ! output="$(emacs --batch --no-site-file --no-site-lisp \
+        -L "$PROJECT_ROOT" \
+        -L "$PROJECT_ROOT/modules" \
+        -L "$PROJECT_ROOT/tests" \
+        --eval '(package-initialize)' \
+        "$f" \
+        --eval '(check-parens)' \
+        --eval "(or (byte-compile-file \"$f\") (kill-emacs 1))" 2>&1)"; then
+      fail_json "VALIDATION FAILED" "$f" "$output"
+    fi
+    ;;
+esac
+
+# --- Phase 2: test runner ---
+# Determine which tests (if any) apply to this edit. Works for projects with
+# source at root, in modules/, or elsewhere — stem-based test lookup is the
+# common pattern.
+tests=()
+case "$f" in
+  */init.el|*/early-init.el)
+    : # Phase 1 handled it; skip test runner
+    ;;
+  "$PROJECT_ROOT/tests/testutil-"*.el)
+    stem="$(basename "${f%.el}")"
+    stem="${stem#testutil-}"
+    mapfile -t tests < <(find "$PROJECT_ROOT/tests" -maxdepth 1 -name "test-${stem}*.el" 2>/dev/null | sort)
+    ;;
+  "$PROJECT_ROOT/tests/test-"*.el)
+    tests=("$f")
+    ;;
+  *.el)
+    # Any other .el under the project — find matching tests by stem
+    stem="$(basename "${f%.el}")"
+    mapfile -t tests < <(find "$PROJECT_ROOT/tests" -maxdepth 1 -name "test-${stem}*.el" 2>/dev/null | sort)
+    ;;
+esac
+
+count="${#tests[@]}"
+if [ "$count" -ge 1 ] && [ "$count" -le "$MAX_AUTO_TEST_FILES" ]; then
+  load_args=()
+  for t in "${tests[@]}"; do load_args+=("-l" "$t"); done
+  # Run from tests/ so each file's `(require 'test-bootstrap (expand-file-name
+  # "test-bootstrap.el"))` resolves against the directory the bootstrap lives in,
+  # not the project root.
+  if ! output="$(cd "$PROJECT_ROOT/tests" && emacs --batch --no-site-file --no-site-lisp \
+      -L "$PROJECT_ROOT" \
+      -L "$PROJECT_ROOT/modules" \
+      -L "$PROJECT_ROOT/tests" \
+      --eval '(package-initialize)' \
+      -l ert "${load_args[@]}" \
+      --eval "(ert-run-tests-batch-and-exit '(not (tag :slow)))" 2>&1)"; then
+    fail_json "TESTS FAILED ($count test file(s))" "$f" "$output"
+  fi
+fi
+
+exit 0
diff --git a/.claude/rules/commits.md b/.claude/rules/commits.md
new file mode 100644
index 0000000..301c6ff
--- /dev/null
+++ b/.claude/rules/commits.md
@@ -0,0 +1,66 @@
+# Commit Rules
+
+Applies to: `**/*`
+
+## Author Identity
+
+All commits are authored as the user (repo owner / maintainer), never as
+Claude, Claude Code, Anthropic, or any AI tool. Git uses the configured
+`user.name` and `user.email` — do not modify git config to attribute
+otherwise.
+
+## No AI Attribution — Anywhere
+
+Absolutely no AI/LLM/Claude/Anthropic attribution in:
+
+- Commit messages (subject or body)
+- PR descriptions and titles
+- Issue comments and reviews
+- Code comments
+- Commit trailers
+- Release notes, changelogs, and any public-facing artifact
+
+This means:
+
+- **No** `Co-Authored-By: Claude …` (or Claude Code, or any AI) trailers
+- **No** "Generated with Claude Code" footers or equivalents
+- **No** 🤖 emojis or similar markers implying AI authorship
+- **No** references to "Claude", "Anthropic", "LLM", "AI tool" as a credited contributor
+- **No** attribution added via template defaults — strip them before committing
+
+If a tool, template, or default config inserts attribution, remove it. If
+settings.json needs it, set `attribution.commit: ""` and `attribution.pr: ""`
+to suppress the defaults.
+
+## Commit Message Format
+
+Conventional prefixes:
+
+- `feat:` — new feature
+- `fix:` — bug fix
+- `refactor:` — code restructuring, no behavior change
+- `test:` — adding or updating tests
+- `docs:` — documentation only
+- `chore:` — build, tooling, meta
+
+Subject line ≤72 characters.
Body explains the *why* when not obvious. +Skip the body entirely when the subject line is self-explanatory. + +## Before Committing + +1. Check author identity: `git log -1 --format='%an <%ae>'` — should be the user. +2. Scan the message for AI-attribution language (including emojis and footers). +3. Review the diff — only intended changes staged; no unrelated files. +4. Run tests and linters (see `verification.md`). + +## If You Catch Yourself + +Typing any of the following — stop, delete, rewrite: + +- `Co-Authored-By: Claude` +- `🤖 Generated with …` +- "Created with Claude Code" +- "Assisted by AI" + +Rewrite the commit as the user would write it: concise, focused on the +change, no mention of how the change was produced. diff --git a/.claude/rules/elisp-testing.md b/.claude/rules/elisp-testing.md new file mode 100644 index 0000000..b5def78 --- /dev/null +++ b/.claude/rules/elisp-testing.md @@ -0,0 +1,107 @@ +# Elisp Testing Rules + +Applies to: `**/tests/*.el` + +Implements the core principles from `testing.md`. All rules there apply here — +this file covers Elisp-specific patterns. + +## Framework: ERT + +Use `ert-deftest` for all tests. One test = one scenario. + +## File Layout + +- `tests/test-<module>.el` — tests for `<module>.el` +- `tests/test-<module>--<helper>.el` — tests for a specific private helper (matches `<module>--<helper>` function naming) +- `tests/testutil-<module>.el` — fixtures and mocks scoped to one module +- `tests/testutil-*.el` — cross-module helpers (shared fixtures, generic mocks, filesystem helpers); name them for what they help with + +Tests must `(require 'module-name)` before the testutil file that stubs its internals, unless documented otherwise. Order matters — a testutil that defines a stub can be shadowed by a later `require` of the real module. + +## Test Naming + +```elisp +(ert-deftest test-<module>-<function>-<scenario> () + "Normal/Boundary/Error: brief description." + ...) 
+``` + +Put the category (Normal, Boundary, Error) in the docstring so the category is grep-able. + +## Required Coverage + +Every non-trivial function needs at least: +- One **Normal** case (happy path) +- One **Boundary** case (empty, nil, min, max, unicode, long string) +- One **Error** case (invalid input, missing resource, failure mode) + +Missing a category is a test gap. If three cases look near-identical, parametrize with a loop or `dolist` rather than copy-pasting. + +## TDD Workflow + +Write the failing test first. A failing test proves you understand the change. Assume the bug is in production code until the test proves otherwise — never fix the test before proving the test is wrong. + +For untested code, write a **characterization test** that captures current behavior before you change anything. It becomes the safety net for the refactor. + +## Interactive vs Internal — Split for Testability + +When a function mixes business logic with user interaction, split it: + +- **Internal** (`cj/--foo`) — pure logic. All parameters explicit. No prompts, + no UI. Deterministic and trivially testable. +- **Interactive wrapper** (`cj/foo`) — thin layer that reads user input and + delegates to the internal. + +```elisp +(defun cj/--move-buffer-and-file (dir &optional ok-if-exists) + "Move the current buffer's file into DIR. Overwrite if OK-IF-EXISTS." + ...) + +(defun cj/move-buffer-and-file () + "Interactive wrapper: prompt for DIR, delegate." + (interactive) + (let ((dir (read-directory-name "Move to: "))) + (cj/--move-buffer-and-file dir))) +``` + +Test the internal directly with parameter values — no `cl-letf` on +`read-directory-name`, `yes-or-no-p`, etc. The wrapper gets a smoke test or +nothing — Emacs already tests its own prompts. The internal also becomes +reusable by other Elisp code without triggering UI. 
+ +## Mocking + +Mock at boundaries: +- Shell: `cl-letf` on `shell-command`, `shell-command-to-string`, `call-process` +- File I/O when tests shouldn't touch disk +- Network: URL retrievers, HTTP clients +- Time: `cl-letf` on `current-time`, `format-time-string` + +Never mock: +- The code under test +- Core Emacs primitives (buffer ops, string ops, lists) +- Your own domain logic — restructure it to be testable instead + +## Idioms + +- `cl-letf` for scoped overrides (self-cleaning) +- `with-temp-buffer` for buffer manipulation tests +- `make-temp-file` with `.el` suffix for on-disk fixtures +- Tests must run in any order; no shared mutable state + +## Running Tests + +```bash +make test # All +make test-file FILE=tests/test-foo.el # One file +make test-name TEST=pattern # Match by test name pattern +``` + +A PostToolUse hook runs matching tests automatically after edits to a module, when the match count is small enough to be fast. + +## Anti-Patterns + +- Hardcoded timestamps — generate relative to `current-time` or mock +- Testing implementation details (private storage structure) instead of behavior +- Mocking the thing you're testing +- Skipping a failing test without an issue to track it diff --git a/.claude/rules/elisp.md b/.claude/rules/elisp.md new file mode 100644 index 0000000..e641058 --- /dev/null +++ b/.claude/rules/elisp.md @@ -0,0 +1,75 @@ +# Elisp / Emacs Rules + +Applies to: `**/*.el` + +## Style + +- 2-space indent, no tabs +- Hyphen-case for identifiers: `cj/do-thing`, not `cj/doThing` +- Naming prefixes: + - `cj/name` — user-facing functions and commands (bound to keys, called from init) + - `cj/--name` — private helpers (double-dash signals "internal") + - `<module>/name` — module-scoped where appropriate (e.g., `calendar-sync/parse-ics`) +- File header: `;;; foo-config.el --- brief description -*- lexical-binding: t -*-` +- `(provide 'foo-config)` at the bottom of every module +- `lexical-binding: t` is mandatory — no file without it + +## 
Function Design + +- Keep functions under 15 lines where possible +- One responsibility per function +- Extract helpers instead of nesting deeply — 5+ levels of nesting is a refactor signal +- Prefer named helpers over lambdas for anything nontrivial +- No premature abstraction — three similar lines beats a clever macro + +Small functions are the single strongest defense against paren errors. Deeply nested code is where AI and humans both fail. + +## Requires and Loading + +- Every `(require 'foo)` must correspond to a loadable file on the load-path +- Byte-compile warnings about free variables usually indicate a missing `require` or a typo in a symbol name — read them +- Use `use-package` for external (MELPA/ELPA) packages +- Use plain `(require 'foo-config)` for internal modules +- For optional features, `(when (require 'foo nil t) ...)` degrades gracefully if absent + +## Lexical-Binding Traps + +- `(boundp 'x)` where `x` is a lexical variable always returns nil. Bind with `defvar` at top level if you need `boundp` to work, or use the value directly. +- `setq` on an undeclared free variable is a warning — use `let` for locals or `defvar` for module-level state +- Closures capture by reference. Avoid capturing mutating loop variables in nested defuns. + +## Regex Gotchas + +- `\s` is NOT whitespace in Emacs regex. Use `[ \t]` or `\\s-` (syntax class). +- `^` in `string-match` matches after `\n` OR at position 0 — use `(= (match-beginning 0) start)` for positional checks when that matters. +- `replace-regexp-in-string` interprets backslashes in the replacement. Pass `t t` (FIXEDCASE LITERAL) when the replacement contains literal backslashes. 
+ +## Keybindings + +- `keymap-global-set` for global; `keymap-set KEYMAP ...` for mode-local +- Group module-specific bindings inside the module's file +- Autoload cookies (`;;;###autoload`) don't activate through plain `(require ...)` — use the form directly, not an autoloaded wrapper + +## Module Template + +```elisp +;;; foo-config.el --- Foo feature configuration -*- lexical-binding: t -*- + +;;; Commentary: +;; One-line description. + +;;; Code: + +;; ... code ... + +(provide 'foo-config) +;;; foo-config.el ends here +``` + +Then `(require 'foo-config)` in `init.el` (or a config aggregator). + +## Editing Workflow + +- A PostToolUse hook runs `check-parens` and `byte-compile-file` on every `.el` save +- If it blocks, read the error — don't retry blindly +- Prefer Write over repeated Edits for nontrivial new code; incremental edits accumulate subtle paren mismatches diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md new file mode 100644 index 0000000..b91b76c --- /dev/null +++ b/.claude/rules/testing.md @@ -0,0 +1,277 @@ +# Testing Standards + +Applies to: `**/*` + +Core TDD discipline and test quality rules. Language-specific patterns +(frameworks, fixture idioms, mocking tools) live in per-language testing files +under `languages/<lang>/claude/rules/`. + +## Test-Driven Development (Default) + +TDD is the default workflow for all code, including demos and prototypes. **Write tests first, before any implementation code.** Tests are how you prove you understand the problem — if you can't write a failing test, you don't yet understand what needs to change. + +1. **Red**: Write a failing test that defines the desired behavior +2. **Green**: Write the minimal code to make the test pass +3. **Refactor**: Clean up while keeping tests green + +Do not skip TDD for demo code. Demos build muscle memory — the habit carries into production. + +### Understand Before You Test + +Before writing tests, invest time in understanding the code: + +1. 
**Explore the codebase** — Read the module under test, its callers, and its dependencies. Understand the data flow end to end. +2. **Identify the root cause** — If fixing a bug, trace the problem to its origin. Don't test (or fix) surface symptoms when the real issue is deeper in the call chain. +3. **Reason through edge cases** — Consider boundary conditions, error states, concurrent access, and interactions with adjacent modules. Your tests should cover what could actually go wrong, not just the obvious happy path. + +### Adding Tests to Existing Untested Code + +When working in a codebase without tests: + +1. Write a **characterization test** that captures current behavior before making changes +2. Use the characterization test as a safety net while refactoring +3. Then follow normal TDD for the new change + +## Test Categories (Required for All Code) + +Every unit under test requires coverage across three categories: + +### 1. Normal Cases (Happy Path) +- Standard inputs and expected use cases +- Common workflows and default configurations +- Typical data volumes + +### 2. Boundary Cases +- Minimum/maximum values (0, 1, -1, MAX_INT) +- Empty vs null vs undefined (language-appropriate) +- Single-element collections +- Unicode and internationalization (emoji, RTL text, combining characters) +- Very long strings, deeply nested structures +- Timezone boundaries (midnight, DST transitions) +- Date edge cases (leap years, month boundaries) + +### 3. Error Cases +- Invalid inputs and type mismatches +- Network failures and timeouts +- Missing required parameters +- Permission denied scenarios +- Resource exhaustion +- Malformed data + +## Combinatorial Coverage + +For functions with 3+ parameters that each take multiple values (feature-flag +combinations, config matrices, permission/role interactions, multi-field +form validation, API parameter spaces), the exhaustive test count explodes +(M^N) while 3-5 ad-hoc cases miss pair interactions. 
Use **pairwise / +combinatorial testing** — generate a minimal matrix that hits every 2-way +combination of parameter values. Empirically catches 60-90% of combinatorial +bugs with 80-99% fewer tests. + +Invoke `/pairwise-tests` on the offending function; continue using `/add-tests` +and the Normal/Boundary/Error discipline for the rest. The two approaches +complement: pairwise covers parameter *interactions*; category discipline +covers each parameter's individual edge space. + +Skip pairwise when: the function has 1-2 parameters (just write the cases), +the context requires *provably* exhaustive coverage (regulated systems — document +in an ADR), or the testing target is non-parametric (single happy path, +performance regression, a specific error). + +## Test Organization + +Typical layout: + +``` +tests/ + unit/ # One test file per source file + integration/ # Multi-component workflows + e2e/ # Full system tests +``` + +Per-language files may adjust this (e.g. Elisp collates ERT tests into +`tests/test-<module>*.el` without subdirectories). + +### Testing Pyramid + +Rough proportions for most projects: +- Unit tests: 70-80% (fast, isolated, granular) +- Integration tests: 15-25% (component interactions, real dependencies) +- E2E tests: 5-10% (full system, slowest) + +Don't duplicate coverage: if unit tests fully exercise a function's logic, +integration tests should focus on *how* components interact — not repeat the +function's case coverage. + +## Integration Tests + +Integration tests exercise multiple components together. Two rules: + +**The docstring names every component integrated** and marks which are real vs +mocked. Integration failures are harder to pinpoint than unit failures; +enumerating the participants up front tells you where to start looking. + +Example: + +``` +def test_integration_refund_during_sync_updates_ledger_atomically(): + """Refund processed mid-sync updates order and ledger in one transaction. 
+ + Components integrated: + - OrderService.refund (entry point) + - PaymentGateway.reverse (MOCKED — returns success) + - Ledger.credit (real) + - db.transaction (real) + + Validates: + - Refund rolls back if ledger write fails + - Both tables updated or neither + """ +``` + +**Write an integration test when** multiple components must work together, +state crosses function boundaries, or edge cases combine. **Don't** when +single-function behavior suffices, or when mocking would erase the interaction +you meant to test. + +## Naming Convention + +- Unit: `test_<module>_<function>_<scenario>_<expected>` +- Integration: `test_integration_<workflow>_<scenario>_<outcome>` + +Examples: +- `test_cart_apply_discount_expired_coupon_raises_error` +- `test_integration_order_sync_network_timeout_retries_three_times` + +Languages that prefer camelCase, kebab-case, or other conventions keep the +structure but use their idiom. Consistency within a project matters more than +the specific case choice. + +## Test Quality + +### Independence +- No shared mutable state between tests +- Each test runs successfully in isolation +- Explicit setup and teardown + +### Determinism +- Never hardcode dates or times — generate them relative to `now()` +- No reliance on test execution order +- No flaky network calls in unit tests + +### Performance +- Unit tests: <100ms each +- Integration tests: <1s each +- E2E tests: <10s each +- Mark slow tests with appropriate decorators/tags + +### Mocking Boundaries +Mock external dependencies at the system boundary: +- Network calls (HTTP, gRPC, WebSocket) +- File I/O and cloud storage +- Time and dates +- Third-party service clients + +Never mock: +- The code under test +- Internal domain logic +- Framework behavior (ORM queries, middleware, hooks, buffer primitives) + +### Signs of Overmocking + +Ask yourself: + +- Would this test still pass if I replaced the function body with `raise NotImplementedError` (or equivalent)? 
If yes, the mocks are doing the work — you're testing mocks, not code. +- Is the mock more complex than the function being tested? Smell. +- Am I mocking internal string / parsing / decoding helpers? Those aren't boundaries — they're the work. +- Does the test break when I refactor without changing behavior? Good tests survive refactors; overmocked ones couple to implementation. + +When tests demand heavy internal mocking, the fix isn't better mocks — it's +restructuring the code (see *If Tests Are Hard to Write* below). + +### Testing Code That Uses Frameworks + +When a function mostly delegates to framework or library code, test *your* +integration logic: +- ✓ "I call the library with the right arguments in the right context" +- ✓ "I handle its return value correctly" +- ✗ "The library works in 50 scenarios" — trust it; it has its own tests + +For polyglot behavior (e.g., comment handling across C/Java/Go/JS), test 2-3 +representative modes thoroughly plus a minimal smoke test in the others. +Exhaustive permutations are diminishing returns. + +### Test Real Code, Not Copies + +Never inline or copy production code into test files. Always `require`/`import` +the module under test. Copied code passes even when production breaks — the +bug hides behind the duplicate. + +Mock dependencies at their boundary; exercise the real function body. + +### Error Behavior, Not Error Text + +Test that errors occur with the right type; don't assert exact wording: +- ✓ Right exception type (`pytest.raises(ValueError)`, `(should-error ... :type 'user-error)`) +- ✓ Regex on values the message *must* contain (e.g., the offending filename) +- ✗ `assert str(e) == "File 'foo' not found"` — breaks when prose changes even though behavior is unchanged + +Production code should emit clear, contextual errors. Tests verify the +behavior (raised, caught, returned nil) and values that must appear — not the +prose. 
+ +## If Tests Are Hard to Write, Refactor the Code + +If a test needs extensive mocking of internal helpers, elaborate fixture +scaffolding, or mocks that recreate the function's own logic, the production +code needs restructuring — not the test. + +Signals: +- Deep nesting (callbacks inside callbacks) +- Long functions doing multiple things ("fetch AND parse AND decode AND save") +- Tests that mock internal string / parsing / I/O helpers +- Tests that break on refactors with no behavior change + +Fix: extract focused helpers (one responsibility each), test each in isolation +with real inputs, compose them in a thin outer function. Several small unit +tests plus one composition test beats one monster test behind a wall of mocks. + +## Coverage Targets + +- Business logic and domain services: **90%+** +- API endpoints and views: **80%+** +- UI components: **70%+** +- Utilities and helpers: **90%+** +- Overall project minimum: **80%+** + +New code must not decrease coverage. PRs that lower coverage require justification. + +## TDD Discipline + +TDD is non-negotiable. These are the rationalizations agents use to skip it — don't fall for them: + +| Excuse | Why It's Wrong | +|--------|----------------| +| "This is too simple to need a test" | Simple code breaks too. The test takes 30 seconds. Write it. | +| "I'll add tests after the implementation" | You won't, and even if you do, they'll test what you wrote rather than what was needed. Test-after validates implementation, not behavior. | +| "Let me just get it working first" | That's not TDD. If you can't write a failing test, you don't understand the requirement yet. | +| "This is just a refactor" | Refactors without tests are guesses. Write a characterization test first, then refactor while it stays green. | +| "I'm only changing one line" | One-line changes cause production outages. Write a test that covers the line you're changing. | +| "The existing code has no tests" | Start with a characterization test. 
Don't make the problem worse. | +| "This is demo/prototype code" | Demos build habits. Untested demo code becomes untested production code. | +| "I need to spike first" | Spikes are fine — then throw away the spike, write the test, and implement properly. | + +If you catch yourself thinking any of these, stop and write the test. + +## Anti-Patterns (Do Not Do) + +- Hardcoded dates or timestamps (they rot) +- Testing implementation details instead of behavior +- Mocking the thing you're testing +- Mocking internal helpers (string ops, parsing, decoding) — those are the work +- Inlining production code into test files — always `require` / `import` the real module +- Asserting exact error-message text instead of type + key values +- Shared mutable state between tests +- Non-deterministic tests (random without seed, network in unit tests) +- Testing framework behavior instead of your code +- Ignoring or skipping failing tests without a tracking issue diff --git a/.claude/rules/verification.md b/.claude/rules/verification.md new file mode 100644 index 0000000..8993736 --- /dev/null +++ b/.claude/rules/verification.md @@ -0,0 +1,42 @@ +# Verification Before Completion + +Applies to: `**/*` + +## The Rule + +Do not claim work is done without fresh verification evidence. Run the command, read the output, confirm it matches the claim, then — and only then — declare success. + +This applies to every completion claim: +- "Tests pass" → Run the test suite. Read the output. Confirm all green. +- "Linter is clean" → Run the linter. Read the output. Confirm no warnings. +- "Build succeeds" → Run the build. Read the output. Confirm no errors. +- "Bug is fixed" → Run the reproduction steps. Confirm the bug is gone. +- "No regressions" → Run the full test suite, not just the tests you added. 
+## What Fresh Means
+
+- Run the verification command **now**, in the current session
+- Do not rely on a previous run from before your changes
+- Do not assume your changes didn't break something unrelated
+- Do not extrapolate from partial output — read the whole result
+
+## Red Flags
+
+If you find yourself using these words, you haven't verified:
+
+- "should" ("tests should pass")
+- "probably" ("this probably works")
+- "I believe" ("I believe the build is clean")
+- "based on the changes" ("based on the changes, nothing should break")
+
+Replace beliefs with evidence. Run the command.
+
+## Before Committing
+
+Before any commit:
+1. Run the test suite — confirm all tests pass
+2. Run the linter — confirm no new warnings
+3. Run the type checker — confirm no new errors
+4. Review the diff — confirm only intended changes are staged
+
+Do not commit based on the assumption that nothing broke. Verify.
