diff options
Diffstat (limited to '.claude')
-rwxr-xr-x  .claude/hooks/validate-el.sh    105
-rw-r--r--  .claude/rules/commits.md         66
-rw-r--r--  .claude/rules/elisp-testing.md  107
-rw-r--r--  .claude/rules/elisp.md           75
-rw-r--r--  .claude/rules/testing.md        277
-rw-r--r--  .claude/rules/verification.md    42
6 files changed, 672 insertions, 0 deletions
diff --git a/.claude/hooks/validate-el.sh b/.claude/hooks/validate-el.sh
new file mode 100755
index 0000000..0c3a46c
--- /dev/null
+++ b/.claude/hooks/validate-el.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Validate and test .el files after Edit/Write/MultiEdit.
+# PostToolUse hook: receives tool-call JSON on stdin.
+#
+# On success: exit 0 silent.
+# On failure: emit JSON with hookSpecificOutput.additionalContext so Claude
+# sees a structured error in its context, THEN exit 2 to block the tool
+# pipeline. stderr still echoes the error for terminal visibility.
+#
+# Phase 1: check-parens + byte-compile
+# Phase 2: for non-test .el files, run matching tests/test-<stem>*.el
+
+set -u
+
+# Emit a JSON failure payload and exit 2. Arguments:
+#   $1 — short failure type (e.g. "PAREN CHECK FAILED")
+#   $2 — file path
+#   $3 — emacs output (error body)
+fail_json() {
+  local ctx
+  ctx="$(printf '%s: %s\n\n%s\n\nFix before proceeding.' "$1" "$2" "$3" \
+    | jq -Rs .)"
+  cat <<EOF
+{"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": $ctx}}
+EOF
+  printf '%s: %s\n%s\n' "$1" "$2" "$3" >&2
+  exit 2
+}
+
+# Portable project root: prefer Claude Code's env var, fall back to deriving
+# from this script's location ($project/.claude/hooks/validate-el.sh).
+PROJECT_ROOT="${CLAUDE_PROJECT_DIR:-$(cd "$(dirname "$0")/../.." && pwd)}"
+
+f="$(jq -r '.tool_input.file_path // .tool_response.filePath // empty')"
+[ -z "$f" ] && exit 0
+[ "${f##*.}" = "el" ] || exit 0
+
+MAX_AUTO_TEST_FILES=20  # skip if more matches than this (large test suites)
+
+# --- Phase 1: syntax + byte-compile ---
+case "$f" in
+  */init.el|*/early-init.el)
+    # Byte-compile here would load the full package graph. Parens only.
+    if ! output="$(emacs --batch --no-site-file --no-site-lisp "$f" \
+        --eval '(check-parens)' 2>&1)"; then
+      fail_json "PAREN CHECK FAILED" "$f" "$output"
+    fi
+    ;;
+  *.el)
+    if ! output="$(emacs --batch --no-site-file --no-site-lisp \
+        -L "$PROJECT_ROOT" \
+        -L "$PROJECT_ROOT/modules" \
+        -L "$PROJECT_ROOT/tests" \
+        --eval '(package-initialize)' \
+        "$f" \
+        --eval '(check-parens)' \
+        --eval "(or (byte-compile-file \"$f\") (kill-emacs 1))" 2>&1)"; then
+      fail_json "VALIDATION FAILED" "$f" "$output"
+    fi
+    ;;
+esac
+
+# --- Phase 2: test runner ---
+# Determine which tests (if any) apply to this edit. Works for projects with
+# source at root, in modules/, or elsewhere — stem-based test lookup is the
+# common pattern.
+tests=()
+case "$f" in
+  */init.el|*/early-init.el)
+    : # Phase 1 handled it; skip test runner
+    ;;
+  "$PROJECT_ROOT/tests/testutil-"*.el)
+    stem="$(basename "${f%.el}")"
+    stem="${stem#testutil-}"
+    mapfile -t tests < <(find "$PROJECT_ROOT/tests" -maxdepth 1 -name "test-${stem}*.el" 2>/dev/null | sort)
+    ;;
+  "$PROJECT_ROOT/tests/test-"*.el)
+    tests=("$f")
+    ;;
+  *.el)
+    # Any other .el under the project — find matching tests by stem
+    stem="$(basename "${f%.el}")"
+    mapfile -t tests < <(find "$PROJECT_ROOT/tests" -maxdepth 1 -name "test-${stem}*.el" 2>/dev/null | sort)
+    ;;
+esac
+
+count="${#tests[@]}"
+if [ "$count" -ge 1 ] && [ "$count" -le "$MAX_AUTO_TEST_FILES" ]; then
+  load_args=()
+  for t in "${tests[@]}"; do load_args+=("-l" "$t"); done
+  # Run from tests/ so each file's `(require 'test-bootstrap (expand-file-name
+  # "test-bootstrap.el"))` resolves against the directory the bootstrap lives in,
+  # not the project root.
+  if ! output="$(cd "$PROJECT_ROOT/tests" && emacs --batch --no-site-file --no-site-lisp \
+      -L "$PROJECT_ROOT" \
+      -L "$PROJECT_ROOT/modules" \
+      -L "$PROJECT_ROOT/tests" \
+      --eval '(package-initialize)' \
+      -l ert "${load_args[@]}" \
+      --eval "(ert-run-tests-batch-and-exit '(not (tag :slow)))" 2>&1)"; then
+    fail_json "TESTS FAILED ($count test file(s))" "$f" "$output"
+  fi
+fi
+
+exit 0
diff --git a/.claude/rules/commits.md b/.claude/rules/commits.md
new file mode 100644
index 0000000..301c6ff
--- /dev/null
+++ b/.claude/rules/commits.md
@@ -0,0 +1,66 @@
+# Commit Rules
+
+Applies to: `**/*`
+
+## Author Identity
+
+All commits are authored as the user (repo owner / maintainer), never as
+Claude, Claude Code, Anthropic, or any AI tool. Git uses the configured
+`user.name` and `user.email` — do not modify git config to attribute
+otherwise.
+
+## No AI Attribution — Anywhere
+
+Absolutely no AI/LLM/Claude/Anthropic attribution in:
+
+- Commit messages (subject or body)
+- PR descriptions and titles
+- Issue comments and reviews
+- Code comments
+- Commit trailers
+- Release notes, changelogs, and any public-facing artifact
+
+This means:
+
+- **No** `Co-Authored-By: Claude …` (or Claude Code, or any AI) trailers
+- **No** "Generated with Claude Code" footers or equivalents
+- **No** 🤖 emojis or similar markers implying AI authorship
+- **No** references to "Claude", "Anthropic", "LLM", "AI tool" as a credited contributor
+- **No** attribution added via template defaults — strip them before committing
+
+If a tool, template, or default config inserts attribution, remove it. If
+settings.json needs it, set `attribution.commit: ""` and `attribution.pr: ""`
+to suppress the defaults.
+
+## Commit Message Format
+
+Conventional prefixes:
+
+- `feat:` — new feature
+- `fix:` — bug fix
+- `refactor:` — code restructuring, no behavior change
+- `test:` — adding or updating tests
+- `docs:` — documentation only
+- `chore:` — build, tooling, meta
+
+Subject line ≤72 characters.
Body explains the *why* when not obvious. +Skip the body entirely when the subject line is self-explanatory. + +## Before Committing + +1. Check author identity: `git log -1 --format='%an <%ae>'` — should be the user. +2. Scan the message for AI-attribution language (including emojis and footers). +3. Review the diff — only intended changes staged; no unrelated files. +4. Run tests and linters (see `verification.md`). + +## If You Catch Yourself + +Typing any of the following — stop, delete, rewrite: + +- `Co-Authored-By: Claude` +- `🤖 Generated with …` +- "Created with Claude Code" +- "Assisted by AI" + +Rewrite the commit as the user would write it: concise, focused on the +change, no mention of how the change was produced. diff --git a/.claude/rules/elisp-testing.md b/.claude/rules/elisp-testing.md new file mode 100644 index 0000000..b5def78 --- /dev/null +++ b/.claude/rules/elisp-testing.md @@ -0,0 +1,107 @@ +# Elisp Testing Rules + +Applies to: `**/tests/*.el` + +Implements the core principles from `testing.md`. All rules there apply here — +this file covers Elisp-specific patterns. + +## Framework: ERT + +Use `ert-deftest` for all tests. One test = one scenario. + +## File Layout + +- `tests/test-<module>.el` — tests for `<module>.el` +- `tests/test-<module>--<helper>.el` — tests for a specific private helper (matches `<module>--<helper>` function naming) +- `tests/testutil-<module>.el` — fixtures and mocks scoped to one module +- `tests/testutil-*.el` — cross-module helpers (shared fixtures, generic mocks, filesystem helpers); name them for what they help with + +Tests must `(require 'module-name)` before the testutil file that stubs its internals, unless documented otherwise. Order matters — a testutil that defines a stub can be shadowed by a later `require` of the real module. + +## Test Naming + +```elisp +(ert-deftest test-<module>-<function>-<scenario> () + "Normal/Boundary/Error: brief description." + ...) 
+``` + +Put the category (Normal, Boundary, Error) in the docstring so the category is grep-able. + +## Required Coverage + +Every non-trivial function needs at least: +- One **Normal** case (happy path) +- One **Boundary** case (empty, nil, min, max, unicode, long string) +- One **Error** case (invalid input, missing resource, failure mode) + +Missing a category is a test gap. If three cases look near-identical, parametrize with a loop or `dolist` rather than copy-pasting. + +## TDD Workflow + +Write the failing test first. A failing test proves you understand the change. Assume the bug is in production code until the test proves otherwise — never fix the test before proving the test is wrong. + +For untested code, write a **characterization test** that captures current behavior before you change anything. It becomes the safety net for the refactor. + +## Interactive vs Internal — Split for Testability + +When a function mixes business logic with user interaction, split it: + +- **Internal** (`cj/--foo`) — pure logic. All parameters explicit. No prompts, + no UI. Deterministic and trivially testable. +- **Interactive wrapper** (`cj/foo`) — thin layer that reads user input and + delegates to the internal. + +```elisp +(defun cj/--move-buffer-and-file (dir &optional ok-if-exists) + "Move the current buffer's file into DIR. Overwrite if OK-IF-EXISTS." + ...) + +(defun cj/move-buffer-and-file () + "Interactive wrapper: prompt for DIR, delegate." + (interactive) + (let ((dir (read-directory-name "Move to: "))) + (cj/--move-buffer-and-file dir))) +``` + +Test the internal directly with parameter values — no `cl-letf` on +`read-directory-name`, `yes-or-no-p`, etc. The wrapper gets a smoke test or +nothing — Emacs already tests its own prompts. The internal also becomes +reusable by other Elisp code without triggering UI. 
+ +## Mocking + +Mock at boundaries: +- Shell: `cl-letf` on `shell-command`, `shell-command-to-string`, `call-process` +- File I/O when tests shouldn't touch disk +- Network: URL retrievers, HTTP clients +- Time: `cl-letf` on `current-time`, `format-time-string` + +Never mock: +- The code under test +- Core Emacs primitives (buffer ops, string ops, lists) +- Your own domain logic — restructure it to be testable instead + +## Idioms + +- `cl-letf` for scoped overrides (self-cleaning) +- `with-temp-buffer` for buffer manipulation tests +- `make-temp-file` with `.el` suffix for on-disk fixtures +- Tests must run in any order; no shared mutable state + +## Running Tests + +```bash +make test # All +make test-file FILE=tests/test-foo.el # One file +make test-name TEST=pattern # Match by test name pattern +``` + +A PostToolUse hook runs matching tests automatically after edits to a module, when the match count is small enough to be fast. + +## Anti-Patterns + +- Hardcoded timestamps — generate relative to `current-time` or mock +- Testing implementation details (private storage structure) instead of behavior +- Mocking the thing you're testing +- Skipping a failing test without an issue to track it diff --git a/.claude/rules/elisp.md b/.claude/rules/elisp.md new file mode 100644 index 0000000..e641058 --- /dev/null +++ b/.claude/rules/elisp.md @@ -0,0 +1,75 @@ +# Elisp / Emacs Rules + +Applies to: `**/*.el` + +## Style + +- 2-space indent, no tabs +- Hyphen-case for identifiers: `cj/do-thing`, not `cj/doThing` +- Naming prefixes: + - `cj/name` — user-facing functions and commands (bound to keys, called from init) + - `cj/--name` — private helpers (double-dash signals "internal") + - `<module>/name` — module-scoped where appropriate (e.g., `calendar-sync/parse-ics`) +- File header: `;;; foo-config.el --- brief description -*- lexical-binding: t -*-` +- `(provide 'foo-config)` at the bottom of every module +- `lexical-binding: t` is mandatory — no file without it + +## 
Function Design + +- Keep functions under 15 lines where possible +- One responsibility per function +- Extract helpers instead of nesting deeply — 5+ levels of nesting is a refactor signal +- Prefer named helpers over lambdas for anything nontrivial +- No premature abstraction — three similar lines beats a clever macro + +Small functions are the single strongest defense against paren errors. Deeply nested code is where AI and humans both fail. + +## Requires and Loading + +- Every `(require 'foo)` must correspond to a loadable file on the load-path +- Byte-compile warnings about free variables usually indicate a missing `require` or a typo in a symbol name — read them +- Use `use-package` for external (MELPA/ELPA) packages +- Use plain `(require 'foo-config)` for internal modules +- For optional features, `(when (require 'foo nil t) ...)` degrades gracefully if absent + +## Lexical-Binding Traps + +- `(boundp 'x)` where `x` is a lexical variable always returns nil. Bind with `defvar` at top level if you need `boundp` to work, or use the value directly. +- `setq` on an undeclared free variable is a warning — use `let` for locals or `defvar` for module-level state +- Closures capture by reference. Avoid capturing mutating loop variables in nested defuns. + +## Regex Gotchas + +- `\s` is NOT whitespace in Emacs regex. Use `[ \t]` or `\\s-` (syntax class). +- `^` in `string-match` matches after `\n` OR at position 0 — use `(= (match-beginning 0) start)` for positional checks when that matters. +- `replace-regexp-in-string` interprets backslashes in the replacement. Pass `t t` (FIXEDCASE LITERAL) when the replacement contains literal backslashes. 
+ +## Keybindings + +- `keymap-global-set` for global; `keymap-set KEYMAP ...` for mode-local +- Group module-specific bindings inside the module's file +- Autoload cookies (`;;;###autoload`) don't activate through plain `(require ...)` — use the form directly, not an autoloaded wrapper + +## Module Template + +```elisp +;;; foo-config.el --- Foo feature configuration -*- lexical-binding: t -*- + +;;; Commentary: +;; One-line description. + +;;; Code: + +;; ... code ... + +(provide 'foo-config) +;;; foo-config.el ends here +``` + +Then `(require 'foo-config)` in `init.el` (or a config aggregator). + +## Editing Workflow + +- A PostToolUse hook runs `check-parens` and `byte-compile-file` on every `.el` save +- If it blocks, read the error — don't retry blindly +- Prefer Write over repeated Edits for nontrivial new code; incremental edits accumulate subtle paren mismatches diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md new file mode 100644 index 0000000..b91b76c --- /dev/null +++ b/.claude/rules/testing.md @@ -0,0 +1,277 @@ +# Testing Standards + +Applies to: `**/*` + +Core TDD discipline and test quality rules. Language-specific patterns +(frameworks, fixture idioms, mocking tools) live in per-language testing files +under `languages/<lang>/claude/rules/`. + +## Test-Driven Development (Default) + +TDD is the default workflow for all code, including demos and prototypes. **Write tests first, before any implementation code.** Tests are how you prove you understand the problem — if you can't write a failing test, you don't yet understand what needs to change. + +1. **Red**: Write a failing test that defines the desired behavior +2. **Green**: Write the minimal code to make the test pass +3. **Refactor**: Clean up while keeping tests green + +Do not skip TDD for demo code. Demos build muscle memory — the habit carries into production. + +### Understand Before You Test + +Before writing tests, invest time in understanding the code: + +1. 
**Explore the codebase** — Read the module under test, its callers, and its dependencies. Understand the data flow end to end. +2. **Identify the root cause** — If fixing a bug, trace the problem to its origin. Don't test (or fix) surface symptoms when the real issue is deeper in the call chain. +3. **Reason through edge cases** — Consider boundary conditions, error states, concurrent access, and interactions with adjacent modules. Your tests should cover what could actually go wrong, not just the obvious happy path. + +### Adding Tests to Existing Untested Code + +When working in a codebase without tests: + +1. Write a **characterization test** that captures current behavior before making changes +2. Use the characterization test as a safety net while refactoring +3. Then follow normal TDD for the new change + +## Test Categories (Required for All Code) + +Every unit under test requires coverage across three categories: + +### 1. Normal Cases (Happy Path) +- Standard inputs and expected use cases +- Common workflows and default configurations +- Typical data volumes + +### 2. Boundary Cases +- Minimum/maximum values (0, 1, -1, MAX_INT) +- Empty vs null vs undefined (language-appropriate) +- Single-element collections +- Unicode and internationalization (emoji, RTL text, combining characters) +- Very long strings, deeply nested structures +- Timezone boundaries (midnight, DST transitions) +- Date edge cases (leap years, month boundaries) + +### 3. Error Cases +- Invalid inputs and type mismatches +- Network failures and timeouts +- Missing required parameters +- Permission denied scenarios +- Resource exhaustion +- Malformed data + +## Combinatorial Coverage + +For functions with 3+ parameters that each take multiple values (feature-flag +combinations, config matrices, permission/role interactions, multi-field +form validation, API parameter spaces), the exhaustive test count explodes +(M^N) while 3-5 ad-hoc cases miss pair interactions. 
Use **pairwise / +combinatorial testing** — generate a minimal matrix that hits every 2-way +combination of parameter values. Empirically catches 60-90% of combinatorial +bugs with 80-99% fewer tests. + +Invoke `/pairwise-tests` on the offending function; continue using `/add-tests` +and the Normal/Boundary/Error discipline for the rest. The two approaches +complement: pairwise covers parameter *interactions*; category discipline +covers each parameter's individual edge space. + +Skip pairwise when: the function has 1-2 parameters (just write the cases), +the context requires *provably* exhaustive coverage (regulated systems — document +in an ADR), or the testing target is non-parametric (single happy path, +performance regression, a specific error). + +## Test Organization + +Typical layout: + +``` +tests/ + unit/ # One test file per source file + integration/ # Multi-component workflows + e2e/ # Full system tests +``` + +Per-language files may adjust this (e.g. Elisp collates ERT tests into +`tests/test-<module>*.el` without subdirectories). + +### Testing Pyramid + +Rough proportions for most projects: +- Unit tests: 70-80% (fast, isolated, granular) +- Integration tests: 15-25% (component interactions, real dependencies) +- E2E tests: 5-10% (full system, slowest) + +Don't duplicate coverage: if unit tests fully exercise a function's logic, +integration tests should focus on *how* components interact — not repeat the +function's case coverage. + +## Integration Tests + +Integration tests exercise multiple components together. Two rules: + +**The docstring names every component integrated** and marks which are real vs +mocked. Integration failures are harder to pinpoint than unit failures; +enumerating the participants up front tells you where to start looking. + +Example: + +``` +def test_integration_refund_during_sync_updates_ledger_atomically(): + """Refund processed mid-sync updates order and ledger in one transaction. 
+ + Components integrated: + - OrderService.refund (entry point) + - PaymentGateway.reverse (MOCKED — returns success) + - Ledger.credit (real) + - db.transaction (real) + + Validates: + - Refund rolls back if ledger write fails + - Both tables updated or neither + """ +``` + +**Write an integration test when** multiple components must work together, +state crosses function boundaries, or edge cases combine. **Don't** when +single-function behavior suffices, or when mocking would erase the interaction +you meant to test. + +## Naming Convention + +- Unit: `test_<module>_<function>_<scenario>_<expected>` +- Integration: `test_integration_<workflow>_<scenario>_<outcome>` + +Examples: +- `test_cart_apply_discount_expired_coupon_raises_error` +- `test_integration_order_sync_network_timeout_retries_three_times` + +Languages that prefer camelCase, kebab-case, or other conventions keep the +structure but use their idiom. Consistency within a project matters more than +the specific case choice. + +## Test Quality + +### Independence +- No shared mutable state between tests +- Each test runs successfully in isolation +- Explicit setup and teardown + +### Determinism +- Never hardcode dates or times — generate them relative to `now()` +- No reliance on test execution order +- No flaky network calls in unit tests + +### Performance +- Unit tests: <100ms each +- Integration tests: <1s each +- E2E tests: <10s each +- Mark slow tests with appropriate decorators/tags + +### Mocking Boundaries +Mock external dependencies at the system boundary: +- Network calls (HTTP, gRPC, WebSocket) +- File I/O and cloud storage +- Time and dates +- Third-party service clients + +Never mock: +- The code under test +- Internal domain logic +- Framework behavior (ORM queries, middleware, hooks, buffer primitives) + +### Signs of Overmocking + +Ask yourself: + +- Would this test still pass if I replaced the function body with `raise NotImplementedError` (or equivalent)? 
If yes, the mocks are doing the work — you're testing mocks, not code. +- Is the mock more complex than the function being tested? Smell. +- Am I mocking internal string / parsing / decoding helpers? Those aren't boundaries — they're the work. +- Does the test break when I refactor without changing behavior? Good tests survive refactors; overmocked ones couple to implementation. + +When tests demand heavy internal mocking, the fix isn't better mocks — it's +restructuring the code (see *If Tests Are Hard to Write* below). + +### Testing Code That Uses Frameworks + +When a function mostly delegates to framework or library code, test *your* +integration logic: +- ✓ "I call the library with the right arguments in the right context" +- ✓ "I handle its return value correctly" +- ✗ "The library works in 50 scenarios" — trust it; it has its own tests + +For polyglot behavior (e.g., comment handling across C/Java/Go/JS), test 2-3 +representative modes thoroughly plus a minimal smoke test in the others. +Exhaustive permutations are diminishing returns. + +### Test Real Code, Not Copies + +Never inline or copy production code into test files. Always `require`/`import` +the module under test. Copied code passes even when production breaks — the +bug hides behind the duplicate. + +Mock dependencies at their boundary; exercise the real function body. + +### Error Behavior, Not Error Text + +Test that errors occur with the right type; don't assert exact wording: +- ✓ Right exception type (`pytest.raises(ValueError)`, `(should-error ... :type 'user-error)`) +- ✓ Regex on values the message *must* contain (e.g., the offending filename) +- ✗ `assert str(e) == "File 'foo' not found"` — breaks when prose changes even though behavior is unchanged + +Production code should emit clear, contextual errors. Tests verify the +behavior (raised, caught, returned nil) and values that must appear — not the +prose. 
+ +## If Tests Are Hard to Write, Refactor the Code + +If a test needs extensive mocking of internal helpers, elaborate fixture +scaffolding, or mocks that recreate the function's own logic, the production +code needs restructuring — not the test. + +Signals: +- Deep nesting (callbacks inside callbacks) +- Long functions doing multiple things ("fetch AND parse AND decode AND save") +- Tests that mock internal string / parsing / I/O helpers +- Tests that break on refactors with no behavior change + +Fix: extract focused helpers (one responsibility each), test each in isolation +with real inputs, compose them in a thin outer function. Several small unit +tests plus one composition test beats one monster test behind a wall of mocks. + +## Coverage Targets + +- Business logic and domain services: **90%+** +- API endpoints and views: **80%+** +- UI components: **70%+** +- Utilities and helpers: **90%+** +- Overall project minimum: **80%+** + +New code must not decrease coverage. PRs that lower coverage require justification. + +## TDD Discipline + +TDD is non-negotiable. These are the rationalizations agents use to skip it — don't fall for them: + +| Excuse | Why It's Wrong | +|--------|----------------| +| "This is too simple to need a test" | Simple code breaks too. The test takes 30 seconds. Write it. | +| "I'll add tests after the implementation" | You won't, and even if you do, they'll test what you wrote rather than what was needed. Test-after validates implementation, not behavior. | +| "Let me just get it working first" | That's not TDD. If you can't write a failing test, you don't understand the requirement yet. | +| "This is just a refactor" | Refactors without tests are guesses. Write a characterization test first, then refactor while it stays green. | +| "I'm only changing one line" | One-line changes cause production outages. Write a test that covers the line you're changing. | +| "The existing code has no tests" | Start with a characterization test. 
Don't make the problem worse. | +| "This is demo/prototype code" | Demos build habits. Untested demo code becomes untested production code. | +| "I need to spike first" | Spikes are fine — then throw away the spike, write the test, and implement properly. | + +If you catch yourself thinking any of these, stop and write the test. + +## Anti-Patterns (Do Not Do) + +- Hardcoded dates or timestamps (they rot) +- Testing implementation details instead of behavior +- Mocking the thing you're testing +- Mocking internal helpers (string ops, parsing, decoding) — those are the work +- Inlining production code into test files — always `require` / `import` the real module +- Asserting exact error-message text instead of type + key values +- Shared mutable state between tests +- Non-deterministic tests (random without seed, network in unit tests) +- Testing framework behavior instead of your code +- Ignoring or skipping failing tests without a tracking issue diff --git a/.claude/rules/verification.md b/.claude/rules/verification.md new file mode 100644 index 0000000..8993736 --- /dev/null +++ b/.claude/rules/verification.md @@ -0,0 +1,42 @@ +# Verification Before Completion + +Applies to: `**/*` + +## The Rule + +Do not claim work is done without fresh verification evidence. Run the command, read the output, confirm it matches the claim, then — and only then — declare success. + +This applies to every completion claim: +- "Tests pass" → Run the test suite. Read the output. Confirm all green. +- "Linter is clean" → Run the linter. Read the output. Confirm no warnings. +- "Build succeeds" → Run the build. Read the output. Confirm no errors. +- "Bug is fixed" → Run the reproduction steps. Confirm the bug is gone. +- "No regressions" → Run the full test suite, not just the tests you added. 
+## What Fresh Means
+
+- Run the verification command **now**, in the current session
+- Do not rely on a previous run from before your changes
+- Do not assume your changes didn't break something unrelated
+- Do not extrapolate from partial output — read the whole result
+
+## Red Flags
+
+If you find yourself using these words, you haven't verified:
+
+- "should" ("tests should pass")
+- "probably" ("this probably works")
+- "I believe" ("I believe the build is clean")
+- "based on the changes" ("based on the changes, nothing should break")
+
+Replace beliefs with evidence. Run the command.
+
+## Before Committing
+
+Before any commit:
+1. Run the test suite — confirm all tests pass
+2. Run the linter — confirm no new warnings
+3. Run the type checker — confirm no new errors
+4. Review the diff — confirm only intended changes are staged
+
+Do not commit based on the assumption that nothing broke. Verify.
