diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-16 01:25:50 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-16 01:25:50 -0500 |
| commit | c67b9aaed47f054b269c0244193e1189e022b939 (patch) | |
| tree | ecdc149b77e7f4ce7c071735d187ac77a89e907f | |
| parent | cd8b6e9e9c40e2fcd83104457afae1b286924a0e (diff) | |
| download | rulesets-c67b9aaed47f054b269c0244193e1189e022b939.tar.gz rulesets-c67b9aaed47f054b269c0244193e1189e022b939.zip | |
chore(ai): sync cj-scan from claude-templates
The project mirror at .ai/scripts/ was missing the wrapper_type state machine and the TestCjScanNestedFencesIgnored suite that landed in dc1661c. That commit only touched claude-templates/.ai/scripts/. Phase A's startup rsync brings the mirror back in line with the canonical copy.
| -rw-r--r-- | .ai/scripts/cj-scan.py | 25 | ||||
| -rw-r--r-- | .ai/scripts/tests/test_cj_scan.py | 115 |
2 files changed, 140 insertions, 0 deletions
diff --git a/.ai/scripts/cj-scan.py b/.ai/scripts/cj-scan.py index 54e2bf9..275f5ca 100644 --- a/.ai/scripts/cj-scan.py +++ b/.ai/scripts/cj-scan.py @@ -30,6 +30,7 @@ VALID_VERIFY_DEPTHS = {2, 3} HEADING_RE = re.compile(r"^(\*+)\s+(.*)$") SRC_OPEN_RE = re.compile(r"^\s*#\+begin_src\s+cj:\s*(\S*)\s*$", re.IGNORECASE) SRC_CLOSE_RE = re.compile(r"^\s*#\+end_src\s*$", re.IGNORECASE) +BLOCK_OPEN_RE = re.compile(r"^\s*#\+begin_(\w+)(?:\s.*)?$", re.IGNORECASE) LEGACY_CJ_RE = re.compile(r"^\s*cj:\s*(.*)$") VERIFY_KEYWORD_RE = re.compile(r"^VERIFY(\s|\[|$)") @@ -66,6 +67,13 @@ def scan_file(path: Path) -> dict[str, object]: block_label: str | None = None block_body: list[str] = [] + # Tracks a non-cj `#+begin_<type>` wrapper currently in scope. Inside a + # wrapper, cj fence patterns are *content* (documentation examples, + # quoted prose, snippet definitions) -- not annotations -- so we + # suppress matching until the wrapper closes. The closer is type-keyed: + # `#+end_example` for example, `#+end_src` for src, etc. + wrapper_type: str | None = None + file_str = str(path) lines = path.read_text().splitlines() @@ -90,6 +98,15 @@ def scan_file(path: Path) -> dict[str, object]: block_body.append(line) continue + if wrapper_type is not None: + wrapper_close_re = re.compile( + rf"^\s*#\+end_{re.escape(wrapper_type)}\s*$", + re.IGNORECASE, + ) + if wrapper_close_re.match(line): + wrapper_type = None + continue + m_heading = HEADING_RE.match(line) if m_heading: depth = len(m_heading.group(1)) @@ -110,6 +127,9 @@ def scan_file(path: Path) -> dict[str, object]: }) continue + # cj-open must be checked before the generic begin-block match: a + # `#+begin_src cj: ...` line matches both patterns, and cj-open is + # the more specific intent. 
m_src_open = SRC_OPEN_RE.match(line) if m_src_open: in_cj_block = True @@ -118,6 +138,11 @@ def scan_file(path: Path) -> dict[str, object]: block_body = [] continue + m_block_open = BLOCK_OPEN_RE.match(line) + if m_block_open: + wrapper_type = m_block_open.group(1).lower() + continue + m_legacy = LEGACY_CJ_RE.match(line) if m_legacy: cj_blocks.append({ diff --git a/.ai/scripts/tests/test_cj_scan.py b/.ai/scripts/tests/test_cj_scan.py index 7844474..22bd467 100644 --- a/.ai/scripts/tests/test_cj_scan.py +++ b/.ai/scripts/tests/test_cj_scan.py @@ -248,3 +248,118 @@ class TestCjScanVerifyPlacement: ) result = run_scan(content) assert result["verify_tasks"] == [] + + +# ---------------------------------------------------------------------- +# Nested-fence suppression (no false positives inside wrapper blocks) +# ---------------------------------------------------------------------- + +class TestCjScanNestedFencesIgnored: + """A cj fence nested inside another #+begin_<type> block is content, not an annotation. + + Documentation patterns frequently embed the cj marker inside `#+begin_example` + blocks or inside `#+begin_src snippet` yasnippet definitions to *show* what + the marker looks like. A naive line-by-line scanner that only watches for + `#+begin_src cj:` and `#+end_src` matches those literally and misclassifies + the documentation as live annotations. + """ + + def test_cj_scan_fence_inside_begin_example_ignored(self, run_scan): + """Normal: cj fence inside #+begin_example ... 
#+end_example is documentation, not annotation.""" + content = ( + "* Section\n" + "Here is what the cj marker looks like:\n" + "#+begin_example\n" + "#+begin_src cj: comment\n" + "I am documentation, not a real annotation\n" + "#+end_src\n" + "#+end_example\n" + ) + result = run_scan(content) + assert result["cj_blocks"] == [] + assert result["unclosed_blocks"] == [] + + def test_cj_scan_fence_inside_begin_src_other_lang_ignored(self, run_scan): + """Normal: cj fence inside #+begin_src snippet (or any non-cj src block) is content. + + The outer #+begin_src snippet block claims everything until the FIRST + matching #+end_src, so the inner `#+begin_src cj:` line is literal text + inside that block — not a nested cj annotation. + """ + content = ( + "* Section\n" + "#+begin_src snippet\n" + "# name: cj-comment-block\n" + "# --\n" + "#+begin_src cj: comment\n" + "$0\n" + "#+end_src\n" + "#+end_src\n" + ) + result = run_scan(content) + assert result["cj_blocks"] == [] + + def test_cj_scan_fence_inside_begin_quote_ignored(self, run_scan): + """Boundary: cj fence inside #+begin_quote ... #+end_quote is quoted prose, not annotation.""" + content = ( + "* Section\n" + "#+begin_quote\n" + "#+begin_src cj: comment\n" + "quoted, not active\n" + "#+end_src\n" + "#+end_quote\n" + ) + result = run_scan(content) + assert result["cj_blocks"] == [] + + def test_cj_scan_real_cj_after_example_block_still_detected(self, run_scan): + """Normal: after a wrapper block closes, a subsequent real cj fence is detected. + + Regression guard: the wrapper-tracking state must reset when the + wrapper closes; otherwise everything after the first example block + would be silently swallowed. 
+ """ + content = ( + "* Section\n" + "#+begin_example\n" + "#+begin_src cj: comment\n" + "doc, ignored\n" + "#+end_src\n" + "#+end_example\n" + "#+begin_src cj: comment\n" + "this one is real\n" + "#+end_src\n" + ) + result = run_scan(content) + assert len(result["cj_blocks"]) == 1 + assert result["cj_blocks"][0]["body"] == "this one is real" + + def test_cj_scan_legacy_inline_inside_wrapper_ignored(self, run_scan): + """Boundary: legacy `cj: ...` line inside a wrapper block is content, not annotation.""" + content = ( + "* Section\n" + "#+begin_example\n" + "cj: this is a documentation example, not a real annotation\n" + "#+end_example\n" + ) + result = run_scan(content) + assert result["cj_blocks"] == [] + + def test_cj_scan_unclosed_wrapper_does_not_swallow_rest_of_file_silently(self, run_scan): + """Error: an unclosed wrapper consumes everything after it — cj_blocks empty, not unclosed_blocks. + + Pinning current behaviour: an unclosed `#+begin_example` is a separate + org-level malformation that this scanner doesn't currently report. The + guard here is that the unclosed-wrapper case doesn't produce + false-positive cj_blocks downstream. + """ + content = ( + "* Section\n" + "#+begin_example\n" + "#+begin_src cj: comment\n" + "should be content of the unclosed example\n" + "#+end_src\n" + "more content here, no end_example\n" + ) + result = run_scan(content) + assert result["cj_blocks"] == [] |
