aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-16 01:25:50 -0500
committerCraig Jennings <c@cjennings.net>2026-05-16 01:25:50 -0500
commitc67b9aaed47f054b269c0244193e1189e022b939 (patch)
treeecdc149b77e7f4ce7c071735d187ac77a89e907f
parentcd8b6e9e9c40e2fcd83104457afae1b286924a0e (diff)
downloadrulesets-c67b9aaed47f054b269c0244193e1189e022b939.tar.gz
rulesets-c67b9aaed47f054b269c0244193e1189e022b939.zip
chore(ai): sync cj-scan from claude-templates
The project mirror at .ai/scripts/ was missing the wrapper_type state machine and the TestCjScanNestedFencesIgnored suite that landed in dc1661c, because that commit only touched claude-templates/.ai/scripts/. Phase A's startup rsync brings the mirror back in line with the canonical copy.
-rw-r--r--.ai/scripts/cj-scan.py25
-rw-r--r--.ai/scripts/tests/test_cj_scan.py115
2 files changed, 140 insertions, 0 deletions
diff --git a/.ai/scripts/cj-scan.py b/.ai/scripts/cj-scan.py
index 54e2bf9..275f5ca 100644
--- a/.ai/scripts/cj-scan.py
+++ b/.ai/scripts/cj-scan.py
@@ -30,6 +30,7 @@ VALID_VERIFY_DEPTHS = {2, 3}
HEADING_RE = re.compile(r"^(\*+)\s+(.*)$")
SRC_OPEN_RE = re.compile(r"^\s*#\+begin_src\s+cj:\s*(\S*)\s*$", re.IGNORECASE)
SRC_CLOSE_RE = re.compile(r"^\s*#\+end_src\s*$", re.IGNORECASE)
+BLOCK_OPEN_RE = re.compile(r"^\s*#\+begin_(\w+)(?:\s.*)?$", re.IGNORECASE)
LEGACY_CJ_RE = re.compile(r"^\s*cj:\s*(.*)$")
VERIFY_KEYWORD_RE = re.compile(r"^VERIFY(\s|\[|$)")
@@ -66,6 +67,13 @@ def scan_file(path: Path) -> dict[str, object]:
block_label: str | None = None
block_body: list[str] = []
+ # Tracks a non-cj `#+begin_<type>` wrapper currently in scope. Inside a
+ # wrapper, cj fence patterns are *content* (documentation examples,
+ # quoted prose, snippet definitions) -- not annotations -- so we
+ # suppress matching until the wrapper closes. The closer is type-keyed:
+ # `#+end_example` for example, `#+end_src` for src, etc.
+ wrapper_type: str | None = None
+
file_str = str(path)
lines = path.read_text().splitlines()
@@ -90,6 +98,15 @@ def scan_file(path: Path) -> dict[str, object]:
block_body.append(line)
continue
+ if wrapper_type is not None:
+ wrapper_close_re = re.compile(
+ rf"^\s*#\+end_{re.escape(wrapper_type)}\s*$",
+ re.IGNORECASE,
+ )
+ if wrapper_close_re.match(line):
+ wrapper_type = None
+ continue
+
m_heading = HEADING_RE.match(line)
if m_heading:
depth = len(m_heading.group(1))
@@ -110,6 +127,9 @@ def scan_file(path: Path) -> dict[str, object]:
})
continue
+ # cj-open must be checked before the generic begin-block match: a
+ # `#+begin_src cj: ...` line matches both patterns, and cj-open is
+ # the more specific intent.
m_src_open = SRC_OPEN_RE.match(line)
if m_src_open:
in_cj_block = True
@@ -118,6 +138,11 @@ def scan_file(path: Path) -> dict[str, object]:
block_body = []
continue
+ m_block_open = BLOCK_OPEN_RE.match(line)
+ if m_block_open:
+ wrapper_type = m_block_open.group(1).lower()
+ continue
+
m_legacy = LEGACY_CJ_RE.match(line)
if m_legacy:
cj_blocks.append({
diff --git a/.ai/scripts/tests/test_cj_scan.py b/.ai/scripts/tests/test_cj_scan.py
index 7844474..22bd467 100644
--- a/.ai/scripts/tests/test_cj_scan.py
+++ b/.ai/scripts/tests/test_cj_scan.py
@@ -248,3 +248,118 @@ class TestCjScanVerifyPlacement:
)
result = run_scan(content)
assert result["verify_tasks"] == []
+
+
+# ----------------------------------------------------------------------
+# Nested-fence suppression (no false positives inside wrapper blocks)
+# ----------------------------------------------------------------------
+
+class TestCjScanNestedFencesIgnored:
+ """A cj fence nested inside another #+begin_<type> block is content, not an annotation.
+
+ Documentation patterns frequently embed the cj marker inside `#+begin_example`
+ blocks or inside `#+begin_src snippet` yasnippet definitions to *show* what
+ the marker looks like. A naive line-by-line scanner that only watches for
+ `#+begin_src cj:` and `#+end_src` matches those literally and misclassifies
+ the documentation as live annotations.
+ """
+
+ def test_cj_scan_fence_inside_begin_example_ignored(self, run_scan):
+ """Normal: cj fence inside #+begin_example ... #+end_example is documentation, not annotation."""
+ content = (
+ "* Section\n"
+ "Here is what the cj marker looks like:\n"
+ "#+begin_example\n"
+ "#+begin_src cj: comment\n"
+ "I am documentation, not a real annotation\n"
+ "#+end_src\n"
+ "#+end_example\n"
+ )
+ result = run_scan(content)
+ assert result["cj_blocks"] == []
+ assert result["unclosed_blocks"] == []
+
+ def test_cj_scan_fence_inside_begin_src_other_lang_ignored(self, run_scan):
+ """Normal: cj fence inside #+begin_src snippet (or any non-cj src block) is content.
+
+ The outer #+begin_src snippet block claims everything until the FIRST
+ matching #+end_src, so the inner `#+begin_src cj:` line is literal text
+ inside that block — not a nested cj annotation.
+ """
+ content = (
+ "* Section\n"
+ "#+begin_src snippet\n"
+ "# name: cj-comment-block\n"
+ "# --\n"
+ "#+begin_src cj: comment\n"
+ "$0\n"
+ "#+end_src\n"
+ "#+end_src\n"
+ )
+ result = run_scan(content)
+ assert result["cj_blocks"] == []
+
+ def test_cj_scan_fence_inside_begin_quote_ignored(self, run_scan):
+ """Boundary: cj fence inside #+begin_quote ... #+end_quote is quoted prose, not annotation."""
+ content = (
+ "* Section\n"
+ "#+begin_quote\n"
+ "#+begin_src cj: comment\n"
+ "quoted, not active\n"
+ "#+end_src\n"
+ "#+end_quote\n"
+ )
+ result = run_scan(content)
+ assert result["cj_blocks"] == []
+
+ def test_cj_scan_real_cj_after_example_block_still_detected(self, run_scan):
+ """Normal: after a wrapper block closes, a subsequent real cj fence is detected.
+
+ Regression guard: the wrapper-tracking state must reset when the
+ wrapper closes; otherwise everything after the first example block
+ would be silently swallowed.
+ """
+ content = (
+ "* Section\n"
+ "#+begin_example\n"
+ "#+begin_src cj: comment\n"
+ "doc, ignored\n"
+ "#+end_src\n"
+ "#+end_example\n"
+ "#+begin_src cj: comment\n"
+ "this one is real\n"
+ "#+end_src\n"
+ )
+ result = run_scan(content)
+ assert len(result["cj_blocks"]) == 1
+ assert result["cj_blocks"][0]["body"] == "this one is real"
+
+ def test_cj_scan_legacy_inline_inside_wrapper_ignored(self, run_scan):
+ """Boundary: legacy `cj: ...` line inside a wrapper block is content, not annotation."""
+ content = (
+ "* Section\n"
+ "#+begin_example\n"
+ "cj: this is a documentation example, not a real annotation\n"
+ "#+end_example\n"
+ )
+ result = run_scan(content)
+ assert result["cj_blocks"] == []
+
+ def test_cj_scan_unclosed_wrapper_does_not_swallow_rest_of_file_silently(self, run_scan):
+ """Error: an unclosed wrapper consumes everything after it, so cj_blocks stays empty; the wrapper itself is not reported in unclosed_blocks.
+
+ Pinning current behaviour: an unclosed `#+begin_example` is a separate
+ org-level malformation that this scanner doesn't currently report. The
+ guard here is that the unclosed-wrapper case doesn't produce
+ false-positive cj_blocks downstream.
+ """
+ content = (
+ "* Section\n"
+ "#+begin_example\n"
+ "#+begin_src cj: comment\n"
+ "should be content of the unclosed example\n"
+ "#+end_src\n"
+ "more content here, no end_example\n"
+ )
+ result = run_scan(content)
+ assert result["cj_blocks"] == []