"""Tests for cj-scan.py — org-file cj-annotation scanner. The script parses an org file and emits JSON describing: - cj_blocks: every cj annotation found (source-block or legacy-inline form) - verify_tasks: every VERIFY heading + placement validity (top-level or first-level child only) - unclosed_blocks: any source-block fence that opened but never closed """ import json import subprocess from pathlib import Path import pytest SCRIPT = Path(__file__).parent.parent / "cj-scan.py" @pytest.fixture def run_scan(tmp_path): """Write content to a temp org file and run cj-scan; return parsed JSON output.""" def _run(content: str) -> dict: f = tmp_path / "test.org" f.write_text(content) result = subprocess.run( ["python3", str(SCRIPT), str(f)], capture_output=True, text=True, check=True, ) return json.loads(result.stdout) return _run # ---------------------------------------------------------------------- # cj-block detection # ---------------------------------------------------------------------- class TestCjScanCjBlockDetection: """Detection of cj annotations — source-block and legacy-inline forms.""" def test_cj_scan_source_block_single_detected(self, run_scan): """Normal: a single source-block cj is detected with correct line range and body.""" content = "* Section\n#+begin_src cj: comment\nplease check this\n#+end_src\n" result = run_scan(content) assert len(result["cj_blocks"]) == 1 b = result["cj_blocks"][0] assert b["form"] == "source-block" assert b["body"] == "please check this" assert b["start_line"] == 2 assert b["end_line"] == 4 def test_cj_scan_source_block_multiline_body_preserved(self, run_scan): """Normal: multi-line body is preserved with embedded newlines.""" content = "* S\n#+begin_src cj: comment\nline 1\nline 2\nline 3\n#+end_src\n" result = run_scan(content) assert result["cj_blocks"][0]["body"] == "line 1\nline 2\nline 3" def test_cj_scan_multiple_source_blocks_each_detected(self, run_scan): """Normal: multiple source-blocks in a file are detected as separate items.""" content = ( "* A\n#+begin_src cj: comment\nfirst\n#+end_src\n" "* B\n#+begin_src cj: comment\nsecond\n#+end_src\n" ) result = run_scan(content) assert len(result["cj_blocks"]) == 2 bodies = [b["body"] for b in result["cj_blocks"]] assert bodies == ["first", "second"] def test_cj_scan_legacy_inline_single_line_detected(self, run_scan): """Normal: a legacy inline cj line is detected with form=legacy-inline.""" content = "* Section\ncj: please check this\n" result = run_scan(content) assert len(result["cj_blocks"]) == 1 b = result["cj_blocks"][0] assert b["form"] == "legacy-inline" assert b["body"] == "please check this" assert b["start_line"] == 2 assert b["end_line"] == 2 def test_cj_scan_mixed_forms_in_same_file(self, run_scan): """Normal: source-block + legacy inline coexist; both detected as separate items.""" content = ( "* A\ncj: legacy form\n" "* B\n#+begin_src cj: comment\nnew form\n#+end_src\n" ) result = run_scan(content) assert len(result["cj_blocks"]) == 2 forms = sorted(b["form"] for b in result["cj_blocks"]) assert forms == ["legacy-inline", "source-block"] def test_cj_scan_empty_file_returns_empty_lists(self, run_scan): """Boundary: empty file → empty cj_blocks and verify_tasks lists.""" result = run_scan("") assert result["cj_blocks"] == [] assert result["verify_tasks"] == [] assert result["unclosed_blocks"] == [] def test_cj_scan_no_cj_content_returns_empty_blocks(self, run_scan): """Boundary: org file with no cj content → empty cj_blocks.""" content = "* Section\n** TODO Task\nbody text\n** TODO Another\n" result = run_scan(content) assert result["cj_blocks"] == [] def test_cj_scan_block_before_any_heading_empty_chain(self, run_scan): """Boundary: cj block at top of file (before any heading) → empty parent chain.""" content = "#+begin_src cj: comment\ntop-level note\n#+end_src\n" result = run_scan(content) assert result["cj_blocks"][0]["parent_heading_chain"] == [] assert result["cj_blocks"][0]["parent_depth"] == 0 @pytest.mark.parametrize("fence", [ "#+begin_src cj: comment", "#+begin_src cj:", "#+begin_src cj: anything", "#+BEGIN_SRC cj: comment", # case-insensitive ]) def test_cj_scan_source_block_fence_variants_all_recognized(self, run_scan, fence): """Boundary: fence label and case variants are all valid forms.""" content = f"* S\n{fence}\nbody\n#+end_src\n" result = run_scan(content) assert len(result["cj_blocks"]) == 1 assert result["cj_blocks"][0]["body"] == "body" def test_cj_scan_unclosed_source_block_reported(self, run_scan): """Error: a source-block that opens but never closes → reported in unclosed_blocks.""" content = "* S\n#+begin_src cj: comment\nbody that never ends\n" result = run_scan(content) assert result["cj_blocks"] == [] assert len(result["unclosed_blocks"]) == 1 assert result["unclosed_blocks"][0]["start_line"] == 2 # ---------------------------------------------------------------------- # Parent heading chain reconstruction # ---------------------------------------------------------------------- class TestCjScanParentChain: """Parent heading chain construction — walking the org tree backward.""" def test_cj_scan_nested_parent_chain_three_levels(self, run_scan): """Normal: cj block inside three nested headings → chain reflects all three.""" content = ( "* Work\n" "** DOING [#A] Kostya's contract\n" "*** VERIFY Question?\n" "#+begin_src cj: comment\nanswer\n#+end_src\n" ) result = run_scan(content) chain = result["cj_blocks"][0]["parent_heading_chain"] assert len(chain) == 3 assert chain[0] == {"depth": 1, "heading": "Work"} assert chain[1] == {"depth": 2, "heading": "DOING [#A] Kostya's contract"} assert chain[2] == {"depth": 3, "heading": "VERIFY Question?"} assert result["cj_blocks"][0]["parent_depth"] == 3 def test_cj_scan_depth_skip_only_actual_ancestors(self, run_scan): """Normal: heading depth skip (e.g., * then ***) → chain captures only present headings.""" content = "* Section\n*** Deep child\n#+begin_src cj: comment\nbody\n#+end_src\n" result = run_scan(content) chain = result["cj_blocks"][0]["parent_heading_chain"] assert [h["depth"] for h in chain] == [1, 3] def test_cj_scan_shallower_sibling_pops_deeper_frames(self, run_scan): """Normal: when a shallower heading appears, deeper frames pop off the stack.""" content = ( "* A\n** A.1\n*** A.1.1\n" "** B\n" "#+begin_src cj: comment\nunder B\n#+end_src\n" ) result = run_scan(content) chain = result["cj_blocks"][0]["parent_heading_chain"] assert len(chain) == 2 assert chain[0]["heading"] == "A" assert chain[1]["heading"] == "B" # ---------------------------------------------------------------------- # VERIFY task detection + placement audit # ---------------------------------------------------------------------- class TestCjScanVerifyPlacement: """VERIFY task detection and placement audit per the canonical rule.""" def test_cj_scan_verify_at_depth_2_is_valid(self, run_scan): """Normal: ** VERIFY (top-level) is valid placement.""" content = "* Work\n** VERIFY [#C] Hayk's Farearth Evaluation :research:hayk:\n" result = run_scan(content) assert len(result["verify_tasks"]) == 1 v = result["verify_tasks"][0] assert v["depth"] == 2 assert v["valid_depth"] is True assert v["promotion_target"] is None def test_cj_scan_verify_at_depth_3_is_valid(self, run_scan): """Normal: *** VERIFY (first-level child) is valid placement.""" content = "* Work\n** TODO Parent\n*** VERIFY Question?\n" result = run_scan(content) v = result["verify_tasks"][0] assert v["depth"] == 3 assert v["valid_depth"] is True def test_cj_scan_verify_at_depth_4_invalid_promote_to_3(self, run_scan): """Normal: **** VERIFY is buried; suggests promotion to depth 3.""" content = "* W\n** P\n*** Q\n**** VERIFY Buried?\n" result = run_scan(content) v = result["verify_tasks"][0] assert v["depth"] == 4 assert v["valid_depth"] is False assert v["promotion_target"] == 3 def test_cj_scan_verify_at_depth_6_invalid_promote_to_3(self, run_scan): """Normal: ****** VERIFY at any deep level → promotion target is still 3.""" content = "* W\n** P\n*** Q\n**** Q2\n***** Q3\n****** VERIFY Very buried?\n" result = run_scan(content) v = result["verify_tasks"][0] assert v["depth"] == 6 assert v["promotion_target"] == 3 def test_cj_scan_verify_at_depth_1_invalid_promote_to_2(self, run_scan): """Boundary: * VERIFY at top-section depth → promotion target is 2 (top-level under section).""" content = "* VERIFY Should-be-deeper\n" result = run_scan(content) v = result["verify_tasks"][0] assert v["depth"] == 1 assert v["valid_depth"] is False assert v["promotion_target"] == 2 def test_cj_scan_verify_heading_with_priority_and_tags(self, run_scan): """Boundary: VERIFY heading with priority cookie + tags → heading text captured fully.""" content = "* W\n** VERIFY [#C] Hayk's Farearth Evaluation :research:hayk:\n" result = run_scan(content) v = result["verify_tasks"][0] assert "Hayk's Farearth Evaluation" in v["heading"] assert ":research:" in v["heading"] def test_cj_scan_no_verify_tasks_empty_list(self, run_scan): """Boundary: file with only TODO/DOING headings → empty verify_tasks list.""" content = "* W\n** TODO X\n*** DOING Y\n" result = run_scan(content) assert result["verify_tasks"] == [] def test_cj_scan_verify_word_in_body_is_not_a_task(self, run_scan): """Error: the word VERIFY appearing in body prose is not detected as a task.""" content = ( "* Work\n" "** TODO Important task\n" "Body line mentioning VERIFY in prose.\n" ) result = run_scan(content) assert result["verify_tasks"] == [] # ---------------------------------------------------------------------- # Nested-fence suppression (no false positives inside wrapper blocks) # ---------------------------------------------------------------------- class TestCjScanNestedFencesIgnored: """A cj fence nested inside another #+begin_ block is content, not an annotation. Documentation patterns frequently embed the cj marker inside `#+begin_example` blocks or inside `#+begin_src snippet` yasnippet definitions to *show* what the marker looks like. A naive line-by-line scanner that only watches for `#+begin_src cj:` and `#+end_src` matches those literally and misclassifies the documentation as live annotations. """ def test_cj_scan_fence_inside_begin_example_ignored(self, run_scan): """Normal: cj fence inside #+begin_example ... #+end_example is documentation, not annotation.""" content = ( "* Section\n" "Here is what the cj marker looks like:\n" "#+begin_example\n" "#+begin_src cj: comment\n" "I am documentation, not a real annotation\n" "#+end_src\n" "#+end_example\n" ) result = run_scan(content) assert result["cj_blocks"] == [] assert result["unclosed_blocks"] == [] def test_cj_scan_fence_inside_begin_src_other_lang_ignored(self, run_scan): """Normal: cj fence inside #+begin_src snippet (or any non-cj src block) is content. The outer #+begin_src snippet block claims everything until the FIRST matching #+end_src, so the inner `#+begin_src cj:` line is literal text inside that block — not a nested cj annotation. """ content = ( "* Section\n" "#+begin_src snippet\n" "# name: cj-comment-block\n" "# --\n" "#+begin_src cj: comment\n" "$0\n" "#+end_src\n" "#+end_src\n" ) result = run_scan(content) assert result["cj_blocks"] == [] def test_cj_scan_fence_inside_begin_quote_ignored(self, run_scan): """Boundary: cj fence inside #+begin_quote ... #+end_quote is quoted prose, not annotation.""" content = ( "* Section\n" "#+begin_quote\n" "#+begin_src cj: comment\n" "quoted, not active\n" "#+end_src\n" "#+end_quote\n" ) result = run_scan(content) assert result["cj_blocks"] == [] def test_cj_scan_real_cj_after_example_block_still_detected(self, run_scan): """Normal: after a wrapper block closes, a subsequent real cj fence is detected. Regression guard: the wrapper-tracking state must reset when the wrapper closes; otherwise everything after the first example block would be silently swallowed. """ content = ( "* Section\n" "#+begin_example\n" "#+begin_src cj: comment\n" "doc, ignored\n" "#+end_src\n" "#+end_example\n" "#+begin_src cj: comment\n" "this one is real\n" "#+end_src\n" ) result = run_scan(content) assert len(result["cj_blocks"]) == 1 assert result["cj_blocks"][0]["body"] == "this one is real" def test_cj_scan_legacy_inline_inside_wrapper_ignored(self, run_scan): """Boundary: legacy `cj: ...` line inside a wrapper block is content, not annotation.""" content = ( "* Section\n" "#+begin_example\n" "cj: this is a documentation example, not a real annotation\n" "#+end_example\n" ) result = run_scan(content) assert result["cj_blocks"] == [] def test_cj_scan_unclosed_wrapper_does_not_swallow_rest_of_file_silently(self, run_scan): """Error: an unclosed wrapper consumes everything after it — cj_blocks empty, not unclosed_blocks. Pinning current behaviour: an unclosed `#+begin_example` is a separate org-level malformation that this scanner doesn't currently report. The guard here is that the unclosed-wrapper case doesn't produce false-positive cj_blocks downstream. """ content = ( "* Section\n" "#+begin_example\n" "#+begin_src cj: comment\n" "should be content of the unclosed example\n" "#+end_src\n" "more content here, no end_example\n" ) result = run_scan(content) assert result["cj_blocks"] == []