diff options
| -rw-r--r-- | modules/org-webclipper.el | 46 | ||||
| -rw-r--r-- | tests/test-org-webclipper-process.el | 210 |
2 files changed, 239 insertions, 17 deletions
diff --git a/modules/org-webclipper.el b/modules/org-webclipper.el index 67bfa6dd..7b024e43 100644 --- a/modules/org-webclipper.el +++ b/modules/org-webclipper.el @@ -11,6 +11,7 @@ ;; - Automatic conversion to Org format using eww-readable and Pandoc ;; - One-click capture from any web page ;; - Preserves page structure and formatting +;; - Smart heading adjustment (removes page title, demotes remaining headings) ;; ;; Setup: ;; 1. Ensure this file is loaded in your Emacs configuration @@ -30,6 +31,11 @@ ;; The clipped content will be added to the file specified by `webclipped-file` ;; under the "Webclipped Inbox" heading with proper formatting and metadata. ;; +;; Architecture: +;; - cj/--process-webclip-content: Pure function for content processing +;; - cj/org-protocol-webclip-handler: Handles URL fetching and capture +;; - cj/org-webclipper-EWW: Direct capture from EWW/W3M buffers +;; ;; Requirements: ;; - org-web-tools package ;; - Pandoc installed on your system @@ -89,6 +95,28 @@ (setq cj/webclipper-initialized t))) +(defun cj/--process-webclip-content (org-content) + "Process webclip ORG-CONTENT by removing first heading and demoting others. +ORG-CONTENT is the raw org-mode text from the web page conversion. +Returns the processed content as a string with: +- First top-level heading removed +- Initial blank lines removed +- All remaining headings demoted by one level" + (with-temp-buffer + (insert org-content) + (goto-char (point-min)) + ;; Skip the first heading line (we'll use our template's heading) + (when (looking-at "^\\* .*\n") + (delete-region (match-beginning 0) (match-end 0))) + ;; Remove any initial blank lines + (while (looking-at "^[ \t]*\n") + (delete-char 1)) + ;; Demote all remaining headings by one level + ;; since our template already provides the top-level heading + (while (re-search-forward "^\\(\\*+\\) " nil t) + (replace-match (concat (match-string 1) "* ") t t)) + (buffer-string))) + (defun cj/org-protocol-webclip (info) "Process org-protocol webclip requests. INFO is a plist containing :url and :title from the org-protocol call." @@ -121,22 +149,7 @@ It fetches the page content and converts it to Org format." (error "No URL provided for clipping") (condition-case err (let* ((org-content (org-web-tools--url-as-readable-org url)) - ;; Process the content to adjust heading levels - (processed-content - (with-temp-buffer - (insert org-content) - (goto-char (point-min)) - ;; Skip the first heading line (we'll use our template's heading) - (when (looking-at "^\\* .*\n") - (delete-region (match-beginning 0) (match-end 0))) - ;; Remove any initial blank lines - (while (looking-at "^[ \t]*\n") - (delete-char 1)) - ;; Demote all remaining headings by one level - ;; since our template already provides the top-level heading - (while (re-search-forward "^\\(\\*+\\) " nil t) - (replace-match (concat (match-string 1) "* ") t t)) - (buffer-string)))) + (processed-content (cj/--process-webclip-content org-content))) ;; Show success message with the title (require 'user-constants) ;; Ensure webclipped-file is available (message "'%s' added to %s" title webclipped-file) @@ -171,7 +184,6 @@ Return the yanked content as a string so templates can insert it." ;; ----------------------------- Webclipper Keymap ----------------------------- ;; keymaps shouldn't be required for webclipper -;; TASK Move org-branch to roam functionality under org-roam ;; Setup keymaps ;; ;; (defun cj/webclipper-setup-keymaps () diff --git a/tests/test-org-webclipper-process.el b/tests/test-org-webclipper-process.el new file mode 100644 index 00000000..9a25ef5c --- /dev/null +++ b/tests/test-org-webclipper-process.el @@ -0,0 +1,210 @@ +;;; test-org-webclipper-process.el --- Tests for cj/--process-webclip-content -*- lexical-binding: t; -*- + +;;; Commentary: +;; Tests for the cj/--process-webclip-content function from org-webclipper.el +;; +;; This function processes webclipped org-mode content by: +;; 1. Removing the first top-level heading +;; 2. Removing any initial blank lines +;; 3. Demoting all remaining headings by one level +;; +;; Examples: +;; Input: "* Title\nContent\n** Sub\n" +;; Output: "Content\n*** Sub\n" +;; +;; Input: "* Title\n\n\n** Sub\n" +;; Output: "*** Sub\n" + +;;; Code: + +(require 'ert) +(require 'testutil-general) + +;; Add modules directory to load path +(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory)) + +;; Now load the actual production module +(require 'org-webclipper) + +;;; Test Helpers + +(defun test-process-webclip (content) + "Test cj/--process-webclip-content on CONTENT. +Returns the processed content." + (cj/--process-webclip-content content)) + +;;; Normal Cases - Single Heading Removal + +(ert-deftest test-process-removes-first-heading () + "Should remove the first top-level heading." + (let ((result (test-process-webclip "* Title\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-removes-heading-with-text () + "Should remove first heading preserving body text." + (let ((result (test-process-webclip "* Page Title\nParagraph text\n"))) + (should (string= result "Paragraph text\n")))) + +(ert-deftest test-process-removes-heading-with-tags () + "Should remove first heading even with tags." + (let ((result (test-process-webclip "* Title :tag1:tag2:\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-removes-heading-with-todo () + "Should remove first heading even with TODO keyword." + (let ((result (test-process-webclip "* TODO Task\nContent\n"))) + (should (string= result "Content\n")))) + +;;; Normal Cases - Blank Line Removal + +(ert-deftest test-process-removes-single-blank-line () + "Should remove single blank line after heading removal." + (let ((result (test-process-webclip "* Title\n\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-removes-multiple-blank-lines () + "Should remove multiple blank lines after heading removal." + (let ((result (test-process-webclip "* Title\n\n\n\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-removes-blank-lines-with-spaces () + "Should remove blank lines that contain only spaces." + (let ((result (test-process-webclip "* Title\n \n\t\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-preserves-blank-lines-in-content () + "Should preserve blank lines within the content." + (let ((result (test-process-webclip "* Title\nPara 1\n\nPara 2\n"))) + (should (string= result "Para 1\n\nPara 2\n")))) + +;;; Normal Cases - Heading Demotion + +(ert-deftest test-process-demotes-second-level () + "Should demote level 2 heading to level 3." + (let ((result (test-process-webclip "* Title\n** Section\n"))) + (should (string= result "*** Section\n")))) + +(ert-deftest test-process-demotes-third-level () + "Should demote level 3 heading to level 4." + (let ((result (test-process-webclip "* Title\n*** Subsection\n"))) + (should (string= result "**** Subsection\n")))) + +(ert-deftest test-process-demotes-multiple-headings () + "Should demote all headings in the content." + (let ((result (test-process-webclip "* Title\n** Section 1\n** Section 2\n"))) + (should (string= result "*** Section 1\n*** Section 2\n")))) + +(ert-deftest test-process-demotes-nested-hierarchy () + "Should demote nested heading structure." + (let ((result (test-process-webclip "* Title\n** Section\n*** Subsection\n"))) + (should (string= result "*** Section\n**** Subsection\n")))) + +;;; Normal Cases - Combined Processing + +(ert-deftest test-process-full-workflow () + "Should remove heading, blank lines, and demote remaining headings." + (let ((result (test-process-webclip "* Article Title\n\n** Introduction\nText\n** Conclusion\n"))) + (should (string= result "*** Introduction\nText\n*** Conclusion\n")))) + +(ert-deftest test-process-with-properties () + "Should preserve properties in demoted headings." + (let ((result (test-process-webclip "* Title\n** Heading\n:PROPERTIES:\n:ID: 123\n:END:\n"))) + (should (string= result "*** Heading\n:PROPERTIES:\n:ID: 123\n:END:\n")))) + +(ert-deftest test-process-with-mixed-content () + "Should handle mixed text and headings." + (let ((result (test-process-webclip "* Title\nIntro text\n** Section\nBody text\n"))) + (should (string= result "Intro text\n*** Section\nBody text\n")))) + +;;; Edge Cases - Empty and Minimal Content + +(ert-deftest test-process-empty-string () + "Should return empty string for empty input." + (let ((result (test-process-webclip ""))) + (should (string= result "")))) + +(ert-deftest test-process-only-heading () + "Should return empty string when only first heading present." + (let ((result (test-process-webclip "* Title\n"))) + (should (string= result "")))) + +(ert-deftest test-process-only-blank-lines () + "Should return empty string for only blank lines after heading." + (let ((result (test-process-webclip "* Title\n\n\n"))) + (should (string= result "")))) + +(ert-deftest test-process-no-heading () + "Should handle content without any heading." + (let ((result (test-process-webclip "Just plain text\n"))) + (should (string= result "Just plain text\n")))) + +(ert-deftest test-process-heading-no-newline () + "Should demote heading without trailing newline (doesn't match removal pattern)." + (let ((result (test-process-webclip "* Title"))) + (should (string= result "** Title")))) + +;;; Edge Cases - Heading Variations + +(ert-deftest test-process-heading-without-space () + "Should not match heading without space after stars." + (let ((result (test-process-webclip "*Title\nContent\n"))) + (should (string= result "*Title\nContent\n")))) + +(ert-deftest test-process-multiple-top-level-headings () + "Should only remove first top-level heading." + (let ((result (test-process-webclip "* Title 1\n* Title 2\n"))) + (should (string= result "** Title 2\n")))) + +(ert-deftest test-process-heading-with-priority () + "Should remove heading with priority marker." + (let ((result (test-process-webclip "* [#A] Important\nContent\n"))) + (should (string= result "Content\n")))) + +(ert-deftest test-process-heading-with-links () + "Should remove heading containing links." + (let ((result (test-process-webclip "* [[url][Link Title]]\nContent\n"))) + (should (string= result "Content\n")))) + +;;; Edge Cases - Special Content + +(ert-deftest test-process-preserves-lists () + "Should preserve list formatting." + (let ((result (test-process-webclip "* Title\n- Item 1\n- Item 2\n"))) + (should (string= result "- Item 1\n- Item 2\n")))) + +(ert-deftest test-process-preserves-code-blocks () + "Should preserve code block content." + (let ((result (test-process-webclip "* Title\n#+BEGIN_SRC python\nprint('hi')\n#+END_SRC\n"))) + (should (string= result "#+BEGIN_SRC python\nprint('hi')\n#+END_SRC\n")))) + +(ert-deftest test-process-preserves-tables () + "Should preserve org table content." + (let ((result (test-process-webclip "* Title\n| A | B |\n| 1 | 2 |\n"))) + (should (string= result "| A | B |\n| 1 | 2 |\n")))) + +;;; Edge Cases - Deep Nesting + +(ert-deftest test-process-very-deep-headings () + "Should demote very deep heading structures." + (let ((result (test-process-webclip "* Title\n****** Level 6\n"))) + (should (string= result "******* Level 6\n")))) + +(ert-deftest test-process-complex-document () + "Should handle complex document structure." + (let ((result (test-process-webclip "* Main Title\n\n** Section 1\nText 1\n*** Subsection 1.1\nText 2\n** Section 2\nText 3\n"))) + (should (string= result "*** Section 1\nText 1\n**** Subsection 1.1\nText 2\n*** Section 2\nText 3\n")))) + +;;; Integration Tests + +(ert-deftest test-process-realistic-webpage () + "Should process realistic webclipped content." + (let ((result (test-process-webclip "* How to Program in Emacs Lisp\n\n** Introduction\nEmacs Lisp is powerful.\n\n** Getting Started\nFirst, open Emacs.\n\n*** Installation\nDownload from gnu.org\n"))) + (should (string= result "*** Introduction\nEmacs Lisp is powerful.\n\n*** Getting Started\nFirst, open Emacs.\n\n**** Installation\nDownload from gnu.org\n")))) + +(ert-deftest test-process-article-with-metadata () + "Should handle article with org metadata." + (let ((result (test-process-webclip "* Article Title :article:web:\n#+DATE: 2024-01-01\n\n** Content\nBody text\n"))) + (should (string= result "#+DATE: 2024-01-01\n\n*** Content\nBody text\n")))) + +(provide 'test-org-webclipper-process) +;;; test-org-webclipper-process.el ends here |
