summaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2025-10-12 23:20:40 -0500
committerCraig Jennings <c@cjennings.net>2025-10-12 23:20:40 -0500
commit26e8fb614ac2c5f872b0d8bdc14bd83de5361669 (patch)
tree129c0348f5d812e75669cc911f251b3b6e6c4d7c /modules
parent3d059855a509788714b5a36cf006c79af9a7abad (diff)
feat: webclipper): Add web clipping with org-protocol integration
- Reworked the org-webclipper.el module to support a "fire-and-forget" workflow using org-protocol and org-web-tools. - Added feature to handle web clipping directly from the browser with a bookmarklet, converting content to Org format using Pandoc. - New functions ensure lazy loading of necessary packages and setup of capture templates. - Enhanced keymap setup to integrate with custom keymaps, facilitating easier user interaction.
Diffstat (limited to 'modules')
-rw-r--r--modules/org-webclipper.el375
1 files changed, 268 insertions, 107 deletions
diff --git a/modules/org-webclipper.el b/modules/org-webclipper.el
index c7b80499..c1dbf092 100644
--- a/modules/org-webclipper.el
+++ b/modules/org-webclipper.el
@@ -1,52 +1,186 @@
-;;; org-webclipper.el --- Web Page Clipping Workflow to Org Roam -*- coding: utf-8; lexical-binding: t; -*-
+;;; org-webclipper.el --- Web Page Clipping via org-protocol -*- coding: utf-8; lexical-binding: t; -*-
;;; Commentary:
;;
-;; Allows saving a copy of the page EWW is visiting for offline reading.
-;; In other words, it's a "Pocket/Instapaper" that collects the articles in an Emacs org-mode file.
+;; This package provides a seamless "fire-and-forget" workflow for clipping
+;; web pages from the browser directly into an Org file using org-protocol
+;; and org-web-tools.
;;
-;; I review the articles, then add the ones I want for future reference by moving it to an
-;; org-roam file.
+;; Features:
+;; - Browser bookmarklet integration via org-protocol
+;; - Automatic conversion to Org format using eww-readable and Pandoc
+;; - One-click capture from any web page
+;; - Preserves page structure and formatting
;;
+;; Setup:
+;; 1. Ensure this file is loaded in your Emacs configuration
+;; 2. Make sure emacsclient is configured for org-protocol
+;; 3. Add the following bookmarklet to your browser's bookmarks bar:
+;;
+;; javascript:location.href='org-protocol://webclip?url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title);void(0);
+;;
+;; To add the bookmarklet:
+;; a. Create a new bookmark in your browser
+;; b. Set the name to: Clip to Org (or your preference)
+;; c. Set the URL to the JavaScript code above
+;; d. Save it to your bookmarks bar for easy access
+;;
+;; 4. Click the bookmarklet on any web page to clip its content
+;;
+;; The clipped content will be added to the file specified by `webclipped-file`
+;; under the "Webclipped Inbox" heading with proper formatting and metadata.
+;;
+;; Requirements:
+;; - org-web-tools package
+;; - Pandoc installed on your system
+;; - Emacs server running (M-x server-start)
+
;;; Code:
-(require 'user-constants) ;; for location of 'webclipped-file'
+;; Declare functions and variables to avoid warnings
+(declare-function org-protocol-protocol-alist "org-protocol")
+(declare-function org-capture "org-capture")
+(declare-function org-capture-get "org-capture")
+(declare-function org-web-tools--url-as-readable-org "org-web-tools")
+(declare-function org-w3m-copy-for-org-mode "org-w3m")
+(declare-function org-eww-copy-for-org-mode "org-eww")
+(declare-function org-at-heading-p "org")
+(declare-function org-heading-components "org")
+(declare-function org-copy-subtree "org")
+(declare-function org-cut-subtree "org")
+(declare-function org-id-new "org-id")
+(declare-function org-roam-db-sync "org-roam")
+(defvar org-capture-templates)
+(defvar org-protocol-protocol-alist)
+(defvar org-roam-directory)
+(defvar webclipped-file)
+
+;; Variables for storing org-protocol data
+(defvar cj/webclip-current-url nil
+ "Temporary storage for URL passed via org-protocol.")
+
+(defvar cj/webclip-current-title nil
+ "Temporary storage for page title passed via org-protocol.")
+
+;; Flag to track if we've done initialization
+(defvar cj/webclipper-initialized nil
+ "Track if webclipper has been initialized.")
+
+;; Lazy initialization function
+(defun cj/webclipper-ensure-initialized ()
+ "Ensure webclipper is initialized when first used."
+ (unless cj/webclipper-initialized
+ ;; Load required packages now
+ (require 'org-protocol)
+ (require 'org-capture)
+ (require 'user-constants) ;; for webclipped-file
+
+ ;; Register the org-protocol handler
+ (add-to-list 'org-protocol-protocol-alist
+ '("webclip"
+ :protocol "webclip"
+ :function cj/org-protocol-webclip
+ :kill-client t))
+
+ ;; Add capture templates if not already present
+ (unless (assoc "W" org-capture-templates)
+ (add-to-list 'org-capture-templates
+ '("W" "Web Clipper (Protocol)" entry
+ (file+headline webclipped-file "Webclipped Inbox")
+ "* [[%(identity cj/webclip-current-url)][%(identity cj/webclip-current-title)]] :website:\nURL: %(identity cj/webclip-current-url)\nCaptured On:%U\n%(cj/org-protocol-webclip-handler)\n"
+ :prepend t
+ :immediate-finish t)
+ t))
+
+ (unless (assoc "w" org-capture-templates)
+ (add-to-list 'org-capture-templates
+ '("w" "Web Page Clipper" entry
+ (file+headline webclipped-file "Webclipped Inbox")
+ "* %a\nURL: %L\nCaptured On:%U\n%(cj/org-webclipper-EWW)\n"
+ :prepend t :immediate-finish t)
+ t))
+
+ (setq cj/webclipper-initialized t)))
+
+;;;###autoload
+(defun cj/org-protocol-webclip (info)
+ "Process org-protocol webclip requests.
+INFO is a plist containing :url and :title from the org-protocol call."
+ (cj/webclipper-ensure-initialized)
+ (let ((url (plist-get info :url))
+ (title (plist-get info :title)))
+ (when url
+ ;; Store the URL and title for the capture template to use
+ (setq cj/webclip-current-url url
+ cj/webclip-current-title (or title "Untitled")))
+ ;; Trigger the capture
+ (org-capture nil "W")
+ nil)) ; Return nil to indicate we handled it
+
+(defun cj/org-protocol-webclip-handler ()
+ "Handle web page clipping during org-capture.
+This function is called from the capture template.
+It fetches the page content and converts it to Org format."
+ ;; Load org-web-tools only when actually needed
+ (require 'org-web-tools)
+ (setopt org-web-tools-pandoc-sleep-time 0.5)
+
+ (let ((url cj/webclip-current-url)
+ (title cj/webclip-current-title))
+ ;; Clear the stored values after using them
+ (setq cj/webclip-current-url nil
+ cj/webclip-current-title nil)
+
+ (if (not url)
+ (error "No URL provided for clipping")
+ (condition-case err
+ (let* ((org-content (org-web-tools--url-as-readable-org url))
+ ;; Process the content to adjust heading levels
+ (processed-content
+ (with-temp-buffer
+ (insert org-content)
+ (goto-char (point-min))
+ ;; Skip the first heading line (we'll use our template's heading)
+ (when (looking-at "^\\* .*\n")
+ (delete-region (match-beginning 0) (match-end 0)))
+ ;; Remove any initial blank lines
+ (while (looking-at "^[ \t]*\n")
+ (delete-char 1))
+ ;; Demote all remaining headings by one level
+ ;; since our template already provides the top-level heading
+ (while (re-search-forward "^\\(\\*+\\) " nil t)
+ (replace-match (concat (match-string 1) "* ") t t))
+ (buffer-string))))
+ ;; Show success message with the title
+ (require 'user-constants) ;; Ensure webclipped-file is available
+ (message "'%s' added to %s" title webclipped-file)
+ ;; Return the processed content for insertion
+ processed-content)
+ (error
+ ;; Handle any errors during fetching or conversion
+ (error "Failed to clip web page: %s" (error-message-string err)))))))
;; ---------------------------- Org Webpage Clipper ----------------------------
-(defun cj/org-webpage-clipper ()
+;;;###autoload
+(defun cj/org-webclipper-EWW ()
"Capture the current web page for later viewing in an Org file.
-
Return the yanked content as a string so templates can insert it."
(interactive)
+ (cj/webclipper-ensure-initialized)
(let* ((source-buffer (org-capture-get :original-buffer))
- (source-mode (with-current-buffer source-buffer major-mode)))
- (cond
- ((eq source-mode 'w3m-mode)
- (with-current-buffer source-buffer
- (org-w3m-copy-for-org-mode)))
- ((eq source-mode 'eww-mode)
- (with-current-buffer source-buffer
- (org-eww-copy-for-org-mode)))
- (t
- (error "Not valid -- must be in w3m or eww mode")))
- ;; extract the webpage content from the kill ring
- (car kill-ring)))
-
-;; ------------------------------ Capture Template -----------------------------
-
-(with-eval-after-load 'org-capture
- ;; Ensure org-capture-templates exists before adding to it
- (unless (boundp 'org-capture-templates)
- (setq org-capture-templates nil))
-
- ;; Add the webclipper template to org-capture-templates
- (add-to-list 'org-capture-templates
- '("w" "Web Page Clipper" entry
- (file+headline webclipped-file "Webclipped Inbox")
- "* %a\nURL: %L\nCaptured On:%U\n%(cj/org-webpage-clipper)\n"
- :prepend t :immediate-finish t)
- t))
+ (source-mode (with-current-buffer source-buffer major-mode)))
+ (cond
+ ((eq source-mode 'w3m-mode)
+ (with-current-buffer source-buffer
+ (org-w3m-copy-for-org-mode)))
+ ((eq source-mode 'eww-mode)
+ (with-current-buffer source-buffer
+ (org-eww-copy-for-org-mode)))
+ (t
+ (error "Not valid -- must be in w3m or eww mode")))
+ ;; extract the webpage content from the kill ring
+ (car kill-ring)))
;; ------------------------ Org-Branch To Org-Roam-Node ------------------------
@@ -56,90 +190,117 @@ If TEXT contains an org link like [[url][description]], return description.
If TEXT contains multiple links, only process the first one.
Otherwise return TEXT unchanged."
(if (string-match "\\[\\[\\([^]]+\\)\\]\\(?:\\[\\([^]]+\\)\\]\\)?\\]" text)
- (let ((description (match-string 2 text))
- (url (match-string 1 text)))
- ;; If there's a description, use it; otherwise use the URL
- (or description url))
- text))
+ (let ((description (match-string 2 text))
+ (url (match-string 1 text)))
+ ;; If there's a description, use it; otherwise use the URL
+ (or description url))
+ text))
+;;;###autoload
(defun cj/move-org-branch-to-roam ()
"Move the org subtree at point to a new org-roam node.
The node filename will be timestamp-based with the heading name.
The heading becomes the node title, and the entire subtree is demoted to level 1.
If the heading contains a link, extract the description for the title."
(interactive)
+ ;; Lazy load org and org-roam when needed
+ (require 'org)
+ (require 'org-id)
+ (require 'org-roam)
+
(unless (org-at-heading-p)
- (user-error "Not at an org heading"))
+ (user-error "Not at an org heading"))
(let* ((heading-components (org-heading-components))
- (current-level (nth 0 heading-components))
- (raw-title (nth 4 heading-components))
- ;; Extract clean title from potential link
- (title (cj/org-link-get-description raw-title))
- (timestamp (format-time-string "%Y%m%d%H%M%S"))
- ;; Convert title to filename-safe format
- (title-slug (replace-regexp-in-string
- "[^a-zA-Z0-9]+" "-"
- (downcase title)))
- ;; Remove leading/trailing hyphens
- (title-slug (replace-regexp-in-string
- "^-\\|-$" "" title-slug))
- (filename (format "%s-%s.org" timestamp title-slug))
- (filepath (expand-file-name filename org-roam-directory))
- ;; Generate a unique ID for the node
- (node-id (org-id-new))
- ;; Store the subtree in a temporary buffer
- subtree-content)
-
- ;; Copy the subtree content
- (org-copy-subtree)
- (setq subtree-content (current-kill 0))
-
- ;; Now cut it to remove from original buffer
- (org-cut-subtree)
-
- ;; Process the subtree to demote it to level 1
- (with-temp-buffer
- (org-mode)
- (insert subtree-content)
- ;; Demote the entire tree so the top level becomes level 1
- (goto-char (point-min))
- (when (> current-level 1)
- (let ((demote-count (- current-level 1)))
- (while (re-search-forward "^\\*+ " nil t)
- (beginning-of-line)
- (dotimes (_ demote-count)
- (when (looking-at "^\\*\\*")
- (delete-char 1)))
- (forward-line))))
- (setq subtree-content (buffer-string)))
-
- ;; Create the new org-roam file
- (with-temp-file filepath
- ;; Insert the org-roam template with ID at file level
- (insert ":PROPERTIES:\n")
- (insert ":ID: " node-id "\n")
- (insert ":END:\n")
- (insert "#+TITLE: " title "\n")
- (insert "#+CATEGORY: " title "\n")
- (insert "#+FILETAGS: Topic\n\n")
-
- ;; Insert the demoted subtree content
- (insert subtree-content))
-
- ;; Sync the org-roam database
- (org-roam-db-sync)
-
- ;; Message to user
- (message "'%s' added as an org-roam node." title)))
+ (current-level (nth 0 heading-components))
+ (raw-title (nth 4 heading-components))
+ ;; Extract clean title from potential link
+ (title (cj/org-link-get-description raw-title))
+ (timestamp (format-time-string "%Y%m%d%H%M%S"))
+ ;; Convert title to filename-safe format
+ (title-slug (replace-regexp-in-string
+ "[^a-zA-Z0-9]+" "-"
+ (downcase title)))
+ ;; Remove leading/trailing hyphens
+ (title-slug (replace-regexp-in-string
+ "^-\\|-$" "" title-slug))
+ (filename (format "%s-%s.org" timestamp title-slug))
+ (filepath (expand-file-name filename org-roam-directory))
+ ;; Generate a unique ID for the node
+ (node-id (org-id-new))
+ ;; Store the subtree in a temporary buffer
+ subtree-content)
+
+ ;; Copy the subtree content
+ (org-copy-subtree)
+ (setq subtree-content (current-kill 0))
+
+ ;; Now cut it to remove from original buffer
+ (org-cut-subtree)
+
+ ;; Process the subtree to demote it to level 1
+ (with-temp-buffer
+ (org-mode)
+ (insert subtree-content)
+ ;; Demote the entire tree so the top level becomes level 1
+ (goto-char (point-min))
+ (when (> current-level 1)
+ (let ((demote-count (- current-level 1)))
+ (while (re-search-forward "^\\*+ " nil t)
+ (beginning-of-line)
+ (dotimes (_ demote-count)
+ (when (looking-at "^\\*\\*")
+ (delete-char 1)))
+ (forward-line))))
+ (setq subtree-content (buffer-string)))
+
+ ;; Create the new org-roam file
+ (with-temp-file filepath
+ ;; Insert the org-roam template with ID at file level
+ (insert ":PROPERTIES:\n")
+ (insert ":ID: " node-id "\n")
+ (insert ":END:\n")
+ (insert "#+TITLE: " title "\n")
+ (insert "#+CATEGORY: " title "\n")
+ (insert "#+FILETAGS: Topic\n\n")
+
+ ;; Insert the demoted subtree content
+ (insert subtree-content))
+
+ ;; Sync the org-roam database
+ (org-roam-db-sync)
+
+ ;; Message to user
+ (message "'%s' added as an org-roam node." title)))
;; ----------------------------- Webclipper Keymap -----------------------------
-;; Buffer & file operations prefix and keymap
-(define-prefix-command 'cj/webclipper-map nil
- "Keymap for weblipper operations.")
-(define-key cj/custom-keymap "w" 'cj/webclipper-map)
-(define-key cj/webclipper-map "N" 'cj/move-org-branch-to-roam) ;; for node
+;; Setup keymaps
+;;;###autoload
+(defun cj/webclipper-setup-keymaps ()
+ "Setup webclipper keymaps."
+ (define-prefix-command 'cj/webclipper-map nil
+ "Keymap for weblipper operations.")
+ (define-key cj/custom-keymap "w" 'cj/webclipper-map)
+ (define-key cj/webclipper-map "n" 'cj/move-org-branch-to-roam))
+
+;; Call keymap setup if cj/custom-keymap is already defined
+(when (boundp 'cj/custom-keymap)
+ (cj/webclipper-setup-keymaps))
+
+;; Register protocol handler early for external calls
+;;;###autoload
+(with-eval-after-load 'org-protocol
+ (unless (assoc "webclip" org-protocol-protocol-alist)
+ (add-to-list 'org-protocol-protocol-alist
+ '("webclip"
+ :protocol "webclip"
+ :function cj/org-protocol-webclip
+ :kill-client t))))
+
+(with-eval-after-load 'cj/custom-keymap
+ (require 'org-webclipper)
+ (cj/webclipper-setup-keymaps))
(provide 'org-webclipper)
-;;; org-webclipper.el ends here.
+;;; org-webclipper.el ends here