;;; org-webclipper.el --- Web Page Clipping via org-protocol -*- coding: utf-8; lexical-binding: t; -*-

;;; Commentary:
;;
;; Layer: 4 (Optional).
;; Category: O/D/P.
;; Load shape: eager.
;; Eager reason: none; web clipping runs via org-protocol/command, a Phase 4
;;   protocol/command-loaded deferral candidate.
;; Top-level side effects: org-protocol handler registration via
;;   `with-eval-after-load' (runs once org-protocol is loaded).
;; Runtime requires: none (configures packages via use-package).
;; Direct test load: yes.
;;
;; This package provides a seamless "fire-and-forget" workflow for clipping
;; web pages from the browser directly into an Org file using org-protocol
;; and org-web-tools.
;;
;; Features:
;; - Browser bookmarklet integration via org-protocol
;; - Automatic conversion to Org format using eww-readable and Pandoc
;; - One-click capture from any web page
;; - Preserves page structure and formatting
;; - Smart heading adjustment (removes page title, demotes remaining headings)
;;
;; Setup:
;; 1. Ensure this file is loaded in your Emacs configuration
;; 2. Make sure emacsclient is configured for org-protocol
;; 3. Add the following bookmarklet to your browser's bookmarks bar:
;;
;;    javascript:location.href='org-protocol://webclip?url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title);void(0);
;;
;;    To add the bookmarklet:
;;    a. Create a new bookmark in your browser
;;    b. Set the name to: Clip to Org (or your preference)
;;    c. Set the URL to the JavaScript code above
;;    d. Save it to your bookmarks bar for easy access
;;
;; 4. Click the bookmarklet on any web page to clip its content
;;
;; The clipped content will be added to the file specified by `webclipped-file`
;; under the "Webclipped Inbox" heading with proper formatting and metadata.
;;
;; Architecture:
;; - cj/--process-webclip-content: Pure function for content processing
;; - cj/org-protocol-webclip-handler: Handles URL fetching and capture
;; - cj/org-webclipper-EWW: Direct capture from EWW/W3M buffers
;;
;; Requirements:
;; - org-web-tools package
;; - Pandoc installed on your system
;; - Emacs server running (M-x server-start)

;;; Code:


;; Per-clip state handed from the org-protocol entry point to the capture
;; machinery.  `cj/org-protocol-webclip' `let'-binds both variables around
;; its `org-capture' call; the "W" capture template reads them through
;; %(identity ...) escapes and `cj/org-protocol-webclip-handler' reads them
;; directly.  Their global value stays nil outside such a call.
(defvar cj/--webclip-url nil
  "URL for the active web clip, dynamically bound around `org-capture'.
The org-protocol entry point `let'-binds this for the dynamic extent of
its capture call, so the capture template and handler see it while the
capture runs, and an aborted or erroring capture unwinds the binding
instead of leaving stale state for the next capture.")

;; Companion to `cj/--webclip-url'.  `cj/org-protocol-webclip' binds it to
;; the :title from the request, or to "Untitled" when no title was sent.
(defvar cj/--webclip-title nil
  "Page title for the active web clip, dynamically bound around `org-capture'.
See `cj/--webclip-url' for the binding contract.")

;; One-shot guard for `cj/webclipper-ensure-initialized': that function does
;; its setup only while this is nil and sets it to t when it finishes.
(defvar cj/webclipper-initialized nil
  "Track if webclipper has been initialized.")

;; Declare org-web-tools with deferred loading.  The library is pulled in
;; explicitly with `require' by `cj/webclipper-ensure-initialized' and
;; `cj/org-protocol-webclip-handler' when a clip actually happens.
(use-package org-web-tools
  :defer t)

;; Lazy initialization function
(defun cj/webclipper-ensure-initialized ()
  "Perform the one-time webclipper setup on first use.
Load the libraries the clipper relies on, then append the \"W\"
\(org-protocol) and \"w\" (EWW/W3M) entries to `org-capture-templates',
skipping any key that is already taken.  Once
`cj/webclipper-initialized' is non-nil, further calls do nothing."
  (when (not cj/webclipper-initialized)
    ;; Dependencies are loaded here rather than at module load time.
    (require 'org-protocol)
    (require 'org-capture)
    (require 'org-web-tools)
    (require 'user-constants) ;; provides `webclipped-file'

    ;; The org-protocol handler is deliberately NOT registered here.  It
    ;; lives in the `with-eval-after-load 'org-protocol' form at the
    ;; bottom of this module, which works whether org-protocol loads
    ;; before or after this file.  Keeping a single registration site
    ;; prevents two copies of the alist entry from drifting apart.

    ;; Register each template only when its key is still free, appending
    ;; so existing templates keep their positions.
    (dolist (template
             '(("W" "Web Clipper (Protocol)" entry
                (file+headline webclipped-file "Webclipped Inbox")
                "* [[%(identity cj/--webclip-url)][%(identity cj/--webclip-title)]] :website:\nURL: %(identity cj/--webclip-url)\nCaptured On:%U\n%(cj/org-protocol-webclip-handler)\n"
                :prepend t
                :immediate-finish t)
               ("w" "Web Page Clipper" entry
                (file+headline webclipped-file "Webclipped Inbox")
                "* %a\nURL: %L\nCaptured On:%U\n%(cj/org-webclipper-EWW)\n"
                :prepend t :immediate-finish t)))
      (unless (assoc (car template) org-capture-templates)
        (add-to-list 'org-capture-templates template t)))

    (setq cj/webclipper-initialized t)))

(defun cj/--process-webclip-content (org-content)
  "Return ORG-CONTENT reshaped to sit beneath a capture-template heading.
ORG-CONTENT is the Org text produced by converting a web page.  The
result is a new string in which:
- a level-one heading on the very first line has been dropped (the
  capture template supplies its own heading),
- blank lines at the start of the text have been stripped, and
- every remaining heading is one level deeper."
  (with-temp-buffer
    (insert org-content)
    (goto-char (point-min))
    ;; Drop the leading top-level heading line, if the text opens with one.
    (when (looking-at "^\\* .*\n")
      (replace-match "" t t))
    ;; Strip leading blank lines: skip over the whitespace run, back up to
    ;; the start of the line we landed on so indentation of the first
    ;; real line survives, and delete everything before it.
    (skip-chars-forward " \t\n")
    (forward-line 0)
    (delete-region (point-min) (point))
    ;; Push every remaining heading down one level by adding a star.
    (while (re-search-forward "^\\(\\*+\\) " nil t)
      (replace-match "\\1* " t))
    (buffer-string)))

(defun cj/org-protocol-webclip (info)
  "Process org-protocol webclip requests.
INFO is a plist containing :url and :title from the org-protocol call.

Signals `user-error' when :url is missing, nil, empty, or non-string, and
when :title is present but not a string -- an unexpected plist shape
would otherwise fail later inside the capture handler with confusing
messages.

A missing, empty, or whitespace-only :title is replaced by \"Untitled\".
The \"W\" template renders the title as a link description, and Org does
not treat a bracket link with an empty description as a link.

Always returns nil, which tells org-protocol the request was handled."
  (cj/webclipper-ensure-initialized)
  (let ((url (plist-get info :url))
        (title (plist-get info :title)))
    (unless (and (stringp url) (not (string-empty-p url)))
      (user-error
       "org-protocol webclip: expected non-empty :url string, got %S" url))
    (when (and title (not (stringp title)))
      (user-error
       "org-protocol webclip: :title must be a string when provided, got %S"
       title))
    ;; Bind url+title for the dynamic extent of the capture call only, so
    ;; the template and handler see them while the capture runs and an
    ;; aborted/erroring capture unwinds the binding rather than leaving
    ;; stale state for the next clip.
    (let ((cj/--webclip-url url)
          (cj/--webclip-title
           ;; Pages with an empty `document.title' send :title "".  Treat
           ;; that (and all-whitespace titles) like a missing title.
           (if (or (null title)
                   (string-match-p "\\`[ \t\r\n]*\\'" title))
               "Untitled"
             title)))
      (org-capture nil "W"))
    nil))  ; Return nil to indicate we handled it

(defun cj/org-protocol-webclip-handler ()
  "Fetch the page being clipped and return it as Org text.
Called from the \"W\" capture template.  Reads the target from
`cj/--webclip-url', converts the page with
`org-web-tools--url-as-readable-org', post-processes the result with
`cj/--process-webclip-content', and returns that string for insertion.
Signals an error when no URL is bound or when fetching/conversion fails."
  ;; org-web-tools is loaded lazily, right here.  The sleep time is set
  ;; with plain `setq' on purpose: the variable is a plain float with no
  ;; custom-set handler that needs to fire, and `setopt' is a macro --
  ;; tests that stub it via `cl-letf' on the function cell would hit the
  ;; already-expanded `setopt--set' in the byte-compiled handler and fail
  ;; with a `void-variable widget-field-keymap' error from the customize
  ;; machinery loading lazily.
  (require 'org-web-tools)
  (setq org-web-tools-pandoc-sleep-time 0.5)

  (let ((page-url cj/--webclip-url)
        (page-title cj/--webclip-title))
    (unless page-url
      (error "No URL provided for clipping"))
    (condition-case failure
        (let ((clipped (cj/--process-webclip-content
                        (org-web-tools--url-as-readable-org page-url))))
          ;; `webclipped-file' comes from user-constants; make sure it is
          ;; defined before naming it in the success message.
          (require 'user-constants)
          (message "'%s' added to %s" page-title webclipped-file)
          ;; The template inserts whatever this function returns.
          clipped)
      (error
       ;; Re-signal fetch/conversion problems with a clipper-specific prefix.
       (error "Failed to clip web page: %s" (error-message-string failure))))))

;; ---------------------------- Org Webpage Clipper ----------------------------


(defun cj/org-webclipper-EWW ()
  "Capture the current web page for later viewing in an Org file.
Return the yanked content as a string so templates can insert it.

The page is taken from the buffer org-capture recorded as
:original-buffer.  When no such buffer is recorded, or it has been
killed, the current buffer is used instead, so the command also works
when invoked directly from an EWW or W3M buffer.  Signals an error if
the chosen buffer is in neither `eww-mode' nor `w3m-mode'.

As a side effect the copied page text is left at the head of the kill
ring."
  (interactive)
  (cj/webclipper-ensure-initialized)
  (let* ((capture-source (org-capture-get :original-buffer))
         ;; `with-current-buffer' on nil or a dead buffer would signal an
         ;; opaque wrong-type-argument; fall back to where we are now.
         (source-buffer (if (buffer-live-p capture-source)
                            capture-source
                          (current-buffer)))
         (source-mode (buffer-local-value 'major-mode source-buffer)))
    (cond
     ((eq source-mode 'w3m-mode)
      (with-current-buffer source-buffer
        (org-w3m-copy-for-org-mode)))
     ((eq source-mode 'eww-mode)
      (with-current-buffer source-buffer
        (org-eww-copy-for-org-mode)))
     (t
      (error "Not valid -- must be in w3m or eww mode")))
    ;; The copy commands above push the Org-formatted page onto the kill
    ;; ring; hand that text back to the capture template.
    (car kill-ring)))

;; ----------------------------- Webclipper Keymap -----------------------------

;; keymaps shouldn't be required for webclipper
;; Setup keymaps
;;
;; (defun cj/webclipper-setup-keymaps ()
;;   "Setup webclipper keymaps."
;;   (define-prefix-command 'cj/webclipper-map nil
;;                          "Keymap for webclipper operations.")
;;   (define-key cj/custom-keymap "c" 'cj/webclipper-map)
;;   (define-key cj/webclipper-map "n" 'cj/move-org-branch-to-roam))

;; ;; Call keymap setup if cj/custom-keymap is already defined
;; (when (boundp 'cj/custom-keymap)
;;   (cj/webclipper-setup-keymaps))

;; Register protocol handler early for external calls
(with-eval-after-load 'org-protocol
  ;; Add the "webclip" sub-protocol unless an entry with that name is
  ;; already present, so re-evaluating this form never registers it twice.
  ;; `cj/org-protocol-webclip' receives the parsed request.
  (or (assoc "webclip" org-protocol-protocol-alist)
      (add-to-list 'org-protocol-protocol-alist
                   '("webclip"
                     :protocol "webclip"
                     :function cj/org-protocol-webclip
                     :kill-client t))))

;; (with-eval-after-load 'cj/custom-keymap
;;   (require 'org-webclipper)
;;   (cj/webclipper-setup-keymaps))

(provide 'org-webclipper)
;;; org-webclipper.el ends here