summaryrefslogtreecommitdiff
path: root/modules/org-webclipper.el
blob: c1dbf0920aa9ea0307e5e49f81e03e571a5e4299 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
;;; org-webclipper.el --- Web Page Clipping via org-protocol -*- coding: utf-8; lexical-binding: t; -*-

;;; Commentary:
;;
;; This package provides a seamless "fire-and-forget" workflow for clipping
;; web pages from the browser directly into an Org file using org-protocol
;; and org-web-tools.
;;
;; Features:
;; - Browser bookmarklet integration via org-protocol
;; - Automatic conversion to Org format using eww-readable and Pandoc
;; - One-click capture from any web page
;; - Preserves page structure and formatting
;;
;; Setup:
;; 1. Ensure this file is loaded in your Emacs configuration
;; 2. Make sure emacsclient is configured for org-protocol
;; 3. Add the following bookmarklet to your browser's bookmarks bar:
;;
;;    javascript:location.href='org-protocol://webclip?url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title);void(0);
;;
;;    To add the bookmarklet:
;;    a. Create a new bookmark in your browser
;;    b. Set the name to: Clip to Org (or your preference)
;;    c. Set the URL to the JavaScript code above
;;    d. Save it to your bookmarks bar for easy access
;;
;; 4. Click the bookmarklet on any web page to clip its content
;;
;; The clipped content will be added to the file specified by `webclipped-file`
;; under the "Webclipped Inbox" heading with proper formatting and metadata.
;;
;; Requirements:
;; - org-web-tools package
;; - Pandoc installed on your system
;; - Emacs server running (M-x server-start)

;;; Code:

;; Declare functions and variables to avoid warnings
(declare-function org-protocol-protocol-alist "org-protocol")
(declare-function org-capture "org-capture")
(declare-function org-capture-get "org-capture")
(declare-function org-web-tools--url-as-readable-org "org-web-tools")
(declare-function org-w3m-copy-for-org-mode "org-w3m")
(declare-function org-eww-copy-for-org-mode "org-eww")
(declare-function org-at-heading-p "org")
(declare-function org-heading-components "org")
(declare-function org-copy-subtree "org")
(declare-function org-cut-subtree "org")
(declare-function org-id-new "org-id")
(declare-function org-roam-db-sync "org-roam")
(defvar org-capture-templates)
(defvar org-protocol-protocol-alist)
(defvar org-roam-directory)
(defvar webclipped-file)

;; Variables for storing org-protocol data.
;; `cj/org-protocol-webclip' fills these two variables right before it
;; runs the "W" capture template; the template reads them through its
;; %(identity ...) forms, and `cj/org-protocol-webclip-handler' resets
;; both to nil once it has copied their values.
(defvar cj/webclip-current-url nil
  "Temporary storage for URL passed via org-protocol.")

(defvar cj/webclip-current-title nil
  "Temporary storage for page title passed via org-protocol.")

;; Flag to track if we've done initialization.
;; Set to t at the end of `cj/webclipper-ensure-initialized' so the
;; requires and registrations in that function run only once.
(defvar cj/webclipper-initialized nil
  "Track if webclipper has been initialized.")

;; Lazy initialization function
(defun cj/webclipper-ensure-initialized ()
  "Run the one-time webclipper setup; do nothing on later calls.
On the first call this loads `org-protocol', `org-capture' and
`user-constants', registers the \"webclip\" org-protocol handler, and
appends the \"W\" and \"w\" capture templates unless a template with the
same key already exists.  `cj/webclipper-initialized' records that the
setup has been done."
  (unless cj/webclipper-initialized
    ;; Dependencies are pulled in here rather than at load time.
    ;; `user-constants' supplies `webclipped-file'.
    (mapc #'require '(org-protocol org-capture user-constants))

    ;; `add-to-list' compares with `equal', so an identical entry that
    ;; is already registered is not duplicated.
    (add-to-list 'org-protocol-protocol-alist
                 '("webclip"
                   :protocol "webclip"
                   :function cj/org-protocol-webclip
                   :kill-client t))

    ;; Append each template only when its key is still free, so a
    ;; template the user defined under the same key wins.
    (dolist (template
             '(("W" "Web Clipper (Protocol)" entry
                (file+headline webclipped-file "Webclipped Inbox")
                "* [[%(identity cj/webclip-current-url)][%(identity cj/webclip-current-title)]] :website:\nURL: %(identity cj/webclip-current-url)\nCaptured On:%U\n%(cj/org-protocol-webclip-handler)\n"
                :prepend t
                :immediate-finish t)
               ("w" "Web Page Clipper" entry
                (file+headline webclipped-file "Webclipped Inbox")
                "* %a\nURL: %L\nCaptured On:%U\n%(cj/org-webclipper-EWW)\n"
                :prepend t :immediate-finish t)))
      (unless (assoc (car template) org-capture-templates)
        (add-to-list 'org-capture-templates template t)))

    (setq cj/webclipper-initialized t)))

;;;###autoload
(defun cj/org-protocol-webclip (info)
  "Process an org-protocol \"webclip\" request described by INFO.
INFO is a plist containing :url and :title from the org-protocol call.

When INFO carries a non-blank :url, store the URL and title for the
\"W\" capture template and run that capture.  A missing or blank title
is stored as \"Untitled\".  When the URL is missing or blank, nothing is
captured and a message says so; previously the capture ran anyway and
filed an entry with an empty link.

Always return nil to indicate the request was handled."
  (cj/webclipper-ensure-initialized)
  (let ((url (plist-get info :url))
        (title (plist-get info :title)))
    (if (not (and (stringp url) (string-match-p "[^ \t\n\r]" url)))
        ;; Without a URL there is nothing to fetch, so skip the capture
        ;; instead of creating a broken entry.
        (message "Webclip request ignored: no URL supplied")
      ;; Store the URL and title for the capture template to use.
      (setq cj/webclip-current-url url
            cj/webclip-current-title
            ;; An empty string is non-nil, so test for real content
            ;; rather than relying on `or'.
            (if (and (stringp title) (string-match-p "[^ \t\n\r]" title))
                title
              "Untitled"))
      ;; Trigger the capture.
      (org-capture nil "W"))
    nil))  ; Return nil to indicate we handled it

(defun cj/org-protocol-webclip-handler ()
  "Return the Org body for the page queued by `cj/org-protocol-webclip'.
This function is called from the \"W\" capture template.  It takes the
URL and title out of `cj/webclip-current-url' and
`cj/webclip-current-title' (resetting both to nil), fetches the page
as Org text, removes that text's leading top-level heading and the
blank lines after it, and adds one star to every remaining heading
because the template already supplies the top-level heading.

Signal an error when no URL is queued or when fetching or converting
the page fails."
  ;; org-web-tools is loaded only once a clip is actually requested.
  (require 'org-web-tools)
  (setopt org-web-tools-pandoc-sleep-time 0.5)

  (let ((page-url cj/webclip-current-url)
        (page-title cj/webclip-current-title))
    ;; The queued request is single-use: reset it before doing any work.
    (setq cj/webclip-current-title nil
          cj/webclip-current-url nil)
    (unless page-url
      (error "No URL provided for clipping"))
    (condition-case failure
        (let* ((fetched (org-web-tools--url-as-readable-org page-url))
               (body
                (with-temp-buffer
                  (insert fetched)
                  (goto-char (point-min))
                  ;; Drop the first heading line; the template has its own.
                  (when (looking-at "^\\* .*\n")
                    (replace-match "" t t))
                  ;; Drop blank (or whitespace-only) lines left at the top.
                  (while (looking-at "^[ \t]*\n")
                    (replace-match "" t t))
                  ;; Prefix every remaining heading with one more star.
                  (while (re-search-forward "^\\(\\*+\\) " nil t)
                    (replace-match "*\\1 " t))
                  (buffer-string))))
          ;; `user-constants' provides `webclipped-file' for the message.
          (require 'user-constants)
          (message "'%s' added to %s" page-title webclipped-file)
          body)
      (error
       ;; Re-signal with a message that names the failing operation.
       (error "Failed to clip web page: %s" (error-message-string failure))))))

;; ---------------------------- Org Webpage Clipper ----------------------------

;;;###autoload
(defun cj/org-webclipper-EWW ()
  "Capture the current web page for later viewing in an Org file.
Intended to be evaluated from the \"w\" capture template.  Look up the
buffer the capture was started from, and if it is a w3m or eww buffer
copy the page in Org format with `org-w3m-copy-for-org-mode' or
`org-eww-copy-for-org-mode'.

Return the yanked content as a string so templates can insert it.

Signal a `user-error' when there is no live source buffer (for example
when called outside of a capture), and an error when the source buffer
is in neither `w3m-mode' nor `eww-mode'."
  (interactive)
  (cj/webclipper-ensure-initialized)
  (let ((source-buffer (org-capture-get :original-buffer)))
    ;; Without this guard a nil buffer reaches `with-current-buffer' and
    ;; fails with an unhelpful `wrong-type-argument'.
    (unless (buffer-live-p source-buffer)
      (user-error "No source buffer -- use this from an org-capture template"))
    (with-current-buffer source-buffer
      (pcase major-mode
        ('w3m-mode (org-w3m-copy-for-org-mode))
        ('eww-mode (org-eww-copy-for-org-mode))
        (_ (error "Not valid -- must be in w3m or eww mode"))))
    ;; The copy commands above leave their result on the kill ring;
    ;; extract the webpage content from there.
    (car kill-ring)))

;; ------------------------ Org-Branch To Org-Roam-Node ------------------------

(defun cj/org-link-get-description (text)
  "Return the description of the first org link in TEXT.
For a link of the form [[url][description]] the result is the
description; for a bare [[url]] link it is the url.  Only the first
link in TEXT is considered, and text around the link is not part of
the result.  When TEXT contains no org link, TEXT itself is returned."
  (cond
   ;; No link anywhere in TEXT: hand it back untouched.
   ((not (string-match
          "\\[\\[\\([^]]+\\)\\]\\(?:\\[\\([^]]+\\)\\]\\)?\\]" text))
    text)
   ;; Group 2 is the optional description part.
   ((match-string 2 text))
   ;; No description: fall back to the link target in group 1.
   (t (match-string 1 text))))

;;;###autoload
(defun cj/move-org-branch-to-roam ()
  "Move the org subtree at point to a new org-roam node.
The node file is created in `org-roam-directory' and named
TIMESTAMP-SLUG.org, where SLUG is derived from the heading text.
The heading becomes the node title, and the entire subtree is promoted
so that its top heading is at level 1.
If the heading contains a link, extract the description for the title.

The subtree is removed from the current buffer only after the new file
has been written, so a failed write leaves the original untouched.
Signal a `user-error' when point is not on a heading, when the heading
has no title, or when the target file already exists."
  (interactive)
  ;; Lazy load org and org-roam when needed
  (require 'org)
  (require 'org-id)
  (require 'org-roam)

  (unless (org-at-heading-p)
    (user-error "Not at an org heading"))

  (let* ((heading-components (org-heading-components))
         (current-level (nth 0 heading-components))
         ;; The headline element is nil for a heading without text.
         (raw-title (or (nth 4 heading-components) ""))
         ;; Extract clean title from potential link
         (title (cj/org-link-get-description raw-title))
         (timestamp (format-time-string "%Y%m%d%H%M%S"))
         ;; Collapse every run of non-alphanumerics into one hyphen,
         ;; then strip a leading/trailing hyphen.
         (title-slug (replace-regexp-in-string
                      "^-\\|-$" ""
                      (replace-regexp-in-string
                       "[^a-zA-Z0-9]+" "-" (downcase title))))
         ;; A title without ASCII letters or digits yields an empty
         ;; slug; use a placeholder instead of a dangling hyphen.
         (filename (format "%s-%s.org" timestamp
                           (if (string= title-slug "") "untitled" title-slug)))
         (filepath (expand-file-name filename org-roam-directory))
         ;; Generate a unique ID for the node
         (node-id (org-id-new))
         subtree-content)

    (when (string= title "")
      (user-error "Heading has no title to name the node after"))
    ;; `with-temp-file' would silently replace an existing file.
    (when (file-exists-p filepath)
      (user-error "Refusing to overwrite existing file: %s" filepath))

    ;; Copy the subtree content; the buffer is not modified yet.
    (org-copy-subtree)
    (setq subtree-content (current-kill 0))

    ;; Promote the copy so its top heading becomes level 1: remove up
    ;; to (current-level - 1) leading stars from every heading line,
    ;; always leaving at least one star.
    (when (> current-level 1)
      (setq subtree-content
            (with-temp-buffer
              (insert subtree-content)
              (goto-char (point-min))
              (let ((promote-count (1- current-level)))
                (while (re-search-forward "^\\*+ " nil t)
                  (let* ((star-count (- (match-end 0) (match-beginning 0) 1))
                         (drop (min promote-count (1- star-count))))
                    (delete-region (match-beginning 0)
                                   (+ (match-beginning 0) drop)))))
              (buffer-string))))

    ;; Create the new org-roam file
    (with-temp-file filepath
      ;; Insert the org-roam template with ID at file level
      (insert ":PROPERTIES:\n")
      (insert ":ID:       " node-id "\n")
      (insert ":END:\n")
      (insert "#+TITLE: " title "\n")
      (insert "#+CATEGORY: " title "\n")
      (insert "#+FILETAGS: Topic\n\n")

      ;; Insert the promoted subtree content
      (insert subtree-content))

    ;; The file now exists on disk, so it is safe to remove the
    ;; subtree from the original buffer.
    (org-cut-subtree)

    ;; Sync the org-roam database
    (org-roam-db-sync)

    ;; Message to user
    (message "'%s' added as an org-roam node." title)))

;; ----------------------------- Webclipper Keymap -----------------------------

;; Setup keymaps
;;;###autoload
(defun cj/webclipper-setup-keymaps ()
  "Create the webclipper prefix keymap and bind it in `cj/custom-keymap'.
Define `cj/webclipper-map' as a prefix command, bind it to \"w\" in
`cj/custom-keymap', and bind \"n\" inside it to
`cj/move-org-branch-to-roam'.  `cj/custom-keymap' must already be bound
when this function is called."
  ;; The third argument of `define-prefix-command' is the keymap's menu
  ;; prompt, a user-visible string rather than a docstring.
  (define-prefix-command 'cj/webclipper-map nil
                         "Keymap for webclipper operations.")
  (define-key cj/custom-keymap "w" 'cj/webclipper-map)
  (define-key cj/webclipper-map "n" 'cj/move-org-branch-to-roam))

;; Call keymap setup if cj/custom-keymap is already defined
(when (boundp 'cj/custom-keymap)
  (cj/webclipper-setup-keymaps))

;; Register protocol handler early for external calls, so a "webclip"
;; request is routed to `cj/org-protocol-webclip' as soon as
;; org-protocol is loaded.
;;;###autoload
(with-eval-after-load 'org-protocol
  (unless (assoc "webclip" org-protocol-protocol-alist)
    (add-to-list 'org-protocol-protocol-alist
                 '("webclip"
                   :protocol "webclip"
                   :function cj/org-protocol-webclip
                   :kill-client t))))

;; Set up the keymaps once the custom keymap becomes available.
;; This form lives in this very file, so `cj/webclipper-setup-keymaps'
;; is already defined whenever the body runs; requiring `org-webclipper'
;; from here (before the `provide' below) was redundant and could
;; trigger a recursive load.
;; NOTE(review): this only fires if a feature or file named
;; `cj/custom-keymap' is loaded -- confirm that such a feature exists.
(with-eval-after-load 'cj/custom-keymap
  (cj/webclipper-setup-keymaps))

(provide 'org-webclipper)
;;; org-webclipper.el ends here