diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-24 14:40:03 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-24 14:40:03 -0500 |
| commit | c097b5b4540d51fd279a81c0834b008331e936c9 (patch) | |
| tree | 50f58a77594d1ea98ae23cbcc1b747cab68b39e9 /modules | |
| parent | 94ef5242e72a39faa9eb14a705387ccad339be14 (diff) | |
| download | dotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.tar.gz dotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.zip | |
fix(elfeed): bound and clean up the synchronous YouTube fetch
cj/youtube-to-elfeed-feed-format called url-retrieve-synchronously with no timeout, so a hung YouTube request would block Emacs indefinitely, and it only killed the temporary URL buffer when an ID was successfully extracted — a page without the expected markers leaked the buffer.
Passed cj/elfeed-url-fetch-timeout (10s) to the synchronous fetch, and moved the fetch and parse into an unwind-protect that always kills the temp buffer (guarded by buffer-live-p), including on the parse-failure path. Tests mock the network boundary and verify that a normal channel page parses, that the timeout is passed, and that the buffer is not leaked when parsing fails.
Also added tests for the EWW user-agent advice (no code change): it already injects the desktop UA only from eww-mode buffers, so package.el and other non-EWW url callers pass through untouched — the tests pin that scoping and the replace-not-duplicate header behavior.
Diffstat (limited to 'modules')
| -rw-r--r-- | modules/elfeed-config.el | 81 |
1 file changed, 46 insertions, 35 deletions
diff --git a/modules/elfeed-config.el b/modules/elfeed-config.el index 58d961f9..dff26410 100644 --- a/modules/elfeed-config.el +++ b/modules/elfeed-config.el @@ -208,6 +208,10 @@ Note: Function name kept for backwards compatibility." ;; --------------------- Youtube Url To Elfeed Feed Format --------------------- +(defconst cj/elfeed-url-fetch-timeout 10 + "Seconds to wait for a synchronous YouTube page fetch before giving up. +Without a timeout a hung request would block Emacs indefinitely.") + (defun cj/youtube-to-elfeed-feed-format (url type) "Convert YouTube URL to elfeed-feeds format. @@ -225,44 +229,51 @@ TYPE should be either \='channel or \='playlist." "Could not extract channel information" "Could not extract playlist information"))) - ;; Extract ID based on type - (if (eq type 'channel) - ;; For channels, we need to fetch the page to get the channel_id + (unwind-protect (progn - (setq buffer (url-retrieve-synchronously url)) - (when buffer + ;; Extract ID based on type + (if (eq type 'channel) + ;; For channels, we need to fetch the page to get the channel_id + (progn + (setq buffer (url-retrieve-synchronously + url nil nil cj/elfeed-url-fetch-timeout)) + (when buffer + (with-current-buffer buffer + ;; Decode the content as UTF-8 + (set-buffer-multibyte t) + (decode-coding-region (point-min) (point-max) 'utf-8) + (goto-char (point-min)) + ;; Search for the channel_id in the RSS feed link + (when (re-search-forward id-pattern nil t) + (setq id (match-string 1)))))) + ;; For playlists, extract from URL first + (when (string-match id-pattern url) + (setq id (match-string 1 url)) + (setq buffer (url-retrieve-synchronously + url nil nil cj/elfeed-url-fetch-timeout)))) + + ;; Get title from the page + (when (and buffer id) (with-current-buffer buffer - ;; Decode the content as UTF-8 - (set-buffer-multibyte t) - (decode-coding-region (point-min) (point-max) 'utf-8) + (unless (eq type 'channel) + ;; Decode for playlist (already done for channel above) + 
(set-buffer-multibyte t) + (decode-coding-region (point-min) (point-max) 'utf-8)) + ;; Search for the title in og:title meta tag (goto-char (point-min)) - ;; Search for the channel_id in the RSS feed link - (when (re-search-forward id-pattern nil t) - (setq id (match-string 1)))))) - ;; For playlists, extract from URL first - (when (string-match id-pattern url) - (setq id (match-string 1 url)) - (setq buffer (url-retrieve-synchronously url)))) - - ;; Get title from the page - (when (and buffer id) - (with-current-buffer buffer - (unless (eq type 'channel) - ;; Decode for playlist (already done for channel above) - (set-buffer-multibyte t) - (decode-coding-region (point-min) (point-max) 'utf-8)) - ;; Search for the title in og:title meta tag - (goto-char (point-min)) - (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t) - (setq title (match-string 1)) - ;; Simple HTML entity decoding - (setq title (replace-regexp-in-string "&" "&" title)) - (setq title (replace-regexp-in-string "<" "<" title)) - (setq title (replace-regexp-in-string ">" ">" title)) - (setq title (replace-regexp-in-string """ "\"" title)) - (setq title (replace-regexp-in-string "'" "'" title)) - (setq title (replace-regexp-in-string "'" "'" title)))) - (kill-buffer buffer)) + (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t) + (setq title (match-string 1)) + ;; Simple HTML entity decoding + (setq title (replace-regexp-in-string "&" "&" title)) + (setq title (replace-regexp-in-string "<" "<" title)) + (setq title (replace-regexp-in-string ">" ">" title)) + (setq title (replace-regexp-in-string """ "\"" title)) + (setq title (replace-regexp-in-string "'" "'" title)) + (setq title (replace-regexp-in-string "'" "'" title)))))) + ;; Always kill the temporary URL buffer, even when extraction failed -- + ;; the old code only killed it when an ID was found, leaking it otherwise. 
+ (when (buffer-live-p buffer) + (kill-buffer buffer))) (if (and id title) (format ";; %s\n(\"%s\" yt)" |
