aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-24 14:40:03 -0500
committerCraig Jennings <c@cjennings.net>2026-05-24 14:40:03 -0500
commitc097b5b4540d51fd279a81c0834b008331e936c9 (patch)
tree50f58a77594d1ea98ae23cbcc1b747cab68b39e9
parent94ef5242e72a39faa9eb14a705387ccad339be14 (diff)
downloaddotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.tar.gz
dotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.zip
fix(elfeed): bound and clean up the synchronous YouTube fetch
cj/youtube-to-elfeed-feed-format called url-retrieve-synchronously with no timeout, so a hung YouTube request would block Emacs indefinitely, and it only killed the temporary URL buffer when an ID was successfully extracted — a page without the expected markers leaked the buffer. Passed cj/elfeed-url-fetch-timeout (10s) to the synchronous fetch, and moved the fetch+parse into an unwind-protect that always kills the temp buffer (live-p guarded), including the parse-failure path. Tests mock the network boundary and cover a normal channel parse, that a timeout is passed, and that the buffer is not leaked when parsing fails. Also added tests for the EWW user-agent advice (no code change): it already injects the desktop UA only from eww-mode buffers, so package.el and other non-EWW url callers pass through untouched — the tests pin that scoping and the replace-not-duplicate header behavior.
-rw-r--r--modules/elfeed-config.el81
-rw-r--r--tests/test-elfeed-config-youtube-feed-format.el69
-rw-r--r--tests/test-eww-config-user-agent-advice.el56
3 files changed, 171 insertions, 35 deletions
diff --git a/modules/elfeed-config.el b/modules/elfeed-config.el
index 58d961f9..dff26410 100644
--- a/modules/elfeed-config.el
+++ b/modules/elfeed-config.el
@@ -208,6 +208,10 @@ Note: Function name kept for backwards compatibility."
;; --------------------- Youtube Url To Elfeed Feed Format ---------------------
+(defconst cj/elfeed-url-fetch-timeout 10
+ "Seconds to wait for a synchronous YouTube page fetch before giving up.
+Without a timeout a hung request would block Emacs indefinitely.")
+
(defun cj/youtube-to-elfeed-feed-format (url type)
"Convert YouTube URL to elfeed-feeds format.
@@ -225,44 +229,51 @@ TYPE should be either \='channel or \='playlist."
"Could not extract channel information"
"Could not extract playlist information")))
- ;; Extract ID based on type
- (if (eq type 'channel)
- ;; For channels, we need to fetch the page to get the channel_id
+ (unwind-protect
(progn
- (setq buffer (url-retrieve-synchronously url))
- (when buffer
+ ;; Extract ID based on type
+ (if (eq type 'channel)
+ ;; For channels, we need to fetch the page to get the channel_id
+ (progn
+ (setq buffer (url-retrieve-synchronously
+ url nil nil cj/elfeed-url-fetch-timeout))
+ (when buffer
+ (with-current-buffer buffer
+ ;; Decode the content as UTF-8
+ (set-buffer-multibyte t)
+ (decode-coding-region (point-min) (point-max) 'utf-8)
+ (goto-char (point-min))
+ ;; Search for the channel_id in the RSS feed link
+ (when (re-search-forward id-pattern nil t)
+ (setq id (match-string 1))))))
+ ;; For playlists, extract from URL first
+ (when (string-match id-pattern url)
+ (setq id (match-string 1 url))
+ (setq buffer (url-retrieve-synchronously
+ url nil nil cj/elfeed-url-fetch-timeout))))
+
+ ;; Get title from the page
+ (when (and buffer id)
(with-current-buffer buffer
- ;; Decode the content as UTF-8
- (set-buffer-multibyte t)
- (decode-coding-region (point-min) (point-max) 'utf-8)
+ (unless (eq type 'channel)
+ ;; Decode for playlist (already done for channel above)
+ (set-buffer-multibyte t)
+ (decode-coding-region (point-min) (point-max) 'utf-8))
+ ;; Search for the title in og:title meta tag
(goto-char (point-min))
- ;; Search for the channel_id in the RSS feed link
- (when (re-search-forward id-pattern nil t)
- (setq id (match-string 1))))))
- ;; For playlists, extract from URL first
- (when (string-match id-pattern url)
- (setq id (match-string 1 url))
- (setq buffer (url-retrieve-synchronously url))))
-
- ;; Get title from the page
- (when (and buffer id)
- (with-current-buffer buffer
- (unless (eq type 'channel)
- ;; Decode for playlist (already done for channel above)
- (set-buffer-multibyte t)
- (decode-coding-region (point-min) (point-max) 'utf-8))
- ;; Search for the title in og:title meta tag
- (goto-char (point-min))
- (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t)
- (setq title (match-string 1))
- ;; Simple HTML entity decoding
- (setq title (replace-regexp-in-string "&amp;" "&" title))
- (setq title (replace-regexp-in-string "&lt;" "<" title))
- (setq title (replace-regexp-in-string "&gt;" ">" title))
- (setq title (replace-regexp-in-string "&quot;" "\"" title))
- (setq title (replace-regexp-in-string "&#39;" "'" title))
- (setq title (replace-regexp-in-string "&#x27;" "'" title))))
- (kill-buffer buffer))
+ (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t)
+ (setq title (match-string 1))
+ ;; Simple HTML entity decoding
+ (setq title (replace-regexp-in-string "&amp;" "&" title))
+ (setq title (replace-regexp-in-string "&lt;" "<" title))
+ (setq title (replace-regexp-in-string "&gt;" ">" title))
+ (setq title (replace-regexp-in-string "&quot;" "\"" title))
+ (setq title (replace-regexp-in-string "&#39;" "'" title))
+ (setq title (replace-regexp-in-string "&#x27;" "'" title))))))
+ ;; Always kill the temporary URL buffer, even when extraction failed --
+ ;; the old code only killed it when an ID was found, leaking it otherwise.
+ (when (buffer-live-p buffer)
+ (kill-buffer buffer)))
(if (and id title)
(format ";; %s\n(\"%s\" yt)"
diff --git a/tests/test-elfeed-config-youtube-feed-format.el b/tests/test-elfeed-config-youtube-feed-format.el
new file mode 100644
index 00000000..bda90aa7
--- /dev/null
+++ b/tests/test-elfeed-config-youtube-feed-format.el
@@ -0,0 +1,69 @@
+;;; test-elfeed-config-youtube-feed-format.el --- Tests for YouTube feed conversion -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; cj/youtube-to-elfeed-feed-format fetches a YouTube page synchronously to
+;; derive the channel/playlist feed URL and title. These tests mock the
+;; network boundary (url-retrieve-synchronously) and verify it passes a
+;; timeout and always kills the temporary URL buffer — including when the
+;; page doesn't contain the expected markers, the path that previously leaked
+;; the buffer.
+
+;;; Code:
+
+(require 'ert)
+(require 'cl-lib)
+(require 'url)
+
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+(require 'elfeed-config)
+
+(defun test-elfeed--url-buffer (html)
+ "Return a fresh buffer containing HTML, as url-retrieve-synchronously would."
+ (let ((b (generate-new-buffer " *test-url-retrieve*")))
+ (with-current-buffer b (insert html))
+ b))
+
+;;; Normal Cases
+
+(ert-deftest test-elfeed-youtube-channel-parses-and-cleans-up ()
+ "Normal: a channel page yields the feed line and the temp buffer is killed."
+ (let ((url-buf nil))
+ (cl-letf (((symbol-function 'url-retrieve-synchronously)
+ (lambda (&rest _)
+ (setq url-buf
+ (test-elfeed--url-buffer
+ (concat "<link href=\"https://www.youtube.com/feeds/videos.xml"
+ "?channel_id=UCabc123\">"
+ "<meta property=\"og:title\" content=\"Test Channel\">"))))))
+ (let ((result (cj/youtube-to-elfeed-feed-format "https://youtube.com/@test" 'channel)))
+ (should (string-match-p "channel_id=UCabc123" result))
+ (should (string-match-p "Test Channel" result)))
+ (should-not (buffer-live-p url-buf)))))
+
+;;; Boundary Cases
+
+(ert-deftest test-elfeed-youtube-passes-a-timeout ()
+ "Boundary: the synchronous fetch is given a timeout, not left unbounded."
+ (let ((captured nil))
+ (cl-letf (((symbol-function 'url-retrieve-synchronously)
+ (lambda (&rest args)
+ (setq captured args)
+ (test-elfeed--url-buffer ""))))
+ (ignore-errors (cj/youtube-to-elfeed-feed-format "https://youtube.com/@t" 'channel))
+ ;; (url &optional silent inhibit-cookies timeout) — a 4th arg is present.
+ (should (>= (length captured) 4))
+ (should (numberp (nth 3 captured))))))
+
+;;; Error Cases
+
+(ert-deftest test-elfeed-youtube-cleans-up-buffer-on-parse-failure ()
+ "Error: a page without the markers errors, and the temp buffer is not leaked."
+ (let ((url-buf nil))
+ (cl-letf (((symbol-function 'url-retrieve-synchronously)
+ (lambda (&rest _)
+ (setq url-buf (test-elfeed--url-buffer "<html>nothing useful</html>")))))
+ (should-error (cj/youtube-to-elfeed-feed-format "https://youtube.com/@t" 'channel))
+ (should-not (buffer-live-p url-buf)))))
+
+(provide 'test-elfeed-config-youtube-feed-format)
+;;; test-elfeed-config-youtube-feed-format.el ends here
diff --git a/tests/test-eww-config-user-agent-advice.el b/tests/test-eww-config-user-agent-advice.el
new file mode 100644
index 00000000..c045a503
--- /dev/null
+++ b/tests/test-eww-config-user-agent-advice.el
@@ -0,0 +1,56 @@
+;;; test-eww-config-user-agent-advice.el --- Tests for EWW user-agent advice -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; my-eww--inject-user-agent is an :around advice on url-retrieve(-synchronously)
+;; that adds a desktop User-Agent only for requests originating in an EWW
+;; buffer. These tests pin that scoping: package.el and other non-EWW URL
+;; callers must pass through untouched, so the advice can't change how the
+;; rest of Emacs fetches URLs.
+
+;;; Code:
+
+(require 'ert)
+(require 'cl-lib)
+(require 'eww)
+
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+(require 'eww-config)
+
+;;; Normal Cases
+
+(ert-deftest test-eww-ua-injected-in-eww-buffer ()
+ "Normal: in an eww-mode buffer the advice adds the configured User-Agent."
+ (with-temp-buffer
+ (setq major-mode 'eww-mode)
+ (let (seen)
+ (my-eww--inject-user-agent
+ (lambda (&rest _) (setq seen (assoc "User-Agent" url-request-extra-headers))))
+ (should seen)
+ (should (equal (cdr seen) my-eww-user-agent)))))
+
+;;; Boundary Cases
+
+(ert-deftest test-eww-ua-not-injected-outside-eww ()
+ "Boundary: outside eww-mode (package.el, normal url fetches) no UA is added."
+ (with-temp-buffer
+ (fundamental-mode)
+ (let ((url-request-extra-headers nil) captured)
+ (my-eww--inject-user-agent
+ (lambda (&rest _) (setq captured url-request-extra-headers)))
+ (should (null (assoc "User-Agent" captured))))))
+
+(ert-deftest test-eww-ua-replaces-existing-and-keeps-other-headers ()
+ "Boundary: an existing User-Agent is replaced (not duplicated) and other
+headers survive, when the request comes from EWW."
+ (with-temp-buffer
+ (setq major-mode 'eww-mode)
+ (let ((url-request-extra-headers '(("User-Agent" . "old") ("Accept" . "x")))
+ seen)
+ (my-eww--inject-user-agent
+ (lambda (&rest _) (setq seen url-request-extra-headers)))
+ (should (= 1 (cl-count "User-Agent" seen :key #'car :test #'string-equal)))
+ (should (equal (cdr (assoc "User-Agent" seen)) my-eww-user-agent))
+ (should (assoc "Accept" seen)))))
+
+(provide 'test-eww-config-user-agent-advice)
+;;; test-eww-config-user-agent-advice.el ends here