diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-24 14:40:03 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-24 14:40:03 -0500 |
| commit | c097b5b4540d51fd279a81c0834b008331e936c9 (patch) | |
| tree | 50f58a77594d1ea98ae23cbcc1b747cab68b39e9 | |
| parent | 94ef5242e72a39faa9eb14a705387ccad339be14 (diff) | |
| download | dotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.tar.gz dotemacs-c097b5b4540d51fd279a81c0834b008331e936c9.zip | |
fix(elfeed): bound and clean up the synchronous YouTube fetch
cj/youtube-to-elfeed-feed-format called url-retrieve-synchronously with no timeout, so a hung YouTube request would block Emacs indefinitely, and it only killed the temporary URL buffer when an ID was successfully extracted — a page without the expected markers leaked the buffer.
Passed cj/elfeed-url-fetch-timeout (10s) to the synchronous fetch, and moved the fetch+parse into an unwind-protect that always kills the temp buffer (live-p guarded), including the parse-failure path. Tests mock the network boundary and cover a normal channel parse, that a timeout is passed, and that the buffer is not leaked when parsing fails.
Also added tests for the EWW user-agent advice (no code change): it already injects the desktop UA only from eww-mode buffers, so package.el and other non-EWW url callers pass through untouched — the tests pin that scoping and the replace-not-duplicate header behavior.
| -rw-r--r-- | modules/elfeed-config.el | 81 | ||||
| -rw-r--r-- | tests/test-elfeed-config-youtube-feed-format.el | 69 | ||||
| -rw-r--r-- | tests/test-eww-config-user-agent-advice.el | 56 |
3 files changed, 171 insertions, 35 deletions
diff --git a/modules/elfeed-config.el b/modules/elfeed-config.el index 58d961f9..dff26410 100644 --- a/modules/elfeed-config.el +++ b/modules/elfeed-config.el @@ -208,6 +208,10 @@ Note: Function name kept for backwards compatibility." ;; --------------------- Youtube Url To Elfeed Feed Format --------------------- +(defconst cj/elfeed-url-fetch-timeout 10 + "Seconds to wait for a synchronous YouTube page fetch before giving up. +Without a timeout a hung request would block Emacs indefinitely.") + (defun cj/youtube-to-elfeed-feed-format (url type) "Convert YouTube URL to elfeed-feeds format. @@ -225,44 +229,51 @@ TYPE should be either \='channel or \='playlist." "Could not extract channel information" "Could not extract playlist information"))) - ;; Extract ID based on type - (if (eq type 'channel) - ;; For channels, we need to fetch the page to get the channel_id + (unwind-protect (progn - (setq buffer (url-retrieve-synchronously url)) - (when buffer + ;; Extract ID based on type + (if (eq type 'channel) + ;; For channels, we need to fetch the page to get the channel_id + (progn + (setq buffer (url-retrieve-synchronously + url nil nil cj/elfeed-url-fetch-timeout)) + (when buffer + (with-current-buffer buffer + ;; Decode the content as UTF-8 + (set-buffer-multibyte t) + (decode-coding-region (point-min) (point-max) 'utf-8) + (goto-char (point-min)) + ;; Search for the channel_id in the RSS feed link + (when (re-search-forward id-pattern nil t) + (setq id (match-string 1)))))) + ;; For playlists, extract from URL first + (when (string-match id-pattern url) + (setq id (match-string 1 url)) + (setq buffer (url-retrieve-synchronously + url nil nil cj/elfeed-url-fetch-timeout)))) + + ;; Get title from the page + (when (and buffer id) (with-current-buffer buffer - ;; Decode the content as UTF-8 - (set-buffer-multibyte t) - (decode-coding-region (point-min) (point-max) 'utf-8) + (unless (eq type 'channel) + ;; Decode for playlist (already done for channel above) + (set-buffer-multibyte t) + (decode-coding-region (point-min) (point-max) 'utf-8)) + ;; Search for the title in og:title meta tag (goto-char (point-min)) - ;; Search for the channel_id in the RSS feed link - (when (re-search-forward id-pattern nil t) - (setq id (match-string 1)))))) - ;; For playlists, extract from URL first - (when (string-match id-pattern url) - (setq id (match-string 1 url)) - (setq buffer (url-retrieve-synchronously url)))) - - ;; Get title from the page - (when (and buffer id) - (with-current-buffer buffer - (unless (eq type 'channel) - ;; Decode for playlist (already done for channel above) - (set-buffer-multibyte t) - (decode-coding-region (point-min) (point-max) 'utf-8)) - ;; Search for the title in og:title meta tag - (goto-char (point-min)) - (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t) - (setq title (match-string 1)) - ;; Simple HTML entity decoding - (setq title (replace-regexp-in-string "&" "&" title)) - (setq title (replace-regexp-in-string "<" "<" title)) - (setq title (replace-regexp-in-string ">" ">" title)) - (setq title (replace-regexp-in-string """ "\"" title)) - (setq title (replace-regexp-in-string "'" "'" title)) - (setq title (replace-regexp-in-string "'" "'" title)))) - (kill-buffer buffer)) + (when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t) + (setq title (match-string 1)) + ;; Simple HTML entity decoding + (setq title (replace-regexp-in-string "&" "&" title)) + (setq title (replace-regexp-in-string "<" "<" title)) + (setq title (replace-regexp-in-string ">" ">" title)) + (setq title (replace-regexp-in-string """ "\"" title)) + (setq title (replace-regexp-in-string "'" "'" title)) + (setq title (replace-regexp-in-string "'" "'" title)))))) + ;; Always kill the temporary URL buffer, even when extraction failed -- + ;; the old code only killed it when an ID was found, leaking it otherwise. + (when (buffer-live-p buffer) + (kill-buffer buffer))) (if (and id title) (format ";; %s\n(\"%s\" yt)" diff --git a/tests/test-elfeed-config-youtube-feed-format.el b/tests/test-elfeed-config-youtube-feed-format.el new file mode 100644 index 00000000..bda90aa7 --- /dev/null +++ b/tests/test-elfeed-config-youtube-feed-format.el @@ -0,0 +1,69 @@ +;;; test-elfeed-config-youtube-feed-format.el --- Tests for YouTube feed conversion -*- lexical-binding: t; -*- + +;;; Commentary: +;; cj/youtube-to-elfeed-feed-format fetches a YouTube page synchronously to +;; derive the channel/playlist feed URL and title. These tests mock the +;; network boundary (url-retrieve-synchronously) and verify it passes a +;; timeout and always kills the temporary URL buffer — including when the +;; page doesn't contain the expected markers, the path that previously leaked +;; the buffer. + +;;; Code: + +(require 'ert) +(require 'cl-lib) +(require 'url) + +(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory)) +(require 'elfeed-config) + +(defun test-elfeed--url-buffer (html) + "Return a fresh buffer containing HTML, as url-retrieve-synchronously would." + (let ((b (generate-new-buffer " *test-url-retrieve*"))) + (with-current-buffer b (insert html)) + b)) + +;;; Normal Cases + +(ert-deftest test-elfeed-youtube-channel-parses-and-cleans-up () + "Normal: a channel page yields the feed line and the temp buffer is killed." + (let ((url-buf nil)) + (cl-letf (((symbol-function 'url-retrieve-synchronously) + (lambda (&rest _) + (setq url-buf + (test-elfeed--url-buffer + (concat "<link href=\"https://www.youtube.com/feeds/videos.xml" + "?channel_id=UCabc123\">" + "<meta property=\"og:title\" content=\"Test Channel\">")))))) + (let ((result (cj/youtube-to-elfeed-feed-format "https://youtube.com/@test" 'channel))) + (should (string-match-p "channel_id=UCabc123" result)) + (should (string-match-p "Test Channel" result))) + (should-not (buffer-live-p url-buf))))) + +;;; Boundary Cases + +(ert-deftest test-elfeed-youtube-passes-a-timeout () + "Boundary: the synchronous fetch is given a timeout, not left unbounded." + (let ((captured nil)) + (cl-letf (((symbol-function 'url-retrieve-synchronously) + (lambda (&rest args) + (setq captured args) + (test-elfeed--url-buffer "")))) + (ignore-errors (cj/youtube-to-elfeed-feed-format "https://youtube.com/@t" 'channel)) + ;; (url &optional silent inhibit-cookies timeout) — a 4th arg is present. + (should (>= (length captured) 4)) + (should (numberp (nth 3 captured)))))) + +;;; Error Cases + +(ert-deftest test-elfeed-youtube-cleans-up-buffer-on-parse-failure () + "Error: a page without the markers errors, and the temp buffer is not leaked." + (let ((url-buf nil)) + (cl-letf (((symbol-function 'url-retrieve-synchronously) + (lambda (&rest _) + (setq url-buf (test-elfeed--url-buffer "<html>nothing useful</html>"))))) + (should-error (cj/youtube-to-elfeed-feed-format "https://youtube.com/@t" 'channel)) + (should-not (buffer-live-p url-buf))))) + +(provide 'test-elfeed-config-youtube-feed-format) +;;; test-elfeed-config-youtube-feed-format.el ends here diff --git a/tests/test-eww-config-user-agent-advice.el b/tests/test-eww-config-user-agent-advice.el new file mode 100644 index 00000000..c045a503 --- /dev/null +++ b/tests/test-eww-config-user-agent-advice.el @@ -0,0 +1,56 @@ +;;; test-eww-config-user-agent-advice.el --- Tests for EWW user-agent advice -*- lexical-binding: t; -*- + +;;; Commentary: +;; my-eww--inject-user-agent is an :around advice on url-retrieve(-synchronously) +;; that adds a desktop User-Agent only for requests originating in an EWW +;; buffer. These tests pin that scoping: package.el and other non-EWW URL +;; callers must pass through untouched, so the advice can't change how the +;; rest of Emacs fetches URLs. + +;;; Code: + +(require 'ert) +(require 'cl-lib) +(require 'eww) + +(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory)) +(require 'eww-config) + +;;; Normal Cases + +(ert-deftest test-eww-ua-injected-in-eww-buffer () + "Normal: in an eww-mode buffer the advice adds the configured User-Agent." + (with-temp-buffer + (setq major-mode 'eww-mode) + (let (seen) + (my-eww--inject-user-agent + (lambda (&rest _) (setq seen (assoc "User-Agent" url-request-extra-headers)))) + (should seen) + (should (equal (cdr seen) my-eww-user-agent))))) + +;;; Boundary Cases + +(ert-deftest test-eww-ua-not-injected-outside-eww () + "Boundary: outside eww-mode (package.el, normal url fetches) no UA is added." + (with-temp-buffer + (fundamental-mode) + (let ((url-request-extra-headers nil) captured) + (my-eww--inject-user-agent + (lambda (&rest _) (setq captured url-request-extra-headers))) + (should (null (assoc "User-Agent" captured)))))) + +(ert-deftest test-eww-ua-replaces-existing-and-keeps-other-headers () + "Boundary: an existing User-Agent is replaced (not duplicated) and other +headers survive, when the request comes from EWW." + (with-temp-buffer + (setq major-mode 'eww-mode) + (let ((url-request-extra-headers '(("User-Agent" . "old") ("Accept" . "x"))) + seen) + (my-eww--inject-user-agent + (lambda (&rest _) (setq seen url-request-extra-headers))) + (should (= 1 (cl-count "User-Agent" seen :key #'car :test #'string-equal))) + (should (equal (cdr (assoc "User-Agent" seen)) my-eww-user-agent)) + (should (assoc "Accept" seen))))) + +(provide 'test-eww-config-user-agent-advice) +;;; test-eww-config-user-agent-advice.el ends here |
