aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--modules/elfeed-config.el30
-rw-r--r--tests/test-elfeed-config--decode-html-entities.el31
2 files changed, 43 insertions, 18 deletions
diff --git a/modules/elfeed-config.el b/modules/elfeed-config.el
index ad7bda83a..7712f48db 100644
--- a/modules/elfeed-config.el
+++ b/modules/elfeed-config.el
@@ -126,23 +126,13 @@ Returns the stream URL or nil on failure."
(cmd-args (append '("yt-dlp" "-q" "-g")
format-args
(list url)))
- ;; DEBUG: Log the command
- (_ (cj/log-silently "DEBUG: Extracting with command: %s"
- (mapconcat #'shell-quote-argument cmd-args " ")))
(output (with-temp-buffer
(let ((exit-code (apply #'call-process
(car cmd-args) nil t nil
(cdr cmd-args))))
(if (zerop exit-code)
(string-trim (buffer-string))
- (progn
- ;; DEBUG: Log failure
- (cj/log-silently "DEBUG: yt-dlp failed with exit code %d" exit-code)
- (cj/log-silently "DEBUG: Error output: %s" (buffer-string))
- nil))))))
- ;; DEBUG: Log the result
- (cj/log-silently "DEBUG: Extracted URL: %s"
- (if output (truncate-string-to-width output 100) "nil"))
+ nil)))))
(when (and output (string-match-p "^https?://" output))
output)))
@@ -223,6 +213,15 @@ Note: Function name kept for backwards compatibility."
"Seconds to wait for a synchronous YouTube page fetch before giving up.
Without a timeout a hung request would block Emacs indefinitely.")
+(defun cj/--decode-html-entities (text)
+  "Return TEXT with the common HTML entities decoded.
+Handles &amp; &lt; &gt; &quot; &#39; and &#x27; -- the entities YouTube's
+og:title meta tag emits.  One pass only: \"&amp;lt;\" becomes \"&lt;\"."
+  (let ((entities '(("&amp;" . "&") ("&lt;" . "<") ("&gt;" . ">")
+                    ("&quot;" . "\"") ("&#39;" . "'") ("&#x27;" . "'"))))
+    (replace-regexp-in-string "&[#[:alnum:]]+;" ; single scan, no re-decoding
+     (lambda (m) (or (cdr (assoc-string m entities t)) m)) text t t)))
+
(defun cj/youtube-to-elfeed-feed-format (url type)
"Convert YouTube URL to elfeed-feeds format.
@@ -274,13 +273,8 @@ TYPE should be either \='channel or \='playlist."
(goto-char (point-min))
(when (re-search-forward "<meta property=\"og:title\" content=\"\\([^\"]+\\)\"" nil t)
(setq title (match-string 1))
- ;; Simple HTML entity decoding
- (setq title (replace-regexp-in-string "&amp;" "&" title))
- (setq title (replace-regexp-in-string "&lt;" "<" title))
- (setq title (replace-regexp-in-string "&gt;" ">" title))
- (setq title (replace-regexp-in-string "&quot;" "\"" title))
- (setq title (replace-regexp-in-string "&#39;" "'" title))
- (setq title (replace-regexp-in-string "&#x27;" "'" title))))))
+ ;; Decode HTML entities in the extracted title
+ (setq title (cj/--decode-html-entities title))))))
;; Always kill the temporary URL buffer, even when extraction failed --
;; the old code only killed it when an ID was found, leaking it otherwise.
(when (buffer-live-p buffer)
diff --git a/tests/test-elfeed-config--decode-html-entities.el b/tests/test-elfeed-config--decode-html-entities.el
new file mode 100644
index 000000000..a3fba3c49
--- /dev/null
+++ b/tests/test-elfeed-config--decode-html-entities.el
@@ -0,0 +1,31 @@
+;;; test-elfeed-config--decode-html-entities.el --- Tests for cj/--decode-html-entities -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; cj/--decode-html-entities replaces the six inline replace-regexp-in-string
+;; calls that cj/youtube-to-elfeed-feed-format used to hand-decode an og:title.
+
+;;; Code:
+
+(require 'ert)
+
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+(require 'elfeed-config)
+
+(ert-deftest test-elfeed-decode-html-entities-all ()
+  "Normal: each of the six supported entities decodes to its character."
+  (let ((encoded "a &amp; b &lt;c&gt; &quot;d&quot; &#39;e&#x27;")
+        (expected "a & b <c> \"d\" 'e'"))
+    (should (equal (cj/--decode-html-entities encoded) expected))))
+
+(ert-deftest test-elfeed-decode-html-entities-no-entities ()
+  "Boundary: entity-free input, including the empty string, comes back as-is."
+  (dolist (plain '("plain title" ""))
+    (should (equal (cj/--decode-html-entities plain) plain))))
+
+(ert-deftest test-elfeed-decode-html-entities-amp-first ()
+  "Boundary: &amp; and a later, separate entity in one title both decode."
+  (let ((decoded (cj/--decode-html-entities "Tom &amp; Jerry &lt;3")))
+    (should (equal decoded "Tom & Jerry <3"))))
+
+(provide 'test-elfeed-config--decode-html-entities)
+;;; test-elfeed-config--decode-html-entities.el ends here