aboutsummaryrefslogtreecommitdiff
path: root/tests/test-gloss-fetch--strip-html.el
blob: 61f7e70d9b32e0c3ce689bc22eaf98374a2fa788 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
;;; test-gloss-fetch--strip-html.el --- HTML strip helper tests -*- lexical-binding: t -*-

;; SPDX-License-Identifier: GPL-3.0-or-later

;;; Commentary:
;; `gloss-fetch--strip-html' converts a fragment of HTML to plain text
;; using `libxml-parse-html-region' (no italic/bold preservation).
;; Returns nil when libxml fails on the fragment so the caller can drop
;; that sense.

;;; Code:

(require 'cl-lib)
(require 'ert)
(require 'gloss-fetch)

(ert-deftest test-gloss-fetch-strip-html-plain-text-roundtrips ()
  "Normal: markup-free input comes back as the very same text."
  (skip-unless (fboundp 'libxml-parse-html-region))
  (let* ((input "Just plain text.")
         (output (gloss-fetch--strip-html input)))
    ;; Nothing to strip, so the output must equal the input exactly.
    (should (equal output input))))

(ert-deftest test-gloss-fetch-strip-html-removes-tags ()
  "Normal: anchor and italic tags are stripped, leaving inner text."
  (skip-unless (fboundp 'libxml-parse-html-region))
  (let ((stripped (gloss-fetch--strip-html
                   "The <a href=\"/foo\">repetition</a> of a <i>phrase</i>.")))
    ;; Fail with a readable ERT report, rather than a `wrong-type-argument'
    ;; signal from `string-match-p', if the helper returns nil here.
    (should (stringp stripped))
    ;; The inner text of both elements survives.
    (should (string-match-p "repetition" stripped))
    (should (string-match-p "phrase" stripped))
    ;; Neither a tag delimiter nor the attribute name leaks into the output.
    (should-not (string-match-p "<" stripped))
    (should-not (string-match-p "href" stripped))))

(ert-deftest test-gloss-fetch-strip-html-empty-string-returns-empty ()
  "Boundary: stripping the empty string yields either nil or \"\"."
  (skip-unless (fboundp 'libxml-parse-html-region))
  (let ((stripped (gloss-fetch--strip-html "")))
    ;; `member' compares with `equal', so this accepts exactly nil and "".
    (should (member stripped '(nil "")))))

(ert-deftest test-gloss-fetch-strip-html-entities-decoded ()
  "Boundary: an `&amp;' entity comes out as a literal ampersand."
  (skip-unless (fboundp 'libxml-parse-html-region))
  (let* ((html "Smith &amp; Wesson")
         (text (gloss-fetch--strip-html html)))
    ;; The undecoded entity must not survive, yet an ampersand must remain.
    (should-not (string-match-p "&amp;" text))
    (should (string-match-p "&" text))))

(ert-deftest test-gloss-fetch-strip-html-collapses-whitespace ()
  "Boundary: whitespace runs become single spaces and the ends are trimmed."
  (skip-unless (fboundp 'libxml-parse-html-region))
  ;; Leading, trailing and repeated interior blanks surround a bold element.
  (should (equal (gloss-fetch--strip-html "  hello   <b>world</b>  ")
                 "hello world")))

(ert-deftest test-gloss-fetch-strip-html-failure-returns-nil ()
  "Error: a signal from `libxml-parse-html-region' makes the helper return nil.
The parser is replaced by a stub that always signals.  The test also checks
that the stub was reached, so a nil result cannot come from some other path."
  (skip-unless (fboundp 'libxml-parse-html-region))
  (let ((parser-called nil))
    ;; `cl-letf' rebinds the function cell only for the extent of its body
    ;; and restores the real parser afterwards.
    (cl-letf (((symbol-function 'libxml-parse-html-region)
               (lambda (&rest _)
                 (setq parser-called t)
                 (error "libxml exploded"))))
      (should-not (gloss-fetch--strip-html "any input")))
    ;; Guard against a vacuous pass: the nil must stem from the stubbed error.
    (should parser-called)))

(provide 'test-gloss-fetch--strip-html)
;;; test-gloss-fetch--strip-html.el ends here