summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2026-05-16 05:17:21 -0500
committerCraig Jennings <c@cjennings.net>2026-05-16 05:17:21 -0500
commit99d93203a867294addf4927ceec5644b9d3bf322 (patch)
treef579382868960c77cd38eb350fba4dfdaab13c21 /tests
parentceeae9b5e2625e23e6e3792d06a6c8122a36d18b (diff)
downloaddotemacs-99d93203a867294addf4927ceec5644b9d3bf322.tar.gz
dotemacs-99d93203a867294addf4927ceec5644b9d3bf322.zip
feat(gptel-tools): wire web_fetch as a local tool
Fourth ADOPT entry from `docs/design/gptel-tools-shortlist.org'. Lets gptel pull a URL into the conversation so the model can read docs / current API shapes / etc. without me copy-pasting. Shape: - URL must be `http://' or `https://' (file://, ftp://, javascript:, scheme-less, etc. are rejected at the validator). - HTML responses go through `pandoc -f html -t plain' so the model gets a reading shape that isn't full of markup; falls back to `w3m -dump -T text/html' if pandoc isn't on PATH; signals `user-error' if neither is. Pass `raw=t' to skip stripping. - Output capped at 200KB by default, hard cap 1MB; `max_bytes' argument lets the caller pick a lower cap. Truncation reported inline. - 4xx / 5xx response codes signal `error' with the code -- the alternative is returning an error page body, which the model would treat as content. `:confirm t' on the tool because every call is a real outbound network request. The tool's description warns that URLs go wherever the user-agent points, including internal networks if that's what the URL names. `tests/test-gptel-tools-web-fetch.el' -- 20 tests across Normal / Boundary / Error. URL validator covers http / https / non-string / empty / non-http schemes. `--effective-max-bytes' covers default / low-clamp / hard-cap / passthrough. Truncate helper covers under-cap / at-cap / over-cap with the marker. HTML stripper runs against real pandoc / w3m (both installed in dev env, neither should mangle simple markup). Orchestrator stubs `cj/gptel-web-fetch--retrieve' via `cl-letf' to cover normal / raw / 4xx / 5xx / oversize / bad-scheme paths. Wired into `cj/gptel-local-tool-features' so gptel exposes the tool on next restart. Note: `call-process-region' invocation flattened to a single `with-temp-buffer' with DELETE=t -- the initial draft nested a second temp buffer and routed output to the inner one, which got killed before `buffer-string' on the outer ran. Test caught it.
Diffstat (limited to 'tests')
-rw-r--r--tests/test-gptel-tools-web-fetch.el158
1 files changed, 158 insertions, 0 deletions
diff --git a/tests/test-gptel-tools-web-fetch.el b/tests/test-gptel-tools-web-fetch.el
new file mode 100644
index 00000000..0206af3f
--- /dev/null
+++ b/tests/test-gptel-tools-web-fetch.el
@@ -0,0 +1,158 @@
+;;; test-gptel-tools-web-fetch.el --- Tests for web_fetch gptel tool -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; Validators and helpers tested directly. The orchestrator's network
+;; call is stubbed via `cl-letf' on `url-retrieve-synchronously' / the
+;; module's `--retrieve' helper; HTML stripping runs against real
+;; pandoc / w3m (both are installed in this dev environment, and
+;; verifying they don't mangle inputs is the point).
+
+;;; Code:
+
+(require 'ert)
+(require 'cl-lib)
+
+(eval-and-compile
+ (add-to-list 'load-path (expand-file-name "tests" user-emacs-directory))
+ (add-to-list 'load-path (expand-file-name "gptel-tools" user-emacs-directory))
+ (setq load-prefer-newer t)
+ (unless (featurep 'gptel)
+ (defvar gptel-tools nil)
+ (defun gptel-make-tool (&rest _args) nil)
+ (defun gptel-get-tool (&rest _args) nil)
+ (provide 'gptel)))
+
+(require 'web_fetch)
+
+;; ---------- validate-url
+
+(ert-deftest test-gptel-tools-web-fetch-validate-url-http ()
+ "Normal: http URL passes."
+ (should (equal (cj/gptel-web-fetch--validate-url "http://example.com")
+ "http://example.com")))
+
+(ert-deftest test-gptel-tools-web-fetch-validate-url-https ()
+ "Normal: https URL passes."
+ (should (equal (cj/gptel-web-fetch--validate-url "https://example.com/path")
+ "https://example.com/path")))
+
+(ert-deftest test-gptel-tools-web-fetch-validate-url-error-non-string ()
+ "Error: non-string URL signals."
+ (should-error (cj/gptel-web-fetch--validate-url nil))
+ (should-error (cj/gptel-web-fetch--validate-url 42)))
+
+(ert-deftest test-gptel-tools-web-fetch-validate-url-error-empty ()
+ "Error: empty URL signals."
+ (should-error (cj/gptel-web-fetch--validate-url "")))
+
+(ert-deftest test-gptel-tools-web-fetch-validate-url-error-non-http-scheme ()
+ "Error: schemes other than http/https are rejected."
+ (should-error (cj/gptel-web-fetch--validate-url "file:///etc/hostname"))
+ (should-error (cj/gptel-web-fetch--validate-url "ftp://example.com"))
+ (should-error (cj/gptel-web-fetch--validate-url "javascript:alert(1)"))
+ (should-error (cj/gptel-web-fetch--validate-url "example.com"))) ; no scheme
+
+;; ---------- effective-max-bytes
+
+(ert-deftest test-gptel-tools-web-fetch-max-bytes-default-on-nil ()
+ "Boundary: nil falls back to the default cap."
+ (should (= (cj/gptel-web-fetch--effective-max-bytes nil)
+ cj/gptel-web-fetch--default-max-bytes)))
+
+(ert-deftest test-gptel-tools-web-fetch-max-bytes-clamp-low ()
+ "Boundary: zero / negative fall back to the default."
+ (should (= (cj/gptel-web-fetch--effective-max-bytes 0)
+ cj/gptel-web-fetch--default-max-bytes))
+ (should (= (cj/gptel-web-fetch--effective-max-bytes -1)
+ cj/gptel-web-fetch--default-max-bytes)))
+
+(ert-deftest test-gptel-tools-web-fetch-max-bytes-cap-high ()
+ "Boundary: values above the hard cap are clamped."
+ (should (= (cj/gptel-web-fetch--effective-max-bytes (* 10 1024 1024))
+ cj/gptel-web-fetch--hard-max-bytes)))
+
+(ert-deftest test-gptel-tools-web-fetch-max-bytes-normal ()
+ "Normal: a sensible value passes through."
+ (should (= (cj/gptel-web-fetch--effective-max-bytes 50000) 50000)))
+
+;; ---------- truncate
+
+(ert-deftest test-gptel-tools-web-fetch-truncate-under-cap ()
+ "Normal: small input returns unchanged."
+ (should (equal (cj/gptel-web-fetch--truncate "short" 1000) "short")))
+
+(ert-deftest test-gptel-tools-web-fetch-truncate-at-cap ()
+ "Boundary: input exactly at cap returns unchanged."
+ (let ((s (make-string 10 ?x)))
+ (should (equal (cj/gptel-web-fetch--truncate s 10) s))))
+
+(ert-deftest test-gptel-tools-web-fetch-truncate-over-cap ()
+ "Boundary: oversize input is truncated and marked."
+ (let* ((s (make-string 1000 ?x))
+ (out (cj/gptel-web-fetch--truncate s 100)))
+ (should (string-match-p "\\[truncated:" out))
+ (should (string-match-p "1000 bytes total" out))))
+
+;; ---------- html-to-text
+
+(ert-deftest test-gptel-tools-web-fetch-html-to-text-strips-tags ()
+ "Normal: pandoc / w3m strip HTML tags from real markup."
+ (let ((out (cj/gptel-web-fetch--html-to-text
+ "<html><body><h1>Hello</h1><p>World</p></body></html>")))
+ (should (string-match-p "Hello" out))
+ (should (string-match-p "World" out))
+ (should-not (string-match-p "<h1>" out))
+ (should-not (string-match-p "<p>" out))))
+
+(ert-deftest test-gptel-tools-web-fetch-html-to-text-error-when-neither-on-path ()
+ "Error: when neither pandoc nor w3m is on PATH, signals user-error."
+ (cl-letf (((symbol-function 'executable-find) (lambda (_) nil)))
+ (should-error (cj/gptel-web-fetch--html-to-text "<p>x</p>"))))
+
+;; ---------- run (orchestrator)
+
+(ert-deftest test-gptel-tools-web-fetch-run-normal-strips-html ()
+ "Normal: orchestrator returns stripped text by default."
+ (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve)
+ (lambda (_url)
+ (cons 200 "<html><body><p>fetched</p></body></html>"))))
+ (let ((out (cj/gptel-web-fetch--run "https://example.com")))
+ (should (string-match-p "fetched" out))
+ (should-not (string-match-p "<p>" out)))))
+
+(ert-deftest test-gptel-tools-web-fetch-run-raw-returns-body-verbatim ()
+ "Normal: raw=t returns the response body without HTML stripping."
+ (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve)
+ (lambda (_url)
+ (cons 200 "<html><body><p>raw</p></body></html>"))))
+ (let ((out (cj/gptel-web-fetch--run "https://example.com" t)))
+ (should (string-match-p "<p>raw</p>" out)))))
+
+(ert-deftest test-gptel-tools-web-fetch-run-error-on-4xx ()
+ "Error: HTTP 4xx response signals."
+ (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve)
+ (lambda (_url) (cons 404 "not found"))))
+ (should-error (cj/gptel-web-fetch--run "https://example.com"))))
+
+(ert-deftest test-gptel-tools-web-fetch-run-error-on-5xx ()
+ "Error: HTTP 5xx response signals."
+ (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve)
+ (lambda (_url) (cons 503 "service unavailable"))))
+ (should-error (cj/gptel-web-fetch--run "https://example.com"))))
+
+(ert-deftest test-gptel-tools-web-fetch-run-truncates-oversized-body ()
+ "Boundary: an oversize body is truncated by the run wrapper."
+ (let ((big (concat "<html><body>"
+ (make-string 1000 ?x)
+ "</body></html>")))
+ (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve)
+ (lambda (_url) (cons 200 big))))
+ (let ((out (cj/gptel-web-fetch--run "https://example.com" t 200)))
+ (should (string-match-p "\\[truncated:" out))))))
+
+(ert-deftest test-gptel-tools-web-fetch-run-error-on-bad-scheme ()
+ "Error: non-http URL fails fast at the validator."
+ (should-error (cj/gptel-web-fetch--run "file:///etc/passwd")))
+
+(provide 'test-gptel-tools-web-fetch)
+;;; test-gptel-tools-web-fetch.el ends here