diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-16 05:17:21 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-16 05:17:21 -0500 |
| commit | 99d93203a867294addf4927ceec5644b9d3bf322 (patch) | |
| tree | f579382868960c77cd38eb350fba4dfdaab13c21 /tests | |
| parent | ceeae9b5e2625e23e6e3792d06a6c8122a36d18b (diff) | |
| download | dotemacs-99d93203a867294addf4927ceec5644b9d3bf322.tar.gz dotemacs-99d93203a867294addf4927ceec5644b9d3bf322.zip | |
feat(gptel-tools): wire web_fetch as a local tool
Fourth ADOPT entry from `docs/design/gptel-tools-shortlist.org'.
Lets gptel pull a URL into the conversation so the model can read
docs / current API shapes / etc. without me copy-pasting.
Shape:
- URL must be `http://' or `https://' (file://, ftp://, javascript:,
scheme-less, etc. are rejected at the validator).
- HTML responses go through `pandoc -f html -t plain' so the model
gets a reading shape that isn't full of markup; falls back to
`w3m -dump -T text/html' if pandoc isn't on PATH; signals
`user-error' if neither is. Pass `raw=t' to skip stripping.
- Output capped at 200KB by default, hard cap 1MB; `max_bytes'
argument lets the caller pick a lower cap. Truncation reported
inline.
- 4xx / 5xx response codes signal `error' with the code -- the
alternative is returning an error page body, which the model
would treat as content.
`:confirm t' on the tool because every call is a real outbound
network request. The tool's description warns that URLs go
wherever the user-agent points, including internal networks if
that's what the URL names.
`tests/test-gptel-tools-web-fetch.el' -- 20 tests across Normal /
Boundary / Error. URL validator covers http / https / non-string
/ empty / non-http schemes. `--effective-max-bytes' covers default
/ low-clamp / hard-cap / passthrough. Truncate helper covers
under-cap / at-cap / over-cap with the marker. HTML stripper runs
against real pandoc / w3m (both installed in dev env, neither
should mangle simple markup). Orchestrator stubs
`cj/gptel-web-fetch--retrieve' via `cl-letf' to cover normal /
raw / 4xx / 5xx / oversize / bad-scheme paths.
Wired into `cj/gptel-local-tool-features' so gptel exposes the
tool on next restart.
Note: `call-process-region' invocation flattened to a single
`with-temp-buffer' with DELETE=t -- the initial draft nested a
second temp buffer and routed output to the inner one, which got
killed before `buffer-string' on the outer ran. Test caught it.
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/test-gptel-tools-web-fetch.el | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/tests/test-gptel-tools-web-fetch.el b/tests/test-gptel-tools-web-fetch.el new file mode 100644 index 00000000..0206af3f --- /dev/null +++ b/tests/test-gptel-tools-web-fetch.el @@ -0,0 +1,158 @@ +;;; test-gptel-tools-web-fetch.el --- Tests for web_fetch gptel tool -*- lexical-binding: t; -*- + +;;; Commentary: +;; Validators and helpers tested directly. The orchestrator's network +;; call is stubbed via `cl-letf' on `url-retrieve-synchronously' / the +;; module's `--retrieve' helper; HTML stripping runs against real +;; pandoc / w3m (both are installed in this dev environment, and +;; verifying they don't mangle inputs is the point). + +;;; Code: + +(require 'ert) +(require 'cl-lib) + +(eval-and-compile + (add-to-list 'load-path (expand-file-name "tests" user-emacs-directory)) + (add-to-list 'load-path (expand-file-name "gptel-tools" user-emacs-directory)) + (setq load-prefer-newer t) + (unless (featurep 'gptel) + (defvar gptel-tools nil) + (defun gptel-make-tool (&rest _args) nil) + (defun gptel-get-tool (&rest _args) nil) + (provide 'gptel))) + +(require 'web_fetch) + +;; ---------- validate-url + +(ert-deftest test-gptel-tools-web-fetch-validate-url-http () + "Normal: http URL passes." + (should (equal (cj/gptel-web-fetch--validate-url "http://example.com") + "http://example.com"))) + +(ert-deftest test-gptel-tools-web-fetch-validate-url-https () + "Normal: https URL passes." + (should (equal (cj/gptel-web-fetch--validate-url "https://example.com/path") + "https://example.com/path"))) + +(ert-deftest test-gptel-tools-web-fetch-validate-url-error-non-string () + "Error: non-string URL signals." + (should-error (cj/gptel-web-fetch--validate-url nil)) + (should-error (cj/gptel-web-fetch--validate-url 42))) + +(ert-deftest test-gptel-tools-web-fetch-validate-url-error-empty () + "Error: empty URL signals." + (should-error (cj/gptel-web-fetch--validate-url ""))) + +(ert-deftest test-gptel-tools-web-fetch-validate-url-error-non-http-scheme () + "Error: schemes other than http/https are rejected." + (should-error (cj/gptel-web-fetch--validate-url "file:///etc/hostname")) + (should-error (cj/gptel-web-fetch--validate-url "ftp://example.com")) + (should-error (cj/gptel-web-fetch--validate-url "javascript:alert(1)")) + (should-error (cj/gptel-web-fetch--validate-url "example.com"))) ; no scheme + +;; ---------- effective-max-bytes + +(ert-deftest test-gptel-tools-web-fetch-max-bytes-default-on-nil () + "Boundary: nil falls back to the default cap." + (should (= (cj/gptel-web-fetch--effective-max-bytes nil) + cj/gptel-web-fetch--default-max-bytes))) + +(ert-deftest test-gptel-tools-web-fetch-max-bytes-clamp-low () + "Boundary: zero / negative fall back to the default." + (should (= (cj/gptel-web-fetch--effective-max-bytes 0) + cj/gptel-web-fetch--default-max-bytes)) + (should (= (cj/gptel-web-fetch--effective-max-bytes -1) + cj/gptel-web-fetch--default-max-bytes))) + +(ert-deftest test-gptel-tools-web-fetch-max-bytes-cap-high () + "Boundary: values above the hard cap are clamped." + (should (= (cj/gptel-web-fetch--effective-max-bytes (* 10 1024 1024)) + cj/gptel-web-fetch--hard-max-bytes))) + +(ert-deftest test-gptel-tools-web-fetch-max-bytes-normal () + "Normal: a sensible value passes through." + (should (= (cj/gptel-web-fetch--effective-max-bytes 50000) 50000))) + +;; ---------- truncate + +(ert-deftest test-gptel-tools-web-fetch-truncate-under-cap () + "Normal: small input returns unchanged." + (should (equal (cj/gptel-web-fetch--truncate "short" 1000) "short"))) + +(ert-deftest test-gptel-tools-web-fetch-truncate-at-cap () + "Boundary: input exactly at cap returns unchanged." + (let ((s (make-string 10 ?x))) + (should (equal (cj/gptel-web-fetch--truncate s 10) s)))) + +(ert-deftest test-gptel-tools-web-fetch-truncate-over-cap () + "Boundary: oversize input is truncated and marked." + (let* ((s (make-string 1000 ?x)) + (out (cj/gptel-web-fetch--truncate s 100))) + (should (string-match-p "\\[truncated:" out)) + (should (string-match-p "1000 bytes total" out)))) + +;; ---------- html-to-text + +(ert-deftest test-gptel-tools-web-fetch-html-to-text-strips-tags () + "Normal: pandoc / w3m strip HTML tags from real markup." + (let ((out (cj/gptel-web-fetch--html-to-text + "<html><body><h1>Hello</h1><p>World</p></body></html>"))) + (should (string-match-p "Hello" out)) + (should (string-match-p "World" out)) + (should-not (string-match-p "<h1>" out)) + (should-not (string-match-p "<p>" out)))) + +(ert-deftest test-gptel-tools-web-fetch-html-to-text-error-when-neither-on-path () + "Error: when neither pandoc nor w3m is on PATH, signals user-error." + (cl-letf (((symbol-function 'executable-find) (lambda (_) nil))) + (should-error (cj/gptel-web-fetch--html-to-text "<p>x</p>")))) + +;; ---------- run (orchestrator) + +(ert-deftest test-gptel-tools-web-fetch-run-normal-strips-html () + "Normal: orchestrator returns stripped text by default." + (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve) + (lambda (_url) + (cons 200 "<html><body><p>fetched</p></body></html>")))) + (let ((out (cj/gptel-web-fetch--run "https://example.com"))) + (should (string-match-p "fetched" out)) + (should-not (string-match-p "<p>" out))))) + +(ert-deftest test-gptel-tools-web-fetch-run-raw-returns-body-verbatim () + "Normal: raw=t returns the response body without HTML stripping." + (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve) + (lambda (_url) + (cons 200 "<html><body><p>raw</p></body></html>")))) + (let ((out (cj/gptel-web-fetch--run "https://example.com" t))) + (should (string-match-p "<p>raw</p>" out))))) + +(ert-deftest test-gptel-tools-web-fetch-run-error-on-4xx () + "Error: HTTP 4xx response signals." + (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve) + (lambda (_url) (cons 404 "not found")))) + (should-error (cj/gptel-web-fetch--run "https://example.com")))) + +(ert-deftest test-gptel-tools-web-fetch-run-error-on-5xx () + "Error: HTTP 5xx response signals." + (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve) + (lambda (_url) (cons 503 "service unavailable")))) + (should-error (cj/gptel-web-fetch--run "https://example.com")))) + +(ert-deftest test-gptel-tools-web-fetch-run-truncates-oversized-body () + "Boundary: an oversize body is truncated by the run wrapper." + (let ((big (concat "<html><body>" + (make-string 1000 ?x) + "</body></html>"))) + (cl-letf (((symbol-function 'cj/gptel-web-fetch--retrieve) + (lambda (_url) (cons 200 big)))) + (let ((out (cj/gptel-web-fetch--run "https://example.com" t 200))) + (should (string-match-p "\\[truncated:" out)))))) + +(ert-deftest test-gptel-tools-web-fetch-run-error-on-bad-scheme () + "Error: non-http URL fails fast at the validator." + (should-error (cj/gptel-web-fetch--run "file:///etc/passwd"))) + +(provide 'test-gptel-tools-web-fetch) +;;; test-gptel-tools-web-fetch.el ends here |
