aboutsummaryrefslogtreecommitdiff
path: root/gloss-fetch.el
diff options
context:
space:
mode:
Diffstat (limited to 'gloss-fetch.el')
-rw-r--r--gloss-fetch.el254
1 files changed, 253 insertions, 1 deletions
diff --git a/gloss-fetch.el b/gloss-fetch.el
index e4477f2..a5aa84d 100644
--- a/gloss-fetch.el
+++ b/gloss-fetch.el
@@ -29,7 +29,259 @@
;;; Code:
-;; Implementation pending. Track via todo.org.
+(require 'json)
+(require 'subr-x)
+(require 'url)
+
+(defcustom gloss-fetch-sources '(wiktionary)
+ "Ordered list of source symbols consulted by `gloss-fetch-definitions'.
+Each symbol must be a key in `gloss-fetch--sources'. Symbols not
+registered there are silently skipped (forward-compat for v2+ sources)."
+ :type '(repeat symbol)
+ :group 'gloss)
+
+(defcustom gloss-fetch-timeout 5
+ "Seconds before a single online fetch is treated as unreachable."
+ :type 'integer
+ :group 'gloss)
+
+(defconst gloss-fetch--wiktionary-url
+ "https://en.wiktionary.org/api/rest_v1/page/definition/%s"
+ "URL template for the Wiktionary REST definition endpoint.
+The `%s' placeholder receives the URL-encoded term.")
+
+(defvar gloss-fetch--libxml-checked nil
+ "Non-nil once the libxml availability probe has run for this session.")
+
+(defvar gloss-fetch--libxml-disabled nil
+ "Non-nil when libxml was probed and found absent.
+While non-nil, every call to `gloss-fetch-definitions' signals
+`user-error' rather than touching the network.")
+
+(defun gloss-fetch--debug (fmt &rest args)
+ "Append a formatted line to *gloss-debug* when `gloss-debug' is non-nil.
+FMT and ARGS are passed to `format'."
+ (when (and (boundp 'gloss-debug) gloss-debug)
+ (with-current-buffer (get-buffer-create "*gloss-debug*")
+ (goto-char (point-max))
+ (insert (format-time-string "%Y-%m-%d %H:%M:%S "))
+ (insert (apply #'format fmt args))
+ (insert "\n"))))
+
+(defun gloss-fetch--libxml-available-p ()
+ "Return non-nil when `libxml-parse-html-region' is bound and functional."
+ (and (fboundp 'libxml-parse-html-region)
+ (with-temp-buffer
+ (insert "<p>x</p>")
+ (condition-case _err
+ (and (libxml-parse-html-region (point-min) (point-max)) t)
+ (error nil)))))
+
+(defun gloss-fetch--ensure-libxml ()
+ "Probe libxml on first call; disable online fetching for the session if absent.
+Signals `user-error' when libxml is unavailable."
+ (unless gloss-fetch--libxml-checked
+ (setq gloss-fetch--libxml-checked t)
+ (unless (gloss-fetch--libxml-available-p)
+ (setq gloss-fetch--libxml-disabled t)))
+ (when gloss-fetch--libxml-disabled
+ (user-error
+ "Online fetch requires Emacs built with libxml2; manual add still works")))
+
+(defun gloss-fetch--strip-html (html)
+ "Return plain-text contents of HTML, with whitespace collapsed and trimmed.
+Returns nil if `libxml-parse-html-region' raises an error so the caller
+can drop that sense. Empty input returns the empty string."
+ (when (stringp html)
+ (if (string-empty-p html)
+ ""
+ (condition-case err
+ (with-temp-buffer
+ (insert html)
+ (let* ((tree (libxml-parse-html-region (point-min) (point-max)))
+ (text (gloss-fetch--dom-text tree)))
+ (gloss-fetch--collapse-whitespace text)))
+ (error
+ (gloss-fetch--debug "[fetch] strip-html error: %s"
+ (error-message-string err))
+ nil)))))
+
+(defun gloss-fetch--dom-text (node)
+ "Return the concatenated text content of NODE, an `libxml' DOM node."
+ (cond
+ ((null node) "")
+ ((stringp node) node)
+ ((listp node)
+ ;; Node shape: (TAG ATTRS . CHILDREN). Skip TAG and ATTRS.
+ (mapconcat #'gloss-fetch--dom-text (cddr node) ""))
+ (t "")))
+
+(defun gloss-fetch--collapse-whitespace (s)
+ "Collapse runs of whitespace in S to single spaces and trim."
+ (string-trim
+ (replace-regexp-in-string "[ \t\n\r]+" " " s)))
+
+(defun gloss-fetch--http-status (buf)
+ "Return the HTTP status code as an integer from response buffer BUF, or nil."
+ (with-current-buffer buf
+ (save-excursion
+ (goto-char (point-min))
+ (when (re-search-forward "^HTTP/[0-9.]+[ \t]+\\([0-9]+\\)" nil t)
+ (string-to-number (match-string 1))))))
+
+(defun gloss-fetch--http-body (buf)
+ "Return the body of response buffer BUF as a UTF-8 decoded string.
+The body is everything after the first blank line (end of headers)."
+ (with-current-buffer buf
+ (save-excursion
+ (goto-char (point-min))
+ (if (re-search-forward "^\r?$" nil t)
+ (let ((raw (buffer-substring-no-properties (1+ (point)) (point-max))))
+ (decode-coding-string raw 'utf-8))
+ ""))))
+
+(defun gloss-fetch--url-encode (term)
+ "Return TERM URL-encoded for use as a Wiktionary REST path segment."
+ (url-hexify-string term))
+
+(defun gloss-fetch--retrieve (url)
+ "GET URL synchronously honoring `gloss-fetch-timeout'.
+Returns the response buffer on success. Returns the symbol
+`:unreachable' when the call returns nil (timeout) or signals an
+error (DNS, connection refused)."
+ (condition-case err
+ (let ((url-request-method "GET")
+ (buf (url-retrieve-synchronously url t t gloss-fetch-timeout)))
+ (or buf :unreachable))
+ (error
+ (gloss-fetch--debug "[fetch] retrieve error: %s"
+ (error-message-string err))
+ :unreachable)))
+
+(defun gloss-fetch--classify-status (code)
+ "Map an HTTP status CODE to a per-source status symbol."
+ (cond
+ ((null code) :server-error)
+ ((and (>= code 200) (< code 300)) :ok)
+ ((= code 404) :no-defs)
+ ((= code 429) :rate-limited)
+ ((and (>= code 500) (< code 600)) :server-error)
+ (t :server-error)))
+
+(defun gloss-fetch--parse-json (body)
+ "Parse JSON BODY into a plist-shaped value.
+Returns the parsed object or signals an error if BODY is malformed."
+ (let ((json-object-type 'alist)
+ (json-array-type 'list)
+ (json-key-type 'string)
+ (json-false nil)
+ (json-null nil))
+ (json-read-from-string body)))
+
+(defun gloss-fetch--wiktionary-extract-defs (parsed)
+ "Return a list of definition plists from PARSED Wiktionary JSON.
+Only English (`en') entries contribute. Each yielded plist has
+:source `wiktionary' and :text (HTML-stripped, whitespace-collapsed).
+Empty stripped strings and senses where strip fails are dropped."
+ (let ((english (cdr (assoc "en" parsed)))
+ (defs nil))
+ (dolist (section english)
+ (let ((senses (cdr (assoc "definitions" section))))
+ (dolist (sense senses)
+ (let* ((html (cdr (assoc "definition" sense)))
+ (text (and (stringp html)
+ (gloss-fetch--strip-html html))))
+ (when (and text (not (string-empty-p text)))
+ (push (list :source 'wiktionary :text text) defs))))))
+ (nreverse defs)))
+
+(defun gloss-fetch--fetch-wiktionary (term)
+ "Fetch TERM from Wiktionary; return a per-source result plist.
+The returned plist has :source `wiktionary', :status, and either
+:defs (on :ok) or :reason (on every other status)."
+ (let* ((url (format gloss-fetch--wiktionary-url
+ (gloss-fetch--url-encode term)))
+ (buf-or-status (gloss-fetch--retrieve url)))
+ (cond
+ ((eq buf-or-status :unreachable)
+ (list :source 'wiktionary :status :unreachable
+ :reason (format "timeout (%ss) or unreachable" gloss-fetch-timeout)))
+ (t
+ (unwind-protect
+ (let* ((code (gloss-fetch--http-status buf-or-status))
+ (status (gloss-fetch--classify-status code)))
+ (gloss-fetch--debug "[fetch:wiktionary] GET %s -> %S" url code)
+ (cond
+ ((eq status :ok)
+ (gloss-fetch--wiktionary-build-ok-result buf-or-status code))
+ (t
+ (list :source 'wiktionary :status status
+ :reason (format "HTTP %s" code)))))
+ (when (buffer-live-p buf-or-status)
+ (kill-buffer buf-or-status)))))))
+
+(defun gloss-fetch--wiktionary-build-ok-result (buf code)
+ "Inspect a 200 response BUF and return a per-source result plist.
+CODE is the HTTP status (passed through to :reason on failure paths)."
+ (let ((body (gloss-fetch--http-body buf)))
+ (condition-case err
+ (let* ((parsed (gloss-fetch--parse-json body))
+ (defs (gloss-fetch--wiktionary-extract-defs parsed)))
+ (if defs
+ (list :source 'wiktionary :status :ok :defs defs)
+ (list :source 'wiktionary :status :no-defs
+ :reason (format "HTTP %s, no English senses" code))))
+ (error
+ (list :source 'wiktionary :status :server-error
+ :reason (format "malformed JSON: %s"
+ (error-message-string err)))))))
+
+(defvar gloss-fetch--sources
+ `((wiktionary . ,#'gloss-fetch--fetch-wiktionary))
+ "Alist mapping source symbol to a fetcher function.
+Each fetcher accepts TERM and returns a per-source result plist of the
+shape (:source SYM :status STATUS [:defs DEFS] [:reason STRING]).")
+
+(defun gloss-fetch--collect (term)
+ "Walk every entry of `gloss-fetch-sources' that maps to a fetcher.
+Return the per-source results in walk order. Symbols not registered
+in `gloss-fetch--sources' are silently skipped."
+ (let (results)
+ (dolist (sym gloss-fetch-sources)
+ (let ((fetcher (cdr (assq sym gloss-fetch--sources))))
+ (when fetcher
+ (push (funcall fetcher term) results))))
+ (nreverse results)))
+
+(defun gloss-fetch--rollup (per-source)
+ "Roll up PER-SOURCE results into the user-facing response shape.
+Returns (:ok DEFS) when any source returned :ok with non-empty :defs.
+Otherwise returns (:empty :no-defs (...) :failed (...))."
+ (let (ok-defs no-defs failed)
+ (dolist (entry per-source)
+ (let ((sym (plist-get entry :source))
+ (status (plist-get entry :status)))
+ (cond
+ ((and (eq status :ok) (plist-get entry :defs))
+ (setq ok-defs (append ok-defs (plist-get entry :defs))))
+ ((eq status :no-defs)
+ (push sym no-defs))
+ ((memq status '(:unreachable :server-error :rate-limited))
+ (push sym failed)))))
+ (if ok-defs
+ (list :ok ok-defs)
+ (list :empty
+ :no-defs (nreverse no-defs)
+ :failed (nreverse failed)))))
+
+(defun gloss-fetch-definitions (term)
+ "Fetch candidate definitions for TERM from each source in `gloss-fetch-sources'.
+Returns (:ok DEFS) when any source returns at least one definition,
+otherwise (:empty :no-defs (SYM ...) :failed (SYM ...)). Signals
+`user-error' the first time it runs in a session without libxml, and
+on every subsequent call in that session."
+ (gloss-fetch--ensure-libxml)
+ (gloss-fetch--rollup (gloss-fetch--collect term)))
(provide 'gloss-fetch)
;;; gloss-fetch.el ends here