summaryrefslogtreecommitdiff
path: root/modules/lorem-generator.el
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2025-10-26 23:56:03 -0500
committerCraig Jennings <c@cjennings.net>2025-10-26 23:56:03 -0500
commitd0b16101cc7c07b2ebcc141be75f0436ae440348 (patch)
treeabddeccf7bc54d72fbcfcb8f54fc26f01c0d9273 /modules/lorem-generator.el
parentcd64af4642fd54a4d7b7be93bfb317fc64f623a6 (diff)
feat:text-generation: improve and rename lorem generator
Rename `lorem-generator.el` to `lorem-optimum.el` for fun. Enhance text tokenization, Markov chain learning, and text generation functions. Introduce new configuration variables for training files and improve efficiency with vectorized access. Add comprehensive benchmarks and unit tests under `tests/`. This improves performance and lays groundwork for further extensions.
Diffstat (limited to 'modules/lorem-generator.el')
-rw-r--r--modules/lorem-generator.el235
1 files changed, 0 insertions, 235 deletions
diff --git a/modules/lorem-generator.el b/modules/lorem-generator.el
deleted file mode 100644
index 6f0520c6..00000000
--- a/modules/lorem-generator.el
+++ /dev/null
@@ -1,235 +0,0 @@
-;;; lorem-generator.el --- Fake Latin Text Generator -*- coding: utf-8; lexical-binding: t; -*-
-;;
-;; Author: Craig Jennings
-;; Version: 0.5
-;; Package-Requires: ((emacs "26.1") (cl-lib "0.6"))
-;; Keywords: text, lorem-ipsum, dummy, filler, markov
-;; URL: https://github.com/yourname/cj-lipsum
-
-;;; Commentary:
-;; Generate pseudo-Latin placeholder text using a simple order-two
-;; Markov chain. You can train the chain on region, buffer, or file.
-;; By default, it learns from a bundled Latin wordlist, which you can
-;; change via customization.
-;;
-;; Interactive commands:
-;; M-x cj/lipsum – Return N words
-;; M-x cj/lipsum-insert – Insert N words at point
-;; M-x cj/lipsum-title – Generate a pseudo-Latin heading
-;; M-x cj/lipsum-paragraphs – Insert multiple paragraphs
-;; M-x cj/lipsum-learn-* – Train the chain
-;; M-x cj/lipsum-reset – Clear chain
-
-;;; Code:
-
-(require 'cl-lib)
-
-(cl-defstruct (cj/markov-chain
- (:constructor cj/markov-chain-create))
- "An order-two Markov chain."
- (map (make-hash-table :test 'equal))
- (keys nil))
-
-(defun cj/markov-tokenize (text)
- "Split TEXT into tokens: words and punctuation separately."
- (let ((case-fold-search nil))
- (split-string text "\\b" t "[[:space:]\n]+")))
-
-(defun cj/markov-learn (chain text)
- "Add TEXT into the Markov CHAIN with tokenized input."
- (let* ((words (cj/markov-tokenize text))
- (len (length words)))
- (cl-loop for i from 0 to (- len 3)
- for a = (nth i words)
- for b = (nth (1+ i) words)
- for c = (nth (+ i 2) words)
- do (let* ((bigram (list a b))
- (nexts (gethash bigram (cj/markov-chain-map chain))))
- (puthash bigram (cons c nexts)
- (cj/markov-chain-map chain)))))
- (setf (cj/markov-chain-keys chain)
- (cl-loop for k being the hash-keys of (cj/markov-chain-map chain)
- collect k)))
-
-(defun cj/markov-fix-capitalization (sentence)
- "Capitalize the first word and the first word after .!? in SENTENCE."
- (let* ((tokens (split-string sentence "\\b" t)))
- (cl-loop with capitalize-next = t
- for i from 0 below (length tokens)
- for tok = (nth i tokens)
- do (when (and capitalize-next (string-match-p "^[[:alpha:]]" tok))
- (setf (nth i tokens)
- (concat (upcase (substring tok 0 1))
- (substring tok 1)))
- (setq capitalize-next nil))
- do (when (string-match-p "[.!?]" tok) ; <-- Changed: removed $ anchor
- (setq capitalize-next t)))
- (mapconcat #'identity tokens "")))
-
-(defun cj/markov-join-tokens (tokens)
- "Join TOKENS into a sentence with proper spacing/punctuation."
- (let ((sentence "") (need-space nil))
- (dolist (tok tokens)
- (cond
- ;; punctuation attaches directly
- ((string-match-p "^[[:punct:]]+$" tok)
- (setq sentence (concat sentence tok))
- (setq need-space t))
- ;; word
- (t
- (when need-space
- (setq sentence (concat sentence " ")))
- (setq sentence (concat sentence tok))
- (setq need-space t))))
- ;; fix capitalization of first word only
- (when (string-match "\\`\\([[:alpha:]]\\)" sentence)
- (setq sentence
- (replace-match (upcase (match-string 1 sentence))
- nil nil sentence)))
- ;; ensure it ends with .!?
- (unless (string-match-p "[.!?]$" sentence)
- (setq sentence (concat (replace-regexp-in-string "[[:punct:]]+$" "" sentence) ".")))
- (setq sentence (cj/markov-fix-capitalization sentence))
- sentence))
-
-(defun cj/markov-generate (chain n &optional start)
- "Generate a sentence of N tokens from CHAIN."
- (when (cj/markov-chain-keys chain)
- (let* ((state (or (and start
- (gethash start (cj/markov-chain-map chain))
- start)
- (cj/markov-random-key chain)))
- (w1 (car state))
- (w2 (cadr state))
- (tokens (list w1 w2)))
- (dotimes (_ (- n 2))
- (let ((next (cj/markov-next-word chain state)))
- (if next
- (setq tokens (append tokens (list next))
- state (list w2 next)
- w1 w2
- w2 next)
- (setq state (cj/markov-random-key chain)
- w1 (car state)
- w2 (cadr state)
- tokens (append tokens (list w1 w2))))))
- (cj/markov-join-tokens tokens))))
-
-(defun cj/markov-random-key (chain)
- (nth (random (length (cj/markov-chain-keys chain)))
- (cj/markov-chain-keys chain)))
-
-(defun cj/markov-next-word (chain bigram)
- (let ((candidates (gethash bigram (cj/markov-chain-map chain))))
- (when candidates
- (nth (random (length candidates)) candidates))))
-
-(defvar cj/lipsum-chain (cj/markov-chain-create)
- "Global Markov chain for lipsum generation.")
-
-(defun cj/lipsum-reset ()
- "Reset the global lipsum Markov chain."
- (interactive)
- (setq cj/lipsum-chain (cj/markov-chain-create))
- (message "cj/lipsum-chain reset."))
-
-(defun cj/lipsum-learn-region (beg end)
- "Learn text from region."
- (interactive "r")
- (cj/markov-learn cj/lipsum-chain (buffer-substring-no-properties beg end))
- (message "Learned from region."))
-
-(defun cj/lipsum-learn-buffer ()
- "Learn from entire buffer."
- (interactive)
- (cj/markov-learn cj/lipsum-chain
- (buffer-substring-no-properties (point-min) (point-max)))
- (message "Learned from buffer."))
-
-(defun cj/lipsum-learn-file (file)
- "Learn from FILE containing plain text."
- (interactive "fTrain from file: ")
- (with-temp-buffer
- (insert-file-contents file)
- (cj/markov-learn cj/lipsum-chain (buffer-string)))
- (message "Learned from file: %s" file))
-
-(defun cj/lipsum (n)
- "Return N words of lorem ipsum."
- (cj/markov-generate cj/lipsum-chain n '("Lorem" "ipsum")))
-
-(defun cj/lipsum-insert (n)
- "Insert N words of lorem ipsum at point."
- (interactive "nNumber of words: ")
- (insert (cj/lipsum n)))
-
-;;; Title generation
-
-(defconst cj/lipsum-title-min 3)
-(defconst cj/lipsum-title-max 8)
-(defconst cj/lipsum-title-small 3)
-
-(defun cj/lipsum-title ()
- "Generate a pseudo-Latin title."
- (interactive)
- (let* ((n (+ cj/lipsum-title-min
- (random (1+ (- cj/lipsum-title-max cj/lipsum-title-min)))))
- (words
- (cl-loop with state = (cj/markov-random-key cj/lipsum-chain)
- for i from 0 below n
- for w = (car state)
- do (setq state (list (cadr state)
- (or (cj/markov-next-word cj/lipsum-chain state)
- (cadr (cj/markov-random-key cj/lipsum-chain))))))
- collect (replace-regexp-in-string "^[[:punct:]]+\\|[[:punct:]]+$" "" w))))
- (setq words (cl-remove-if #'string-empty-p words))
- (mapconcat
- (lambda (word idx)
- (if (or (zerop idx) (> (length word) cj/lipsum-title-small))
- (capitalize word)
- word))
- words " "))
-
-;;; Paragraphs
-
-(defun cj/lipsum-paragraphs (count &optional min max)
- "Insert COUNT paragraphs of lipsum.
-
-Each paragraph has a random length between MIN and MAX words.
-Defaults: MIN=30, MAX=80."
- (interactive "nNumber of paragraphs: ")
- (let ((min (or min 30))
- (max (or max 80)))
- (dotimes (_ count)
- (let ((len (+ min (random (1+ (- max min))))))
- (insert (cj/lipsum len) "\n\n")))))
-
-;;; Customization
-
-(defgroup cj-lipsum nil
- "Pseudo-Latin lorem ipsum text generator."
- :prefix "cj/lipsum-"
- :group 'text)
-
-(defcustom cj/lipsum-default-file
- (expand-file-name "latin.txt"
- (file-name-directory (or load-file-name buffer-file-name)))
- "Default training file for cj-lipsum.
-
-This should be a plain UTF-8 text file with hundreds of Latin words
-or sentences. By default it points to the bundled `latin.txt`."
- :type 'file
- :group 'cj-lipsum)
-
-;;; Initialization: train on default file
-(defun cj/lipsum--init ()
- "Initialize cj-lipsum by learning from `cj/lipsum-default-file`."
- (when (and cj/lipsum-default-file
- (file-readable-p cj/lipsum-default-file))
- (cj/lipsum-reset)
- (cj/lipsum-learn-file cj/lipsum-default-file)))
-
-(cj/lipsum--init)
-
-(provide 'lorem-generator)
-;;; lorem-generator.el ends here.