diff options
Diffstat (limited to 'tests/test-lorem-optimum.el')
| -rw-r--r-- | tests/test-lorem-optimum.el | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/tests/test-lorem-optimum.el b/tests/test-lorem-optimum.el new file mode 100644 index 00000000..ca2e52f4 --- /dev/null +++ b/tests/test-lorem-optimum.el @@ -0,0 +1,242 @@ +;;; test-lorem-optimum.el --- Tests for lorem-optimum.el -*- lexical-binding: t; -*- + +;;; Commentary: +;; Unit tests for lorem-optimum.el Markov chain text generation. +;; +;; Tests cover: +;; - Tokenization +;; - Learning and chain building +;; - Text generation +;; - Capitalization fixing +;; - Token joining + +;;; Code: + +(require 'ert) +(require 'testutil-general) + +;; Add modules directory to load path +(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory)) + +;; Load the module +(require 'lorem-optimum) + +;;; Test Helpers + +(defun test-chain () + "Create a fresh test chain." + (cj/markov-chain-create)) + +(defun test-learn (text) + "Create a chain and learn TEXT." + (let ((chain (test-chain))) + (cj/markov-learn chain text) + chain)) + +;;; Tokenization Tests + +(ert-deftest test-tokenize-simple () + "Should tokenize simple words." + (let ((result (cj/markov-tokenize "hello world"))) + (should (equal result '("hello" "world"))))) + +(ert-deftest test-tokenize-with-punctuation () + "Should separate punctuation as tokens." + (let ((result (cj/markov-tokenize "Hello, world!"))) + (should (equal result '("Hello" "," "world" "!"))))) + +(ert-deftest test-tokenize-multiple-spaces () + "Should handle multiple spaces." + (let ((result (cj/markov-tokenize "hello world"))) + (should (equal result '("hello" "world"))))) + +(ert-deftest test-tokenize-newlines () + "Should handle newlines as whitespace." + (let ((result (cj/markov-tokenize "hello\nworld"))) + (should (equal result '("hello" "world"))))) + +(ert-deftest test-tokenize-mixed-punctuation () + "Should tokenize complex punctuation." + (let ((result (cj/markov-tokenize "one, two; three."))) + (should (equal result '("one" "," "two" ";" "three" "."))))) + +(ert-deftest test-tokenize-empty () + "Should handle empty string." + (let ((result (cj/markov-tokenize ""))) + (should (null result)))) + +(ert-deftest test-tokenize-whitespace-only () + "Should return nil for whitespace only." + (let ((result (cj/markov-tokenize " \n\t "))) + (should (null result)))) + +;;; Markov Learn Tests + +(ert-deftest test-learn-basic () + "Should learn simple text." + (let ((chain (test-learn "one two three four"))) + (should (cj/markov-chain-p chain)) + (should (> (hash-table-count (cj/markov-chain-map chain)) 0)))) + +(ert-deftest test-learn-creates-bigrams () + "Should create bigram mappings." + (let ((chain (test-learn "one two three"))) + (should (gethash '("one" "two") (cj/markov-chain-map chain))))) + +(ert-deftest test-learn-stores-following-word () + "Should store following word for bigram." + (let ((chain (test-learn "one two three"))) + (should (member "three" (gethash '("one" "two") (cj/markov-chain-map chain)))))) + +(ert-deftest test-learn-builds-keys-list () + "Should build keys list lazily when accessed." + (let ((chain (test-learn "one two three four"))) + ;; Keys are built lazily, so initially nil + (should (null (cj/markov-chain-keys chain))) + ;; After calling random-key, keys should be built + (cj/markov-random-key chain) + (should (> (length (cj/markov-chain-keys chain)) 0)))) + +(ert-deftest test-learn-repeated-patterns () + "Should accumulate repeated patterns." + (let ((chain (test-learn "one two three one two four"))) + (let ((nexts (gethash '("one" "two") (cj/markov-chain-map chain)))) + (should (= (length nexts) 2)) + (should (member "three" nexts)) + (should (member "four" nexts))))) + +(ert-deftest test-learn-incremental () + "Should support incremental learning." + (let ((chain (test-chain))) + (cj/markov-learn chain "one two three") + (cj/markov-learn chain "four five six") + (should (> (hash-table-count (cj/markov-chain-map chain)) 0)))) + +;;; Token Joining Tests + +(ert-deftest test-join-simple-words () + "Should join words with spaces." + (let ((result (cj/markov-join-tokens '("hello" "world")))) + (should (string-match-p "^Hello world" result)))) + +(ert-deftest test-join-with-punctuation () + "Should attach punctuation without spaces." + (let ((result (cj/markov-join-tokens '("hello" "," "world")))) + (should (string-match-p "Hello, world" result)))) + +(ert-deftest test-join-capitalizes-first () + "Should capitalize first word." + (let ((result (cj/markov-join-tokens '("hello" "world")))) + (should (string-match-p "^H" result)))) + +(ert-deftest test-join-adds-period () + "Should add period if missing." + (let ((result (cj/markov-join-tokens '("hello" "world")))) + (should (string-match-p "\\.$" result)))) + +(ert-deftest test-join-preserves-existing-period () + "Should not double-add period." + (let ((result (cj/markov-join-tokens '("hello" "world" ".")))) + (should (string-match-p "\\.$" result)) + (should-not (string-match-p "\\.\\.$" result)))) + +(ert-deftest test-join-empty-tokens () + "Should handle empty token list." + (let ((result (cj/markov-join-tokens '()))) + (should (equal result ".")))) + +;;; Capitalization Tests + +(ert-deftest test-capitalize-first-word () + "Should capitalize first word." + (let ((result (cj/markov-fix-capitalization "hello world"))) + (should (string-match-p "^Hello" result)))) + +(ert-deftest test-capitalize-after-period () + "Should capitalize after period." + (let ((result (cj/markov-fix-capitalization "hello. world"))) + (should (string-match-p "Hello\\. World" result)))) + +(ert-deftest test-capitalize-after-exclamation () + "Should capitalize after exclamation." + (let ((result (cj/markov-fix-capitalization "hello! world"))) + (should (string-match-p "Hello! World" result)))) + +(ert-deftest test-capitalize-after-question () + "Should capitalize after question mark." + (let ((result (cj/markov-fix-capitalization "hello? world"))) + (should (string-match-p "Hello\\? World" result)))) + +(ert-deftest test-capitalize-skip-non-alpha () + "Should skip non-alphabetic tokens." + (let ((result (cj/markov-fix-capitalization "hello. 123 world"))) + (should (string-match-p "123" result)))) + +(ert-deftest test-capitalize-multiple-sentences () + "Should capitalize all sentences." + (let ((result (cj/markov-fix-capitalization "first. second. third"))) + (should (string-match-p "First\\. Second\\. Third" result)))) + +;;; Generation Tests (deterministic with fixed chain) + +(ert-deftest test-generate-produces-output () + "Should generate non-empty output." + (let ((chain (test-learn "Lorem ipsum dolor sit amet consectetur adipiscing elit"))) + (let ((result (cj/markov-generate chain 5))) + (should (stringp result)) + (should (> (length result) 0))))) + +(ert-deftest test-generate-empty-chain () + "Should handle empty chain gracefully." + (let ((chain (test-chain))) + (let ((result (cj/markov-generate chain 5))) + (should (or (null result) (string-empty-p result)))))) + +(ert-deftest test-generate-respects-start () + "Should use provided start state if available." + (let ((chain (test-learn "Lorem ipsum dolor sit amet"))) + (let ((result (cj/markov-generate chain 3 '("Lorem" "ipsum")))) + (should (stringp result)) + ;; Should start with Lorem or similar + (should (> (length result) 0))))) + +;;; Integration Tests + +(ert-deftest test-full-workflow () + "Should complete full learn-generate workflow." + (let ((chain (test-chain))) + (cj/markov-learn chain "The quick brown fox jumps over the lazy dog") + (let ((result (cj/markov-generate chain 8))) + (should (stringp result)) + (should (> (length result) 0)) + (should (string-match-p "^[A-Z]" result)) + (should (string-match-p "[.!?]$" result))))) + +(ert-deftest test-latin-like-output () + "Should generate Latin-like text from Latin input." + (let ((chain (test-chain))) + (cj/markov-learn chain "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") + (let ((result (cj/markov-generate chain 10))) + (should (stringp result)) + (should (> (length result) 10))))) + +;;; Edge Cases + +(ert-deftest test-learn-short-text () + "Should handle text shorter than trigram." + (let ((chain (test-learn "one two"))) + (should (cj/markov-chain-p chain)))) + +(ert-deftest test-learn-single-word () + "Should handle single word." + (let ((chain (test-learn "word"))) + (should (cj/markov-chain-p chain)))) + +(ert-deftest test-generate-requested-count-small () + "Should handle small generation count." + (let ((chain (test-learn "one two three four five"))) + (let ((result (cj/markov-generate chain 2))) + (should (stringp result))))) + +(provide 'test-lorem-optimum) +;;; test-lorem-optimum.el ends here |
