1 files changed, 242 insertions, 0 deletions
diff --git a/tests/test-lorem-optimum.el b/tests/test-lorem-optimum.el
new file mode 100644
index 00000000..ca2e52f4
--- /dev/null
+++ b/tests/test-lorem-optimum.el
@@ -0,0 +1,242 @@
+;;; test-lorem-optimum.el --- Tests for lorem-optimum.el -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; Unit tests for lorem-optimum.el Markov chain text generation.
+;;
+;; Tests cover:
+;; - Tokenization
+;; - Learning and chain building
+;; - Text generation
+;; - Capitalization fixing
+;; - Token joining
+
+;;; Code:
+
+(require 'ert)
+(require 'testutil-general)
+
+;; Add modules directory to load path
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+
+;; Load the module
+(require 'lorem-optimum)
+
+;;; Test Helpers
+
+(defun test-chain ()
+  "Create a fresh test chain."
+  (cj/markov-chain-create))
+
+(defun test-learn (text)
+  "Create a chain and learn TEXT."
+  (let ((chain (test-chain)))
+    (cj/markov-learn chain text)
+    chain))
+
+;;; Tokenization Tests
+
+(ert-deftest test-tokenize-simple ()
+  "Should tokenize simple words."
+  (let ((result (cj/markov-tokenize "hello world")))
+    (should (equal result '("hello" "world")))))
+
+(ert-deftest test-tokenize-with-punctuation ()
+  "Should separate punctuation as tokens."
+  (let ((result (cj/markov-tokenize "Hello, world!")))
+    (should (equal result '("Hello" "," "world" "!")))))
+
+(ert-deftest test-tokenize-multiple-spaces ()
+  "Should handle multiple spaces."
+  (let ((result (cj/markov-tokenize "hello    world")))
+    (should (equal result '("hello" "world")))))
+
+(ert-deftest test-tokenize-newlines ()
+  "Should handle newlines as whitespace."
+  (let ((result (cj/markov-tokenize "hello\nworld")))
+    (should (equal result '("hello" "world")))))
+
+(ert-deftest test-tokenize-mixed-punctuation ()
+  "Should tokenize complex punctuation."
+  (let ((result (cj/markov-tokenize "one, two; three.")))
+    (should (equal result '("one" "," "two" ";" "three" ".")))))
+
+(ert-deftest test-tokenize-empty ()
+  "Should handle empty string."
+  (let ((result (cj/markov-tokenize "")))
+    (should (null result))))
+
+(ert-deftest test-tokenize-whitespace-only ()
+  "Should return nil for whitespace only."
+  (let ((result (cj/markov-tokenize "   \n\t  ")))
+    (should (null result))))
+
+;;; Markov Learn Tests
+
+(ert-deftest test-learn-basic ()
+  "Should learn simple text."
+  (let ((chain (test-learn "one two three four")))
+    (should (cj/markov-chain-p chain))
+    (should (> (hash-table-count (cj/markov-chain-map chain)) 0))))
+
+(ert-deftest test-learn-creates-bigrams ()
+  "Should create bigram mappings."
+  (let ((chain (test-learn "one two three")))
+    (should (gethash '("one" "two") (cj/markov-chain-map chain)))))
+
+(ert-deftest test-learn-stores-following-word ()
+  "Should store following word for bigram."
+  (let ((chain (test-learn "one two three")))
+    (should (member "three" (gethash '("one" "two") (cj/markov-chain-map chain))))))
+
+(ert-deftest test-learn-builds-keys-list ()
+  "Should build keys list lazily when accessed."
+  (let ((chain (test-learn "one two three four")))
+    ;; Keys are built lazily, so initially nil
+    (should (null (cj/markov-chain-keys chain)))
+    ;; After calling random-key, keys should be built
+    (cj/markov-random-key chain)
+    (should (> (length (cj/markov-chain-keys chain)) 0))))
+
+(ert-deftest test-learn-repeated-patterns ()
+  "Should accumulate repeated patterns."
+  (let ((chain (test-learn "one two three one two four")))
+    (let ((nexts (gethash '("one" "two") (cj/markov-chain-map chain))))
+      (should (= (length nexts) 2))
+      (should (member "three" nexts))
+      (should (member "four" nexts)))))
+
+(ert-deftest test-learn-incremental ()
+  "Should support incremental learning."
+  (let ((chain (test-chain)))
+    (cj/markov-learn chain "one two three")
+    (cj/markov-learn chain "four five six")
+    (should (> (hash-table-count (cj/markov-chain-map chain)) 0))))
+
+;;; Token Joining Tests
+
+(ert-deftest test-join-simple-words ()
+  "Should join words with spaces."
+  (let ((result (cj/markov-join-tokens '("hello" "world"))))
+    (should (string-match-p "^Hello world" result))))
+
+(ert-deftest test-join-with-punctuation ()
+  "Should attach punctuation without spaces."
+  (let ((result (cj/markov-join-tokens '("hello" "," "world"))))
+    (should (string-match-p "Hello, world" result))))
+
+(ert-deftest test-join-capitalizes-first ()
+  "Should capitalize first word."
+  (let ((result (cj/markov-join-tokens '("hello" "world"))))
+    (should (string-match-p "^H" result))))
+
+(ert-deftest test-join-adds-period ()
+  "Should add period if missing."
+  (let ((result (cj/markov-join-tokens '("hello" "world"))))
+    (should (string-match-p "\\.$" result))))
+
+(ert-deftest test-join-preserves-existing-period ()
+  "Should not double-add period."
+  (let ((result (cj/markov-join-tokens '("hello" "world" "."))))
+    (should (string-match-p "\\.$" result))
+    (should-not (string-match-p "\\.\\.$" result))))
+
+(ert-deftest test-join-empty-tokens ()
+  "Should handle empty token list."
+  (let ((result (cj/markov-join-tokens '())))
+    (should (equal result "."))))
+
+;;; Capitalization Tests
+
+(ert-deftest test-capitalize-first-word ()
+  "Should capitalize first word."
+  (let ((result (cj/markov-fix-capitalization "hello world")))
+    (should (string-match-p "^Hello" result))))
+
+(ert-deftest test-capitalize-after-period ()
+  "Should capitalize after period."
+  (let ((result (cj/markov-fix-capitalization "hello. world")))
+    (should (string-match-p "Hello\\. World" result))))
+
+(ert-deftest test-capitalize-after-exclamation ()
+  "Should capitalize after exclamation."
+  (let ((result (cj/markov-fix-capitalization "hello! world")))
+    (should (string-match-p "Hello! World" result))))
+
+(ert-deftest test-capitalize-after-question ()
+  "Should capitalize after question mark."
+  (let ((result (cj/markov-fix-capitalization "hello? world")))
+    (should (string-match-p "Hello\\? World" result))))
+
+(ert-deftest test-capitalize-skip-non-alpha ()
+  "Should skip non-alphabetic tokens."
+  (let ((result (cj/markov-fix-capitalization "hello. 123 world")))
+    (should (string-match-p "123" result))))
+
+(ert-deftest test-capitalize-multiple-sentences ()
+  "Should capitalize all sentences."
+  (let ((result (cj/markov-fix-capitalization "first. second. third")))
+    (should (string-match-p "First\\. Second\\. Third" result))))
+
+;;; Generation Tests (deterministic with fixed chain)
+
+(ert-deftest test-generate-produces-output ()
+  "Should generate non-empty output."
+  (let ((chain (test-learn "Lorem ipsum dolor sit amet consectetur adipiscing elit")))
+    (let ((result (cj/markov-generate chain 5)))
+      (should (stringp result))
+      (should (> (length result) 0)))))
+
+(ert-deftest test-generate-empty-chain ()
+  "Should handle empty chain gracefully."
+  (let ((chain (test-chain)))
+    (let ((result (cj/markov-generate chain 5)))
+      (should (or (null result) (string-empty-p result))))))
+
+(ert-deftest test-generate-respects-start ()
+  "Should use provided start state if available."
+  (let ((chain (test-learn "Lorem ipsum dolor sit amet")))
+    (let ((result (cj/markov-generate chain 3 '("Lorem" "ipsum"))))
+      (should (stringp result))
+      ;; Should start with Lorem or similar
+      (should (> (length result) 0)))))
+
+;;; Integration Tests
+
+(ert-deftest test-full-workflow ()
+  "Should complete full learn-generate workflow."
+  (let ((chain (test-chain)))
+    (cj/markov-learn chain "The quick brown fox jumps over the lazy dog")
+    (let ((result (cj/markov-generate chain 8)))
+      (should (stringp result))
+      (should (> (length result) 0))
+      (should (string-match-p "^[A-Z]" result))
+      (should (string-match-p "[.!?]$" result)))))
+
+(ert-deftest test-latin-like-output ()
+  "Should generate Latin-like text from Latin input."
+  (let ((chain (test-chain)))
+    (cj/markov-learn chain "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")
+    (let ((result (cj/markov-generate chain 10)))
+      (should (stringp result))
+      (should (> (length result) 10)))))
+
+;;; Edge Cases
+
+(ert-deftest test-learn-short-text ()
+  "Should handle text shorter than trigram."
+  (let ((chain (test-learn "one two")))
+    (should (cj/markov-chain-p chain))))
+
+(ert-deftest test-learn-single-word ()
+  "Should handle single word."
+  (let ((chain (test-learn "word")))
+    (should (cj/markov-chain-p chain))))
+
+(ert-deftest test-generate-requested-count-small ()
+  "Should handle small generation count."
+  (let ((chain (test-learn "one two three four five")))
+    (let ((result (cj/markov-generate chain 2)))
+      (should (stringp result)))))
+
+(provide 'test-lorem-optimum)
+;;; test-lorem-optimum.el ends here