4 files changed, 529 insertions, 35 deletions
diff --git a/init.el b/init.el
index 12ea9c21..a06d25d6 100644
--- a/init.el
+++ b/init.el
@@ -144,7 +144,7 @@
 ;; ------------------------------ Modules In Test ------------------------------
 (require 'browser-config)
 ;;(require 'wip)
-;;(require 'lipsum-generator)
+(require 'lorem-optimum)
 ;;(require 'jumper)
 
 ;; ---------------------------------- Wrap Up ----------------------------------
diff --git a/modules/lorem-generator.el b/modules/lorem-optimum.el
index 6f0520c6..6ccca55f 100644
--- a/modules/lorem-generator.el
+++ b/modules/lorem-optimum.el
@@ -1,4 +1,4 @@
-;;; lorem-generator.el --- Fake Latin Text Generator -*- coding: utf-8; lexical-binding: t; -*-
+;;; lorem-optimum.el --- Fake Latin Text Generator -*- coding: utf-8; lexical-binding: t; -*-
 ;;
 ;; Author: Craig Jennings
 ;; Version: 0.5
@@ -24,6 +24,19 @@
 
 (require 'cl-lib)
 
+;;; Configuration
+
+(defvar cj/lipsum-training-file "assets/liber-primus.txt"
+  "Default training file name (relative to `user-emacs-directory').")
+
+(defvar cj/lipsum-default-file
+  (expand-file-name cj/lipsum-training-file user-emacs-directory)
+  "Default training file for cj-lipsum.
+
+This should be a plain UTF-8 text file with hundreds of Latin words
+or sentences.  By default it points to the file specified in
+`cj/lipsum-training-file' relative to `user-emacs-directory'.")
+
 (cl-defstruct (cj/markov-chain
 			   (:constructor cj/markov-chain-create))
   "An order-two Markov chain."
@@ -31,25 +44,45 @@
   (keys nil))
 
 (defun cj/markov-tokenize (text)
-  "Split TEXT into tokens: words and punctuation separately."
-  (let ((case-fold-search nil))
-	(split-string text "\\b" t "[[:space:]\n]+")))
-
+  "Split TEXT into tokens: words and punctuation separately.
+Return a list of strings in input order.  A word is a maximal run of
+alphanumeric characters; each punctuation character is its own token.
+Whitespace and any other characters are skipped.
+
+TEXT is scanned in place with `string-match' and a start offset, so
+no substrings of the remaining input are copied and the cost is
+linear in the length of TEXT."
+  (let ((tokens '())
+        (pos 0)
+        (len (length text)))
+    ;; Alternatives are tried in order at the leftmost matching position:
+    ;; whitespace (no group, so it is dropped), then a word, then a single
+    ;; punctuation character.  Characters matching none are skipped by
+    ;; the search itself.
+    (while (and (< pos len)
+                (string-match
+                 "[[:space:]]+\\|\\([[:alnum:]]+\\|[[:punct:]]\\)" text pos))
+      (when (match-beginning 1)
+        (push (match-string 1 text) tokens))
+      (setq pos (match-end 0)))
+    (nreverse tokens)))
+
 (defun cj/markov-learn (chain text)
   "Add TEXT into the Markov CHAIN with tokenized input."
-  (let* ((words (cj/markov-tokenize text))
+  (let* ((word-list (cj/markov-tokenize text))
+         ;; Vector copy of the tokens: `aref' below is O(1), unlike `nth' on a list.
+         (words (vconcat word-list))
 		 (len (length words)))
 	(cl-loop for i from 0 to (- len 3)
-			 for a = (nth i words)
-			 for b = (nth (1+ i) words)
-			 for c = (nth (+ i 2) words)
+			 for a = (aref words i)
+			 for b = (aref words (1+ i))
+			 for c = (aref words (+ i 2))
 			 do (let* ((bigram (list a b))
 					   (nexts (gethash bigram (cj/markov-chain-map chain))))
 				  (puthash bigram (cons c nexts)
 						   (cj/markov-chain-map chain)))))
-	(setf (cj/markov-chain-keys chain)
-		  (cl-loop for k being the hash-keys of (cj/markov-chain-map chain)
-				   collect k)))
+  ;; Clear the cached key list; `cj/markov-random-key' rebuilds it on demand.
+  (setf (cj/markov-chain-keys chain) nil))
 
 (defun cj/markov-fix-capitalization (sentence)
   "Capitalize the first word and the first word after .!? in SENTENCE."
@@ -94,7 +127,7 @@
 
 (defun cj/markov-generate (chain n &optional start)
   "Generate a sentence of N tokens from CHAIN."
-  (when (cj/markov-chain-keys chain)
+  (when (> (hash-table-count (cj/markov-chain-map chain)) 0)
 	(let* ((state (or (and start
 						   (gethash start (cj/markov-chain-map chain))
 						   start)
@@ -116,8 +149,16 @@
 	  (cj/markov-join-tokens tokens))))
 
 (defun cj/markov-random-key (chain)
-  (nth (random (length (cj/markov-chain-keys chain)))
-	   (cj/markov-chain-keys chain)))
+  "Pick one bigram key of CHAIN at random.
+The key list is cached in the chain's `keys' slot and is rebuilt from
+the hash table only when that slot is nil.  Return nil when CHAIN
+has no bigrams."
+  (let ((cached (cj/markov-chain-keys chain)))
+    (unless cached
+      (maphash (lambda (bigram _nexts) (push bigram cached))
+               (cj/markov-chain-map chain))
+      (setf (cj/markov-chain-keys chain) (setq cached (nreverse cached))))
+    (and cached (nth (random (length cached)) cached))))
 
 (defun cj/markov-next-word (chain bigram)
   (let ((candidates (gethash bigram (cj/markov-chain-map chain))))
@@ -182,6 +223,7 @@
 										(or (cj/markov-next-word cj/lipsum-chain state)
 											(cadr (cj/markov-random-key cj/lipsum-chain))))))
 				   collect (replace-regexp-in-string "^[[:punct:]]+\\|[[:punct:]]+$" "" w))))
+	;; Filter empty strings from generated words
 	(setq words (cl-remove-if #'string-empty-p words))
 	(mapconcat
 	 (lambda (word idx)
@@ -204,23 +246,6 @@ Defaults: MIN=30, MAX=80."
       (let ((len (+ min (random (1+ (- max min))))))
 		(insert (cj/lipsum len) "\n\n")))))
 
-;;; Customization
-
-(defgroup cj-lipsum nil
-  "Pseudo-Latin lorem ipsum text generator."
-  :prefix "cj/lipsum-"
-  :group 'text)
-
-(defcustom cj/lipsum-default-file
-  (expand-file-name "latin.txt"
-					(file-name-directory (or load-file-name buffer-file-name)))
-  "Default training file for cj-lipsum.
-
-This should be a plain UTF-8 text file with hundreds of Latin words
-or sentences.  By default it points to the bundled `latin.txt`."
-  :type 'file
-  :group 'cj-lipsum)
-
 ;;; Initialization: train on default file
 (defun cj/lipsum--init ()
   "Initialize cj-lipsum by learning from `cj/lipsum-default-file`."
@@ -231,5 +256,5 @@ or sentences.  By default it points to the bundled `latin.txt`."
 
 (cj/lipsum--init)
 
-(provide 'lorem-generator)
-;;; lorem-generator.el ends here.
+(provide 'lorem-optimum)
+;;; lorem-optimum.el ends here.
diff --git a/tests/test-lorem-optimum-benchmark.el b/tests/test-lorem-optimum-benchmark.el
new file mode 100644
index 00000000..d3ca2873
--- /dev/null
+++ b/tests/test-lorem-optimum-benchmark.el
@@ -0,0 +1,227 @@
+;;; test-lorem-optimum-benchmark.el --- Performance tests for lorem-optimum.el -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; Benchmark and performance tests for the Markov chain implementation.
+;;
+;; These tests measure:
+;; - Learning time scaling with input size
+;; - Multiple learning operations (exposes key rebuild overhead)
+;; - Generation time scaling
+;; - Memory usage (hash table growth)
+;;
+;; Performance baseline targets (modern hardware; asserted test limits are looser):
+;; - Learn 1000 words: < 10ms
+;; - Learn 10,000 words: < 100ms
+;; - 100 learn operations of 100 words each: < 500ms (current bottleneck!)
+;; - Generate 100 words: < 5ms
+
+;;; Code:
+
+(require 'ert)
+(require 'testutil-general)
+
+;; Add modules directory to load path
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+
+;; Load the module
+(require 'lorem-optimum)
+
+;;; Benchmark Helpers
+
+(defun benchmark-time (func)
+  "Call FUNC with no arguments and return the elapsed wall time.
+The result is a float number of milliseconds.  FUNC's value is discarded."
+  (let ((began (current-time)))
+    (funcall func)
+    (* 1000.0 (float-time (time-subtract (current-time) began)))))
+
+(defun generate-test-text (word-count)
+  "Return WORD-COUNT cycled vocabulary words joined by single spaces.
+A \".\" token is inserted after words 0, 10, 20, ... (zero-based)."
+  (let ((vocab ["lorem" "ipsum" "dolor" "sit" "amet" "consectetur"
+                "adipiscing" "elit" "sed" "do" "eiusmod" "tempor"
+                "incididunt" "ut" "labore" "et" "dolore" "magna" "aliqua"])
+        (pieces '()))
+    (dotimes (i word-count)
+      (push (aref vocab (mod i (length vocab))) pieces)
+      (when (= 0 (% i 10)) (push "." pieces)))
+    (mapconcat #'identity (nreverse pieces) " ")))
+
+(defun benchmark-report (name time-ms)
+  "Log the benchmark labelled NAME and its duration TIME-MS (milliseconds)."
+  (message "%s" (format "BENCHMARK [%s]: %.2f ms" name time-ms)))
+
+;;; Learning Performance Tests
+
+(ert-deftest benchmark-learn-1k-words ()
+  "Learning a 1000-word text into a fresh chain takes under 50 ms."
+  (let* ((chain (cj/markov-chain-create))
+         (text (generate-test-text 1000))
+         (elapsed-ms (benchmark-time
+                      (lambda () (cj/markov-learn chain text)))))
+    (benchmark-report "Learn 1K words" elapsed-ms)
+    (should (< elapsed-ms 50.0))))  ; upper bound in milliseconds
+
+(ert-deftest benchmark-learn-10k-words ()
+  "Learning a 10,000-word text into a fresh chain takes under 500 ms."
+  (let* ((chain (cj/markov-chain-create))
+         (text (generate-test-text 10000))
+         (elapsed-ms (benchmark-time
+                      (lambda () (cj/markov-learn chain text)))))
+    (benchmark-report "Learn 10K words" elapsed-ms)
+    (should (< elapsed-ms 500.0))))  ; upper bound in milliseconds
+
+(ert-deftest benchmark-learn-100k-words ()
+  "Stress test: report the time to learn 100,000 words (no limit asserted)."
+  :tags '(:slow)
+  (let ((chain (cj/markov-chain-create))
+        (text (generate-test-text 100000)))
+    (benchmark-report "Learn 100K words"
+                      (benchmark-time
+                       (lambda () (cj/markov-learn chain text))))
+    ;; Report-only: also log how many distinct bigrams were stored.
+    (message "Hash table size: %d bigrams"
+             (hash-table-count (cj/markov-chain-map chain)))))
+
+;;; Multiple Learning Operations (Exposes Quadratic Behavior)
+
+(ert-deftest benchmark-multiple-learns-10x100 ()
+  "Benchmark 10 learn operations of 100 words each on one shared chain.
+Reports total, average and maximum time; asserts the total is under 100 ms."
+  (let ((chain (cj/markov-chain-create))
+        (times '()))
+    ;; `_' marks the loop counter as intentionally unused (no compiler warning).
+    (dotimes (_ 10)
+      (let ((text (generate-test-text 100)))
+        (push (benchmark-time (lambda () (cj/markov-learn chain text)))
+              times)))
+    (let ((total (apply #'+ times)))
+      (benchmark-report "10x learn 100 words - TOTAL" total)
+      (benchmark-report "10x learn 100 words - AVG" (/ total 10.0))
+      (benchmark-report "10x learn 100 words - MAX" (apply #'max times))
+      ;; `times' was built newest-first; reverse a copy for display.
+      (message "Times: %S" (reverse times))
+      ;; Note: Watch if later operations are slower (quadratic behavior)
+      (should (< total 100.0)))))  ; Total should be < 100ms
+
+(ert-deftest benchmark-multiple-learns-100x100 ()
+  "Benchmark 100 learn operations of 100 words each on one shared chain.
+Report-only: logs totals and warns when the last ten runs average more
+than twice the first ten; nothing is asserted."
+  :tags '(:slow)
+  (let ((chain (cj/markov-chain-create))
+        (times '())                     ; newest first
+        (samples '()))                  ; (ITERATION . MS), newest first
+    (dotimes (i 100)
+      (let* ((text (generate-test-text 100))
+             (ms (benchmark-time
+                  (lambda () (cj/markov-learn chain text)))))
+        (push ms times)
+        (when (= 0 (% i 10))
+          (push (cons i ms) samples))))
+    (let* ((total (apply #'+ times))
+           ;; `times' is newest-first, so its tail holds the earliest runs.
+           (first-10-avg (/ (apply #'+ (last times 10)) 10.0))
+           (last-10-avg (/ (apply #'+ (seq-take times 10)) 10.0)))
+      (benchmark-report "100x learn 100 words - TOTAL" total)
+      (benchmark-report "100x learn 100 words - AVG" (/ total 100.0))
+      (benchmark-report "100x learn - First 10 AVG" first-10-avg)
+      (benchmark-report "100x learn - Last 10 AVG" last-10-avg)
+      (message "Sampled times (iteration, ms): %S" (nreverse samples))
+      (message "Hash table size: %d bigrams"
+               (hash-table-count (cj/markov-chain-map chain)))
+      (when (> last-10-avg (* 2.0 first-10-avg))
+        (message "WARNING: Learning slows down significantly over time!")
+        (message "  First 10 avg: %.2f ms" first-10-avg)
+        (message "  Last 10 avg: %.2f ms" last-10-avg)
+        (message "  Ratio: %.1fx slower" (/ last-10-avg first-10-avg))))))
+
+;;; Generation Performance Tests
+
+(ert-deftest benchmark-generate-100-words ()
+  "Generating 100 tokens from a chain trained on 1000 words takes under 20 ms."
+  (let ((chain (cj/markov-chain-create)))
+    ;; Training happens outside the timed region.
+    (cj/markov-learn chain (generate-test-text 1000))
+    (let ((elapsed-ms (benchmark-time
+                       (lambda () (cj/markov-generate chain 100)))))
+      (benchmark-report "Generate 100 words" elapsed-ms)
+      (should (< elapsed-ms 20.0)))))  ; upper bound in milliseconds
+
+(ert-deftest benchmark-generate-1000-words ()
+  "Generating 1000 tokens from a chain trained on 10,000 words takes under 100 ms."
+  (let ((chain (cj/markov-chain-create)))
+    ;; Training happens outside the timed region.
+    (cj/markov-learn chain (generate-test-text 10000))
+    (let ((elapsed-ms (benchmark-time
+                       (lambda () (cj/markov-generate chain 1000)))))
+      (benchmark-report "Generate 1000 words" elapsed-ms)
+      (should (< elapsed-ms 100.0)))))  ; upper bound in milliseconds
+
+;;; Tokenization Performance Tests
+
+(ert-deftest benchmark-tokenize-10k-words ()
+  "Tokenizing a 10,000-word text takes under 50 ms."
+  (let* ((input (generate-test-text 10000))
+         (elapsed-ms (benchmark-time
+                      (lambda () (cj/markov-tokenize input)))))
+    (benchmark-report "Tokenize 10K words" elapsed-ms)
+    (should (< elapsed-ms 50.0))))  ; upper bound in milliseconds
+
+;;; Memory/Size Tests
+
+(ert-deftest benchmark-chain-growth ()
+  "Log the unique-bigram count after each of five cumulative learn steps.
+Report-only: nothing is asserted."
+  (let ((chain (cj/markov-chain-create))
+        (growth '()))
+    (dolist (word-count '(100 500 1000 5000 10000))
+      (cj/markov-learn chain (generate-test-text word-count))
+      (let ((bigrams (hash-table-count (cj/markov-chain-map chain))))
+        (push (cons word-count bigrams) growth)
+        (message "After %d words: %d unique bigrams" word-count bigrams)))
+    (message "Growth pattern: %S" (nreverse growth))))
+
+;;; Comparison: Tokenization vs Learning
+
+(ert-deftest benchmark-tokenize-vs-learn ()
+  "Report tokenization time as a percentage of total learning time."
+  (let* ((sample (generate-test-text 5000))
+         (tokenize-ms (benchmark-time
+                       (lambda () (cj/markov-tokenize sample))))
+         (fresh-chain (cj/markov-chain-create))
+         (learn-ms (benchmark-time
+                    (lambda () (cj/markov-learn fresh-chain sample)))))
+    (benchmark-report "Tokenize 5K words" tokenize-ms)
+    (benchmark-report "Learn 5K words (total)" learn-ms)
+    (message "Tokenization is %.1f%% of total learning time"
+             (* 100.0 (/ tokenize-ms learn-ms)))))
+
+;;; Real-world Scenario
+
+(ert-deftest benchmark-realistic-usage ()
+  "Benchmark realistic usage: learn from multiple sources, generate paragraphs."
+  (let ((chain (cj/markov-chain-create))
+        (learn-total 0.0)
+        (gen-total 0.0))
+    ;; Learn from 10 sources; `_' marks the intentionally unused loop counter.
+    (dotimes (_ 10)
+      (let ((text (generate-test-text 500)))
+        (setq learn-total
+              (+ learn-total
+                 (benchmark-time (lambda () (cj/markov-learn chain text)))))))
+
+    ;; Generate 5 paragraphs
+    (dotimes (_ 5)
+      (setq gen-total
+            (+ gen-total
+               (benchmark-time (lambda () (cj/markov-generate chain 50))))))
+
+    (benchmark-report "Realistic: 10 learns (500 words each)" learn-total)
+    (benchmark-report "Realistic: 5 generations (50 words each)" gen-total)
+    (benchmark-report "Realistic: TOTAL TIME" (+ learn-total gen-total))
+    (message "Final chain size: %d bigrams"
+             (hash-table-count (cj/markov-chain-map chain)))))
+
+(provide 'test-lorem-optimum-benchmark)
+;;; test-lorem-optimum-benchmark.el ends here
diff --git a/tests/test-lorem-optimum.el b/tests/test-lorem-optimum.el
new file mode 100644
index 00000000..ca2e52f4
--- /dev/null
+++ b/tests/test-lorem-optimum.el
@@ -0,0 +1,242 @@
+;;; test-lorem-optimum.el --- Tests for lorem-optimum.el -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; Unit tests for lorem-optimum.el Markov chain text generation.
+;;
+;; Tests cover:
+;; - Tokenization
+;; - Learning and chain building
+;; - Text generation
+;; - Capitalization fixing
+;; - Token joining
+
+;;; Code:
+
+(require 'ert)
+(require 'testutil-general)
+
+;; Add modules directory to load path
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+
+;; Load the module
+(require 'lorem-optimum)
+
+;;; Test Helpers
+
+(defun test-chain ()
+  "Return a new chain from `cj/markov-chain-create', with no text learned."
+  (cj/markov-chain-create))
+
+(defun test-learn (text)
+  "Return a fresh chain that has been trained on TEXT."
+  (let ((trained (cj/markov-chain-create)))
+    (cj/markov-learn trained text)
+    trained))
+
+;;; Tokenization Tests
+
+(ert-deftest test-tokenize-simple ()
+  "Two space-separated words yield two word tokens."
+  (should (equal (cj/markov-tokenize "hello world")
+                 '("hello" "world"))))
+
+(ert-deftest test-tokenize-with-punctuation ()
+  "Comma and exclamation mark come back as their own tokens."
+  (should (equal (cj/markov-tokenize "Hello, world!")
+                 '("Hello" "," "world" "!"))))
+
+(ert-deftest test-tokenize-multiple-spaces ()
+  "A run of spaces between words produces no extra tokens."
+  (should (equal (cj/markov-tokenize "hello    world")
+                 '("hello" "world"))))
+
+(ert-deftest test-tokenize-newlines ()
+  "A newline separates words just like a space."
+  (should (equal (cj/markov-tokenize "hello\nworld")
+                 '("hello" "world"))))
+
+(ert-deftest test-tokenize-mixed-punctuation ()
+  "Comma, semicolon and period are each split from the adjacent word."
+  (should (equal (cj/markov-tokenize "one, two; three.")
+                 '("one" "," "two" ";" "three" "."))))
+
+(ert-deftest test-tokenize-empty ()
+  "The empty string yields no tokens."
+  ;; nil (the empty list) is the expected token list.
+  (should-not (cj/markov-tokenize "")))
+
+(ert-deftest test-tokenize-whitespace-only ()
+  "Input made only of spaces, a newline and a tab yields no tokens."
+  ;; nil (the empty list) is the expected token list.
+  (should-not (cj/markov-tokenize "   \n\t  ")))
+
+;;; Markov Learn Tests
+
+(ert-deftest test-learn-basic ()
+  "Learning four words yields a chain struct with at least one bigram."
+  (let ((trained (test-learn "one two three four")))
+    (should (cj/markov-chain-p trained))
+    (should (< 0 (hash-table-count (cj/markov-chain-map trained))))))
+
+(ert-deftest test-learn-creates-bigrams ()
+  "The first two words of the input become a key in the chain map."
+  (let ((table (cj/markov-chain-map (test-learn "one two three"))))
+    (should (gethash '("one" "two") table))))
+
+(ert-deftest test-learn-stores-following-word ()
+  "The word after a bigram is recorded in that bigram's successor list."
+  (let ((table (cj/markov-chain-map (test-learn "one two three"))))
+    (should (member "three" (gethash '("one" "two") table)))))
+
+(ert-deftest test-learn-builds-keys-list ()
+  "The keys slot stays nil after learning and is filled on first random pick."
+  (let ((trained (test-learn "one two three four")))
+    ;; Learning only invalidates the cache; it does not build it.
+    (should-not (cj/markov-chain-keys trained))
+    ;; Picking a random key populates the cache as a side effect.
+    (cj/markov-random-key trained)
+    (should (< 0 (length (cj/markov-chain-keys trained))))))
+
+(ert-deftest test-learn-repeated-patterns ()
+  "A bigram seen twice with different successors records both of them."
+  (let* ((trained (test-learn "one two three one two four"))
+         (successors (gethash '("one" "two") (cj/markov-chain-map trained))))
+    (should (= 2 (length successors)))
+    (should (member "three" successors))
+    (should (member "four" successors))))
+
+(ert-deftest test-learn-incremental ()
+  "Two successive learn calls on one chain leave its map non-empty."
+  (let ((chain (test-chain)))
+    (dolist (text '("one two three" "four five six"))
+      (cj/markov-learn chain text))
+    (should (< 0 (hash-table-count (cj/markov-chain-map chain))))))
+
+;;; Token Joining Tests
+
+(ert-deftest test-join-simple-words ()
+  "Should join words with spaces; the match is case-sensitive so \"hello\" fails."
+  (let ((result (cj/markov-join-tokens '("hello" "world"))) (case-fold-search nil))
+    (should (string-match-p "^Hello world" result))))
+
+(ert-deftest test-join-with-punctuation ()
+  "Should attach punctuation without spaces; the match is case-sensitive."
+  (let ((result (cj/markov-join-tokens '("hello" "," "world"))) (case-fold-search nil))
+    (should (string-match-p "Hello, world" result))))
+
+(ert-deftest test-join-capitalizes-first ()
+  "Should capitalize first word; the match is case-sensitive so \"h\" fails."
+  (let ((result (cj/markov-join-tokens '("hello" "world"))) (case-fold-search nil))
+    (should (string-match-p "^H" result))))
+
+(ert-deftest test-join-adds-period ()
+  "Tokens without final punctuation are joined with a trailing period."
+  (should (string-match-p "\\.$"
+                          (cj/markov-join-tokens '("hello" "world")))))
+
+(ert-deftest test-join-preserves-existing-period ()
+  "A token list already ending in \".\" gets exactly one final period."
+  (let ((joined (cj/markov-join-tokens '("hello" "world" "."))))
+    (should (string-match-p "\\.$" joined))
+    (should-not (string-match-p "\\.\\.$" joined))))
+
+(ert-deftest test-join-empty-tokens ()
+  "An empty token list joins to a lone period."
+  (should (equal "."
+                 (cj/markov-join-tokens '()))))
+
+;;; Capitalization Tests
+
+(ert-deftest test-capitalize-first-word ()
+  "Should capitalize first word; the match is case-sensitive so \"hello\" fails."
+  (let ((result (cj/markov-fix-capitalization "hello world")) (case-fold-search nil))
+    (should (string-match-p "^Hello" result))))
+
+(ert-deftest test-capitalize-after-period ()
+  "Should capitalize after period; the match is case-sensitive."
+  (let ((result (cj/markov-fix-capitalization "hello. world")) (case-fold-search nil))
+    (should (string-match-p "Hello\\. World" result))))
+
+(ert-deftest test-capitalize-after-exclamation ()
+  "Should capitalize after exclamation; the match is case-sensitive."
+  (let ((result (cj/markov-fix-capitalization "hello! world")) (case-fold-search nil))
+    (should (string-match-p "Hello! World" result))))
+
+(ert-deftest test-capitalize-after-question ()
+  "Should capitalize after question mark; the match is case-sensitive."
+  (let ((result (cj/markov-fix-capitalization "hello? world")) (case-fold-search nil))
+    (should (string-match-p "Hello\\? World" result))))
+
+(ert-deftest test-capitalize-skip-non-alpha ()
+  "A numeric token after a period is still present in the result."
+  (should (string-match-p "123"
+                          (cj/markov-fix-capitalization "hello. 123 world"))))
+
+(ert-deftest test-capitalize-multiple-sentences ()
+  "Should capitalize all sentences; the match is case-sensitive."
+  (let ((result (cj/markov-fix-capitalization "first. second. third")) (case-fold-search nil))
+    (should (string-match-p "First\\. Second\\. Third" result))))
+
+;;; Generation Tests (deterministic with fixed chain)
+
+(ert-deftest test-generate-produces-output ()
+  "Generating from a trained chain returns a non-empty string."
+  (let* ((chain (test-learn "Lorem ipsum dolor sit amet consectetur adipiscing elit"))
+         (generated (cj/markov-generate chain 5)))
+    (should (stringp generated))
+    (should (< 0 (length generated)))))
+
+(ert-deftest test-generate-empty-chain ()
+  "Generating from an untrained chain returns nil or an empty string."
+  (let ((generated (cj/markov-generate (test-chain) 5)))
+    ;; Either "no result" representation is accepted.
+    (should (or (null generated) (string-empty-p generated)))))
+
+(ert-deftest test-generate-respects-start ()
+  "Generating with an explicit START bigram returns a non-empty string.
+Only the type and non-emptiness are checked, not the opening words."
+  (let* ((chain (test-learn "Lorem ipsum dolor sit amet"))
+         (generated (cj/markov-generate chain 3 '("Lorem" "ipsum"))))
+    (should (stringp generated))
+    (should (< 0 (length generated)))))
+
+;;; Integration Tests
+
+(ert-deftest test-full-workflow ()
+  "Should complete full learn-generate workflow."
+  (let* ((chain (test-learn "The quick brown fox jumps over the lazy dog"))
+         (result (cj/markov-generate chain 8)))
+    (should (stringp result))
+    (should (> (length result) 0))
+    ;; With case folding on, "^[A-Z]" would also accept a lowercase start.
+    (should (let ((case-fold-search nil)) (string-match-p "^[A-Z]" result)))
+    (should (string-match-p "[.!?]$" result))))
+
+(ert-deftest test-latin-like-output ()
+  "Generating 10 tokens from a Latin sentence returns a string over 10 chars."
+  (let* ((chain (test-learn "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."))
+         (generated (cj/markov-generate chain 10)))
+    ;; Only the type and a minimum length are checked.
+    (should (stringp generated))
+    (should (< 10 (length generated)))))
+
+;;; Edge Cases
+
+(ert-deftest test-learn-short-text ()
+  "Learning a two-word text (too short for any trigram) still returns a chain."
+  ;; Passing means `cj/markov-learn' did not signal on the short input.
+  (should (cj/markov-chain-p (test-learn "one two"))))
+
+(ert-deftest test-learn-single-word ()
+  "Learning a one-word text still returns a chain struct."
+  ;; Passing means `cj/markov-learn' did not signal on the short input.
+  (should (cj/markov-chain-p (test-learn "word"))))
+
+(ert-deftest test-generate-requested-count-small ()
+  "Requesting only two tokens still returns a string."
+  (should (stringp
+           (cj/markov-generate (test-learn "one two three four five")
+                               2))))
+
+(provide 'test-lorem-optimum)
+;;; test-lorem-optimum.el ends here