1 files changed, 71 insertions, 75 deletions
diff --git a/tests/test-lorem-optimum-benchmark.el b/tests/test-lorem-optimum-benchmark.el
index 6b2f0163..22219b92 100644
--- a/tests/test-lorem-optimum-benchmark.el
+++ b/tests/test-lorem-optimum-benchmark.el
@@ -1,19 +1,18 @@
-;;; test-lorem-optimum-benchmark.el --- Performance tests for lorem-optimum.el -*- lexical-binding: t; -*-
+;;; test-lorem-optimum-benchmark.el --- Performance benchmarks for lorem-optimum.el -*- lexical-binding: t; -*-
 
 ;;; Commentary:
-;; Benchmark and performance tests for the Markov chain implementation.
+;; Benchmarks for the order-two Markov chain in `lorem-optimum.el'.
 ;;
-;; These tests measure:
-;; - Learning time scaling with input size
-;; - Multiple learning operations (exposes key rebuild overhead)
-;; - Generation time scaling
-;; - Memory usage (hash table growth)
+;; Every test here is tagged `:perf', so `make test', `make coverage', and the
+;; PostToolUse test hook skip them.  Run them deliberately:
 ;;
-;; Performance baseline targets (on modern hardware):
-;; - Learn 1000 words: < 10ms
-;; - Learn 10,000 words: < 100ms
-;; - 100 learn operations of 100 words each: < 500ms (current bottleneck!)
-;; - Generate 100 words: < 5ms
+;;   make benchmark
+;;
+;; The tests print timings (learning, generation, tokenization, hash-table
+;; growth) for eyeballing.  The only assertions are machine-independent
+;; *ratios*: `benchmark-learn-scaling' fails if learn time grows faster than
+;; roughly linearly with input size.  A slow CI runner or an old laptop won't
+;; trip that — an O(N^2) regression will.
 
 ;;; Code:
 
@@ -53,43 +52,39 @@
 
 ;;; Learning Performance Tests
 
-(ert-deftest benchmark-learn-1k-words ()
-  "Benchmark learning 1000 words."
-  (let* ((text (generate-test-text 1000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 1K words" time)
-    (should (< time 50.0))))  ; Should be < 50ms
-
-(ert-deftest benchmark-learn-10k-words ()
-  "Benchmark learning 10,000 words."
-  (let* ((text (generate-test-text 10000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 10K words" time)
-    (should (< time 500.0))))  ; Should be < 500ms
-
-(ert-deftest benchmark-learn-100k-words ()
-  "Benchmark learning 100,000 words (stress test)."
-  (let* ((text (generate-test-text 100000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 100K words" time)
-    ;; This may be slow due to key rebuild
-    (message "Hash table size: %d bigrams"
-             (hash-table-count (cj/markov-chain-map chain)))
-    (should (< time 5000.0))))
-
-;;; Multiple Learning Operations (Exposes Quadratic Behavior)
+(ert-deftest benchmark-learn-scaling ()
+  "Learn time scales no worse than ~linearly with input size.
+Reports 1K/10K/100K learn times and the hash-table size, then asserts each
+10x jump in input costs less than 40x the time — generous enough that GC
+pauses and a slow machine pass, tight enough that O(N^2) fails."
+  :tags '(:perf)
+  (let* ((t1k (benchmark-time
+               (lambda () (cj/markov-learn (cj/markov-chain-create)
+                                           (generate-test-text 1000)))))
+         (t10k (benchmark-time
+                (lambda () (cj/markov-learn (cj/markov-chain-create)
+                                            (generate-test-text 10000)))))
+         (chain-100k (cj/markov-chain-create))
+         (t100k (benchmark-time
+                 (lambda () (cj/markov-learn chain-100k
+                                             (generate-test-text 100000))))))
+    (benchmark-report "Learn 1K words" t1k)
+    (benchmark-report "Learn 10K words" t10k)
+    (benchmark-report "Learn 100K words" t100k)
+    (message "Hash table size after 100K words: %d bigrams"
+             (hash-table-count (cj/markov-chain-map chain-100k)))
+    (should (> t1k 0.0))
+    (should (< t10k (* 40.0 t1k)))
+    (should (< t100k (* 40.0 t10k)))))
+
+;;; Multiple Learning Operations
 
 (ert-deftest benchmark-multiple-learns-10x100 ()
-  "Benchmark 10 learn operations of 100 words each."
+  "Report 10 learn operations of 100 words each."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (times '()))
-    (dotimes (i 10)
+    (dotimes (_ 10)
       (let* ((text (generate-test-text 100))
              (time (benchmark-time
                     (lambda () (cj/markov-learn chain text)))))
@@ -100,12 +95,14 @@
       (benchmark-report "10x learn 100 words - TOTAL" total)
       (benchmark-report "10x learn 100 words - AVG" avg)
       (benchmark-report "10x learn 100 words - MAX" max-time)
-      (message "Times: %S" (nreverse times))
-      ;; Note: Watch if later operations are slower (quadratic behavior)
-      (should (< total 100.0)))))  ; Total should be < 100ms
+      (message "Times: %S" (nreverse times)))))
 
 (ert-deftest benchmark-multiple-learns-100x100 ()
-  "Benchmark 100 learn operations of 100 words each (key rebuild overhead)."
+  "Report 100 learn operations of 100 words each, sampling for slowdown.
+With the O(1) word-vector access and lazy key rebuild, per-chunk time should
+stay flat across the run; a growing first-10-vs-last-10 gap means a
+regression to quadratic behavior."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (times '())
         (measurements '()))
@@ -119,6 +116,8 @@
           (push (cons i time) measurements))))
     (let ((total (apply #'+ times))
           (avg (/ (apply #'+ times) 100.0))
+          ;; `times' is built with `push', so the tail holds the first
+          ;; iterations and the head holds the last ones.
           (first-10-avg (/ (apply #'+ (last times 10)) 10.0))
           (last-10-avg (/ (apply #'+ (seq-take times 10)) 10.0)))
       (benchmark-report "100x learn 100 words - TOTAL" total)
@@ -128,39 +127,36 @@
       (message "Sampled times (iteration, ms): %S" (nreverse measurements))
       (message "Hash table size: %d bigrams"
                (hash-table-count (cj/markov-chain-map chain)))
-      ;; This exposes the quadratic behavior: last operations much slower
       (when (> last-10-avg (* 2.0 first-10-avg))
-        (message "WARNING: Learning slows down significantly over time!")
-        (message "  First 10 avg: %.2f ms" first-10-avg)
-        (message "  Last 10 avg: %.2f ms" last-10-avg)
-        (message "  Ratio: %.1fx slower" (/ last-10-avg first-10-avg))))))
+        (message "WARNING: learning slows down over time! %.1fx (%.2f -> %.2f ms)"
+                 (/ last-10-avg first-10-avg) first-10-avg last-10-avg)))))
 
 ;;; Generation Performance Tests
 
 (ert-deftest benchmark-generate-100-words ()
-  "Benchmark generating 100 words."
-  (let* ((text (generate-test-text 1000))
-         (chain (cj/markov-chain-create)))
-    (cj/markov-learn chain text)
-    (let ((time (benchmark-time
-                 (lambda () (cj/markov-generate chain 100)))))
-      (benchmark-report "Generate 100 words" time)
-      (should (< time 30.0)))))  ; Should be < 30ms
+  "Report time to generate 100 words."
+  :tags '(:perf)
+  (let ((chain (cj/markov-chain-create)))
+    (cj/markov-learn chain (generate-test-text 1000))
+    (benchmark-report "Generate 100 words"
+                      (benchmark-time
+                       (lambda () (cj/markov-generate chain 100))))))
 
 ;;; Tokenization Performance Tests
 
 (ert-deftest benchmark-tokenize-10k-words ()
-  "Benchmark tokenizing 10,000 words."
-  (let* ((text (generate-test-text 10000))
-         (time (benchmark-time
-                (lambda () (cj/markov-tokenize text)))))
-    (benchmark-report "Tokenize 10K words" time)
-    (should (< time 50.0))))  ; Tokenization should be fast
+  "Report time to tokenize 10,000 words."
+  :tags '(:perf)
+  (let ((text (generate-test-text 10000)))
+    (benchmark-report "Tokenize 10K words"
+                      (benchmark-time
+                       (lambda () (cj/markov-tokenize text))))))
 
 ;;; Memory/Size Tests
 
 (ert-deftest benchmark-chain-growth ()
-  "Measure hash table growth with increasing input."
+  "Report hash-table growth with increasing input."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (sizes '()))
     (dolist (word-count '(100 500 1000 5000 10000))
@@ -174,7 +170,8 @@
 ;;; Comparison: Tokenization vs Learning
 
 (ert-deftest benchmark-tokenize-vs-learn ()
-  "Compare tokenization time to total learning time."
+  "Report tokenization time as a fraction of total learning time."
+  :tags '(:perf)
   (let* ((text (generate-test-text 5000))
          (tokenize-time (benchmark-time
                          (lambda () (cj/markov-tokenize text))))
@@ -189,23 +186,22 @@
 ;;; Real-world Scenario
 
 (ert-deftest benchmark-realistic-usage ()
-  "Benchmark realistic usage: learn from multiple sources, generate paragraphs."
+  "Report a realistic mix: learn from several sources, then generate paragraphs."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (learn-total 0.0)
         (gen-total 0.0))
     ;; Simulate learning from 10 different sources
-    (dotimes (i 10)
+    (dotimes (_ 10)
       (let ((text (generate-test-text 500)))
         (setq learn-total
               (+ learn-total
                  (benchmark-time (lambda () (cj/markov-learn chain text)))))))
-
     ;; Generate 5 paragraphs
-    (dotimes (i 5)
+    (dotimes (_ 5)
       (setq gen-total
             (+ gen-total
                (benchmark-time (lambda () (cj/markov-generate chain 50))))))
-
     (benchmark-report "Realistic: 10 learns (500 words each)" learn-total)
     (benchmark-report "Realistic: 5 generations (50 words each)" gen-total)
     (benchmark-report "Realistic: TOTAL TIME" (+ learn-total gen-total))