test(lorem-optimum): tag benchmarks :perf and assert on scaling ratios

The benchmark suite carried no tag, so `make test' ran it every time, and three of its tests asserted absolute wall-clock times (`(should (< time 5000.0))' and friends). Those numbers hold on my laptop and break on a slower box. Every benchmark is now `:perf'-tagged so the default test run skips it. The three absolute learn thresholds collapse into one `benchmark-learn-scaling' test: it times 1K, 10K, and 100K learns and requires each 10x jump in input to cost under 40x the time. Linear scaling lands near 10x, and the 40x ceiling tolerates GC pauses and slow hardware while still catching an O(N^2) regression. The rest drop their absolute `should's and stay as timing reports for `make benchmark'.
author: Craig Jennings <c@cjennings.net> 2026-05-12 00:09:15 -0500
committer: Craig Jennings <c@cjennings.net> 2026-05-12 00:09:15 -0500
commit: 94ee5e4fac213de915a27906e4e0861e86de8914 (patch)
tree: 9f8ee61efaefa9fedc1c79d7ff9a432cf0d3b96d /tests
parent: 2b88c6a83f854c3e166774d2855491de6e230d03 (diff)
download: dotemacs-94ee5e4fac213de915a27906e4e0861e86de8914.tar.gz
dotemacs-94ee5e4fac213de915a27906e4e0861e86de8914.zip
1 files changed, 71 insertions, 75 deletions
diff --git a/tests/test-lorem-optimum-benchmark.el b/tests/test-lorem-optimum-benchmark.el
index 6b2f0163..22219b92 100644
--- a/tests/test-lorem-optimum-benchmark.el
+++ b/tests/test-lorem-optimum-benchmark.el
@@ -1,19 +1,18 @@
-;;; test-lorem-optimum-benchmark.el --- Performance tests for lorem-optimum.el -*- lexical-binding: t; -*-
+;;; test-lorem-optimum-benchmark.el --- Performance benchmarks for lorem-optimum.el -*- lexical-binding: t; -*-
 
 ;;; Commentary:
-;; Benchmark and performance tests for the Markov chain implementation.
+;; Benchmarks for the order-two Markov chain in `lorem-optimum.el'.
 ;;
-;; These tests measure:
-;; - Learning time scaling with input size
-;; - Multiple learning operations (exposes key rebuild overhead)
-;; - Generation time scaling
-;; - Memory usage (hash table growth)
+;; Every test here is tagged `:perf', so `make test', `make coverage', and the
+;; PostToolUse test hook skip them.  Run them deliberately:
 ;;
-;; Performance baseline targets (on modern hardware):
-;; - Learn 1000 words: < 10ms
-;; - Learn 10,000 words: < 100ms
-;; - 100 learn operations of 100 words each: < 500ms (current bottleneck!)
-;; - Generate 100 words: < 5ms
+;;   make benchmark
+;;
+;; The tests print timings (learning, generation, tokenization, hash-table
+;; growth) for eyeballing.  The only assertions are machine-independent
+;; *ratios*: `benchmark-learn-scaling' fails if learn time grows faster than
+;; roughly linearly with input size.  A slow CI runner or an old laptop won't
+;; trip that — an O(N^2) regression will.
 
 ;;; Code:
 
@@ -53,43 +52,39 @@
 
 ;;; Learning Performance Tests
 
-(ert-deftest benchmark-learn-1k-words ()
-  "Benchmark learning 1000 words."
-  (let* ((text (generate-test-text 1000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 1K words" time)
-    (should (< time 50.0))))  ; Should be < 50ms
-
-(ert-deftest benchmark-learn-10k-words ()
-  "Benchmark learning 10,000 words."
-  (let* ((text (generate-test-text 10000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 10K words" time)
-    (should (< time 500.0))))  ; Should be < 500ms
-
-(ert-deftest benchmark-learn-100k-words ()
-  "Benchmark learning 100,000 words (stress test)."
-  (let* ((text (generate-test-text 100000))
-         (chain (cj/markov-chain-create))
-         (time (benchmark-time
-                (lambda () (cj/markov-learn chain text)))))
-    (benchmark-report "Learn 100K words" time)
-    ;; This may be slow due to key rebuild
-    (message "Hash table size: %d bigrams"
-             (hash-table-count (cj/markov-chain-map chain)))
-    (should (< time 5000.0))))
-
-;;; Multiple Learning Operations (Exposes Quadratic Behavior)
+(ert-deftest benchmark-learn-scaling ()
+  "Learn time scales no worse than ~linearly with input size.
+Reports 1K/10K/100K learn times and the hash-table size, then asserts each
+10x jump in input costs less than 40x the time — generous enough that GC
+pauses and a slow machine pass, tight enough that O(N^2) fails."
+  :tags '(:perf)
+  (let* ((t1k (benchmark-time
+               (lambda () (cj/markov-learn (cj/markov-chain-create)
+                                           (generate-test-text 1000)))))
+         (t10k (benchmark-time
+                (lambda () (cj/markov-learn (cj/markov-chain-create)
+                                            (generate-test-text 10000)))))
+         (chain-100k (cj/markov-chain-create))
+         (t100k (benchmark-time
+                 (lambda () (cj/markov-learn chain-100k
+                                             (generate-test-text 100000))))))
+    (benchmark-report "Learn 1K words" t1k)
+    (benchmark-report "Learn 10K words" t10k)
+    (benchmark-report "Learn 100K words" t100k)
+    (message "Hash table size after 100K words: %d bigrams"
+             (hash-table-count (cj/markov-chain-map chain-100k)))
+    (should (> t1k 0.0))
+    (should (< t10k (* 40.0 t1k)))
+    (should (< t100k (* 40.0 t10k)))))
+
+;;; Multiple Learning Operations
 
 (ert-deftest benchmark-multiple-learns-10x100 ()
-  "Benchmark 10 learn operations of 100 words each."
+  "Report 10 learn operations of 100 words each."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (times '()))
-    (dotimes (i 10)
+    (dotimes (_ 10)
       (let* ((text (generate-test-text 100))
              (time (benchmark-time
                     (lambda () (cj/markov-learn chain text)))))
@@ -100,12 +95,14 @@
       (benchmark-report "10x learn 100 words - TOTAL" total)
       (benchmark-report "10x learn 100 words - AVG" avg)
       (benchmark-report "10x learn 100 words - MAX" max-time)
-      (message "Times: %S" (nreverse times))
-      ;; Note: Watch if later operations are slower (quadratic behavior)
-      (should (< total 100.0)))))  ; Total should be < 100ms
+      (message "Times: %S" (nreverse times)))))
 
 (ert-deftest benchmark-multiple-learns-100x100 ()
-  "Benchmark 100 learn operations of 100 words each (key rebuild overhead)."
+  "Report 100 learn operations of 100 words each, sampling for slowdown.
+With the O(1) word-vector access and lazy key rebuild, per-chunk time should
+stay flat across the run; a growing first-10-vs-last-10 gap means a
+regression to quadratic behavior."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (times '())
         (measurements '()))
@@ -119,6 +116,8 @@
           (push (cons i time) measurements))))
     (let ((total (apply #'+ times))
           (avg (/ (apply #'+ times) 100.0))
+          ;; `times' is built with `push', so the tail holds the first
+          ;; iterations and the head holds the last ones.
           (first-10-avg (/ (apply #'+ (last times 10)) 10.0))
           (last-10-avg (/ (apply #'+ (seq-take times 10)) 10.0)))
       (benchmark-report "100x learn 100 words - TOTAL" total)
@@ -128,39 +127,36 @@
       (message "Sampled times (iteration, ms): %S" (nreverse measurements))
       (message "Hash table size: %d bigrams"
                (hash-table-count (cj/markov-chain-map chain)))
-      ;; This exposes the quadratic behavior: last operations much slower
       (when (> last-10-avg (* 2.0 first-10-avg))
-        (message "WARNING: Learning slows down significantly over time!")
-        (message "  First 10 avg: %.2f ms" first-10-avg)
-        (message "  Last 10 avg: %.2f ms" last-10-avg)
-        (message "  Ratio: %.1fx slower" (/ last-10-avg first-10-avg))))))
+        (message "WARNING: learning slows down over time! %.1fx (%.2f -> %.2f ms)"
+                 (/ last-10-avg first-10-avg) first-10-avg last-10-avg)))))
 
 ;;; Generation Performance Tests
 
 (ert-deftest benchmark-generate-100-words ()
-  "Benchmark generating 100 words."
-  (let* ((text (generate-test-text 1000))
-         (chain (cj/markov-chain-create)))
-    (cj/markov-learn chain text)
-    (let ((time (benchmark-time
-                 (lambda () (cj/markov-generate chain 100)))))
-      (benchmark-report "Generate 100 words" time)
-      (should (< time 30.0)))))  ; Should be < 30ms
+  "Report time to generate 100 words."
+  :tags '(:perf)
+  (let ((chain (cj/markov-chain-create)))
+    (cj/markov-learn chain (generate-test-text 1000))
+    (benchmark-report "Generate 100 words"
+                      (benchmark-time
+                       (lambda () (cj/markov-generate chain 100))))))
 
 ;;; Tokenization Performance Tests
 
 (ert-deftest benchmark-tokenize-10k-words ()
-  "Benchmark tokenizing 10,000 words."
-  (let* ((text (generate-test-text 10000))
-         (time (benchmark-time
-                (lambda () (cj/markov-tokenize text)))))
-    (benchmark-report "Tokenize 10K words" time)
-    (should (< time 50.0))))  ; Tokenization should be fast
+  "Report time to tokenize 10,000 words."
+  :tags '(:perf)
+  (let ((text (generate-test-text 10000)))
+    (benchmark-report "Tokenize 10K words"
+                      (benchmark-time
+                       (lambda () (cj/markov-tokenize text))))))
 
 ;;; Memory/Size Tests
 
 (ert-deftest benchmark-chain-growth ()
-  "Measure hash table growth with increasing input."
+  "Report hash-table growth with increasing input."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (sizes '()))
     (dolist (word-count '(100 500 1000 5000 10000))
@@ -174,7 +170,8 @@
 ;;; Comparison: Tokenization vs Learning
 
 (ert-deftest benchmark-tokenize-vs-learn ()
-  "Compare tokenization time to total learning time."
+  "Report tokenization time as a fraction of total learning time."
+  :tags '(:perf)
   (let* ((text (generate-test-text 5000))
          (tokenize-time (benchmark-time
                          (lambda () (cj/markov-tokenize text))))
@@ -189,23 +186,22 @@
 ;;; Real-world Scenario
 
 (ert-deftest benchmark-realistic-usage ()
-  "Benchmark realistic usage: learn from multiple sources, generate paragraphs."
+  "Report a realistic mix: learn from several sources, then generate paragraphs."
+  :tags '(:perf)
   (let ((chain (cj/markov-chain-create))
         (learn-total 0.0)
         (gen-total 0.0))
     ;; Simulate learning from 10 different sources
-    (dotimes (i 10)
+    (dotimes (_ 10)
       (let ((text (generate-test-text 500)))
         (setq learn-total
               (+ learn-total
                  (benchmark-time (lambda () (cj/markov-learn chain text)))))))
-
     ;; Generate 5 paragraphs
-    (dotimes (i 5)
+    (dotimes (_ 5)
       (setq gen-total
             (+ gen-total
                (benchmark-time (lambda () (cj/markov-generate chain 50))))))
-
     (benchmark-report "Realistic: 10 learns (500 words each)" learn-total)
     (benchmark-report "Realistic: 5 generations (50 words each)" gen-total)
     (benchmark-report "Realistic: TOTAL TIME" (+ learn-total gen-total))
author	Craig Jennings <c@cjennings.net>	2026-05-12 00:09:15 -0500
committer	Craig Jennings <c@cjennings.net>	2026-05-12 00:09:15 -0500
commit	94ee5e4fac213de915a27906e4e0861e86de8914 (patch)
tree	9f8ee61efaefa9fedc1c79d7ff9a432cf0d3b96d /tests
parent	2b88c6a83f854c3e166774d2855491de6e230d03 (diff)
download	dotemacs-94ee5e4fac213de915a27906e4e0861e86de8914.tar.gz dotemacs-94ee5e4fac213de915a27906e4e0861e86de8914.zip