summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r--tests/test-lorem-optimum-benchmark.el146
1 files changed, 71 insertions, 75 deletions
diff --git a/tests/test-lorem-optimum-benchmark.el b/tests/test-lorem-optimum-benchmark.el
index 6b2f0163..22219b92 100644
--- a/tests/test-lorem-optimum-benchmark.el
+++ b/tests/test-lorem-optimum-benchmark.el
@@ -1,19 +1,18 @@
-;;; test-lorem-optimum-benchmark.el --- Performance tests for lorem-optimum.el -*- lexical-binding: t; -*-
+;;; test-lorem-optimum-benchmark.el --- Performance benchmarks for lorem-optimum.el -*- lexical-binding: t; -*-
;;; Commentary:
-;; Benchmark and performance tests for the Markov chain implementation.
+;; Benchmarks for the order-two Markov chain in `lorem-optimum.el'.
;;
-;; These tests measure:
-;; - Learning time scaling with input size
-;; - Multiple learning operations (exposes key rebuild overhead)
-;; - Generation time scaling
-;; - Memory usage (hash table growth)
+;; Every test here is tagged `:perf', so `make test', `make coverage', and the
+;; PostToolUse test hook skip them. Run them deliberately:
;;
-;; Performance baseline targets (on modern hardware):
-;; - Learn 1000 words: < 10ms
-;; - Learn 10,000 words: < 100ms
-;; - 100 learn operations of 100 words each: < 500ms (current bottleneck!)
-;; - Generate 100 words: < 5ms
+;; make benchmark
+;;
+;; The tests print timings (learning, generation, tokenization, hash-table
+;; growth) for eyeballing. The only assertions are machine-independent
+;; *ratios*: `benchmark-learn-scaling' fails if learn time grows faster than
+;; roughly linearly with input size. A slow CI runner or an old laptop won't
+;; trip that — an O(N^2) regression will.
;;; Code:
@@ -53,43 +52,39 @@
;;; Learning Performance Tests
-(ert-deftest benchmark-learn-1k-words ()
- "Benchmark learning 1000 words."
- (let* ((text (generate-test-text 1000))
- (chain (cj/markov-chain-create))
- (time (benchmark-time
- (lambda () (cj/markov-learn chain text)))))
- (benchmark-report "Learn 1K words" time)
- (should (< time 50.0)))) ; Should be < 50ms
-
-(ert-deftest benchmark-learn-10k-words ()
- "Benchmark learning 10,000 words."
- (let* ((text (generate-test-text 10000))
- (chain (cj/markov-chain-create))
- (time (benchmark-time
- (lambda () (cj/markov-learn chain text)))))
- (benchmark-report "Learn 10K words" time)
- (should (< time 500.0)))) ; Should be < 500ms
-
-(ert-deftest benchmark-learn-100k-words ()
- "Benchmark learning 100,000 words (stress test)."
- (let* ((text (generate-test-text 100000))
- (chain (cj/markov-chain-create))
- (time (benchmark-time
- (lambda () (cj/markov-learn chain text)))))
- (benchmark-report "Learn 100K words" time)
- ;; This may be slow due to key rebuild
- (message "Hash table size: %d bigrams"
- (hash-table-count (cj/markov-chain-map chain)))
- (should (< time 5000.0))))
-
-;;; Multiple Learning Operations (Exposes Quadratic Behavior)
+(ert-deftest benchmark-learn-scaling ()
+ "Learn time scales no worse than ~linearly with input size.
+Reports 1K/10K/100K learn times and the hash-table size, then asserts each
+10x jump in input costs less than 40x the time — generous enough that GC
+pauses and a slow machine pass, tight enough that O(N^2) fails."
+ :tags '(:perf)
+ (let* ((t1k (benchmark-time
+ (lambda () (cj/markov-learn (cj/markov-chain-create)
+ (generate-test-text 1000)))))
+ (t10k (benchmark-time
+ (lambda () (cj/markov-learn (cj/markov-chain-create)
+ (generate-test-text 10000)))))
+ (chain-100k (cj/markov-chain-create))
+ (t100k (benchmark-time
+ (lambda () (cj/markov-learn chain-100k
+ (generate-test-text 100000))))))
+ (benchmark-report "Learn 1K words" t1k)
+ (benchmark-report "Learn 10K words" t10k)
+ (benchmark-report "Learn 100K words" t100k)
+ (message "Hash table size after 100K words: %d bigrams"
+ (hash-table-count (cj/markov-chain-map chain-100k)))
+ (should (> t1k 0.0))
+ (should (< t10k (* 40.0 t1k)))
+ (should (< t100k (* 40.0 t10k)))))
+
+;;; Multiple Learning Operations
(ert-deftest benchmark-multiple-learns-10x100 ()
- "Benchmark 10 learn operations of 100 words each."
+ "Report 10 learn operations of 100 words each."
+ :tags '(:perf)
(let ((chain (cj/markov-chain-create))
(times '()))
- (dotimes (i 10)
+ (dotimes (_ 10)
(let* ((text (generate-test-text 100))
(time (benchmark-time
(lambda () (cj/markov-learn chain text)))))
@@ -100,12 +95,14 @@
(benchmark-report "10x learn 100 words - TOTAL" total)
(benchmark-report "10x learn 100 words - AVG" avg)
(benchmark-report "10x learn 100 words - MAX" max-time)
- (message "Times: %S" (nreverse times))
- ;; Note: Watch if later operations are slower (quadratic behavior)
- (should (< total 100.0))))) ; Total should be < 100ms
+ (message "Times: %S" (nreverse times)))))
(ert-deftest benchmark-multiple-learns-100x100 ()
- "Benchmark 100 learn operations of 100 words each (key rebuild overhead)."
+ "Report 100 learn operations of 100 words each, sampling for slowdown.
+With the O(1) word-vector access and lazy key rebuild, per-chunk time should
+stay flat across the run; a growing first-10-vs-last-10 gap means a
+regression to quadratic behavior."
+ :tags '(:perf)
(let ((chain (cj/markov-chain-create))
(times '())
(measurements '()))
@@ -119,6 +116,8 @@
(push (cons i time) measurements))))
(let ((total (apply #'+ times))
(avg (/ (apply #'+ times) 100.0))
+ ;; `times' is built with `push', so the tail holds the first
+ ;; iterations and the head holds the last ones.
(first-10-avg (/ (apply #'+ (last times 10)) 10.0))
(last-10-avg (/ (apply #'+ (seq-take times 10)) 10.0)))
(benchmark-report "100x learn 100 words - TOTAL" total)
@@ -128,39 +127,36 @@
(message "Sampled times (iteration, ms): %S" (nreverse measurements))
(message "Hash table size: %d bigrams"
(hash-table-count (cj/markov-chain-map chain)))
- ;; This exposes the quadratic behavior: last operations much slower
(when (> last-10-avg (* 2.0 first-10-avg))
- (message "WARNING: Learning slows down significantly over time!")
- (message " First 10 avg: %.2f ms" first-10-avg)
- (message " Last 10 avg: %.2f ms" last-10-avg)
- (message " Ratio: %.1fx slower" (/ last-10-avg first-10-avg))))))
+ (message "WARNING: learning slows down over time! %.1fx (%.2f -> %.2f ms)"
+ (/ last-10-avg first-10-avg) first-10-avg last-10-avg)))))
;;; Generation Performance Tests
(ert-deftest benchmark-generate-100-words ()
- "Benchmark generating 100 words."
- (let* ((text (generate-test-text 1000))
- (chain (cj/markov-chain-create)))
- (cj/markov-learn chain text)
- (let ((time (benchmark-time
- (lambda () (cj/markov-generate chain 100)))))
- (benchmark-report "Generate 100 words" time)
- (should (< time 30.0))))) ; Should be < 30ms
+ "Report time to generate 100 words."
+ :tags '(:perf)
+ (let ((chain (cj/markov-chain-create)))
+ (cj/markov-learn chain (generate-test-text 1000))
+ (benchmark-report "Generate 100 words"
+ (benchmark-time
+ (lambda () (cj/markov-generate chain 100))))))
;;; Tokenization Performance Tests
(ert-deftest benchmark-tokenize-10k-words ()
- "Benchmark tokenizing 10,000 words."
- (let* ((text (generate-test-text 10000))
- (time (benchmark-time
- (lambda () (cj/markov-tokenize text)))))
- (benchmark-report "Tokenize 10K words" time)
- (should (< time 50.0)))) ; Tokenization should be fast
+ "Report time to tokenize 10,000 words."
+ :tags '(:perf)
+ (let ((text (generate-test-text 10000)))
+ (benchmark-report "Tokenize 10K words"
+ (benchmark-time
+ (lambda () (cj/markov-tokenize text))))))
;;; Memory/Size Tests
(ert-deftest benchmark-chain-growth ()
- "Measure hash table growth with increasing input."
+ "Report hash-table growth with increasing input."
+ :tags '(:perf)
(let ((chain (cj/markov-chain-create))
(sizes '()))
(dolist (word-count '(100 500 1000 5000 10000))
@@ -174,7 +170,8 @@
;;; Comparison: Tokenization vs Learning
(ert-deftest benchmark-tokenize-vs-learn ()
- "Compare tokenization time to total learning time."
+ "Report tokenization time as a fraction of total learning time."
+ :tags '(:perf)
(let* ((text (generate-test-text 5000))
(tokenize-time (benchmark-time
(lambda () (cj/markov-tokenize text))))
@@ -189,23 +186,22 @@
;;; Real-world Scenario
(ert-deftest benchmark-realistic-usage ()
- "Benchmark realistic usage: learn from multiple sources, generate paragraphs."
+ "Report a realistic mix: learn from several sources, then generate paragraphs."
+ :tags '(:perf)
(let ((chain (cj/markov-chain-create))
(learn-total 0.0)
(gen-total 0.0))
;; Simulate learning from 10 different sources
- (dotimes (i 10)
+ (dotimes (_ 10)
(let ((text (generate-test-text 500)))
(setq learn-total
(+ learn-total
(benchmark-time (lambda () (cj/markov-learn chain text)))))))
-
;; Generate 5 paragraphs
- (dotimes (i 5)
+ (dotimes (_ 5)
(setq gen-total
(+ gen-total
(benchmark-time (lambda () (cj/markov-generate chain 50))))))
-
(benchmark-report "Realistic: 10 learns (500 words each)" learn-total)
(benchmark-report "Realistic: 5 generations (50 words each)" gen-total)
(benchmark-report "Realistic: TOTAL TIME" (+ learn-total gen-total))