diff options
| -rw-r--r-- | tests/test-lorem-optimum-benchmark.el | 146 |
1 files changed, 71 insertions, 75 deletions
diff --git a/tests/test-lorem-optimum-benchmark.el b/tests/test-lorem-optimum-benchmark.el index 6b2f0163..22219b92 100644 --- a/tests/test-lorem-optimum-benchmark.el +++ b/tests/test-lorem-optimum-benchmark.el @@ -1,19 +1,18 @@ -;;; test-lorem-optimum-benchmark.el --- Performance tests for lorem-optimum.el -*- lexical-binding: t; -*- +;;; test-lorem-optimum-benchmark.el --- Performance benchmarks for lorem-optimum.el -*- lexical-binding: t; -*- ;;; Commentary: -;; Benchmark and performance tests for the Markov chain implementation. +;; Benchmarks for the order-two Markov chain in `lorem-optimum.el'. ;; -;; These tests measure: -;; - Learning time scaling with input size -;; - Multiple learning operations (exposes key rebuild overhead) -;; - Generation time scaling -;; - Memory usage (hash table growth) +;; Every test here is tagged `:perf', so `make test', `make coverage', and the +;; PostToolUse test hook skip them. Run them deliberately: ;; -;; Performance baseline targets (on modern hardware): -;; - Learn 1000 words: < 10ms -;; - Learn 10,000 words: < 100ms -;; - 100 learn operations of 100 words each: < 500ms (current bottleneck!) -;; - Generate 100 words: < 5ms +;; make benchmark +;; +;; The tests print timings (learning, generation, tokenization, hash-table +;; growth) for eyeballing. The only assertions are machine-independent +;; *ratios*: `benchmark-learn-scaling' fails if learn time grows faster than +;; roughly linearly with input size. A slow CI runner or an old laptop won't +;; trip that — an O(N^2) regression will. ;;; Code: @@ -53,43 +52,39 @@ ;;; Learning Performance Tests -(ert-deftest benchmark-learn-1k-words () - "Benchmark learning 1000 words." - (let* ((text (generate-test-text 1000)) - (chain (cj/markov-chain-create)) - (time (benchmark-time - (lambda () (cj/markov-learn chain text))))) - (benchmark-report "Learn 1K words" time) - (should (< time 50.0)))) ; Should be < 50ms - -(ert-deftest benchmark-learn-10k-words () - "Benchmark learning 10,000 words." - (let* ((text (generate-test-text 10000)) - (chain (cj/markov-chain-create)) - (time (benchmark-time - (lambda () (cj/markov-learn chain text))))) - (benchmark-report "Learn 10K words" time) - (should (< time 500.0)))) ; Should be < 500ms - -(ert-deftest benchmark-learn-100k-words () - "Benchmark learning 100,000 words (stress test)." - (let* ((text (generate-test-text 100000)) - (chain (cj/markov-chain-create)) - (time (benchmark-time - (lambda () (cj/markov-learn chain text))))) - (benchmark-report "Learn 100K words" time) - ;; This may be slow due to key rebuild - (message "Hash table size: %d bigrams" - (hash-table-count (cj/markov-chain-map chain))) - (should (< time 5000.0)))) - -;;; Multiple Learning Operations (Exposes Quadratic Behavior) +(ert-deftest benchmark-learn-scaling () + "Learn time scales no worse than ~linearly with input size. +Reports 1K/10K/100K learn times and the hash-table size, then asserts each +10x jump in input costs less than 40x the time — generous enough that GC +pauses and a slow machine pass, tight enough that O(N^2) fails." + :tags '(:perf) + (let* ((t1k (benchmark-time + (lambda () (cj/markov-learn (cj/markov-chain-create) + (generate-test-text 1000))))) + (t10k (benchmark-time + (lambda () (cj/markov-learn (cj/markov-chain-create) + (generate-test-text 10000))))) + (chain-100k (cj/markov-chain-create)) + (t100k (benchmark-time + (lambda () (cj/markov-learn chain-100k + (generate-test-text 100000)))))) + (benchmark-report "Learn 1K words" t1k) + (benchmark-report "Learn 10K words" t10k) + (benchmark-report "Learn 100K words" t100k) + (message "Hash table size after 100K words: %d bigrams" + (hash-table-count (cj/markov-chain-map chain-100k))) + (should (> t1k 0.0)) + (should (< t10k (* 40.0 t1k))) + (should (< t100k (* 40.0 t10k))))) + +;;; Multiple Learning Operations (ert-deftest benchmark-multiple-learns-10x100 () - "Benchmark 10 learn operations of 100 words each." + "Report 10 learn operations of 100 words each." + :tags '(:perf) (let ((chain (cj/markov-chain-create)) (times '())) - (dotimes (i 10) + (dotimes (_ 10) (let* ((text (generate-test-text 100)) (time (benchmark-time (lambda () (cj/markov-learn chain text))))) @@ -100,12 +95,14 @@ (benchmark-report "10x learn 100 words - TOTAL" total) (benchmark-report "10x learn 100 words - AVG" avg) (benchmark-report "10x learn 100 words - MAX" max-time) - (message "Times: %S" (nreverse times)) - ;; Note: Watch if later operations are slower (quadratic behavior) - (should (< total 100.0))))) ; Total should be < 100ms + (message "Times: %S" (nreverse times))))) (ert-deftest benchmark-multiple-learns-100x100 () - "Benchmark 100 learn operations of 100 words each (key rebuild overhead)." + "Report 100 learn operations of 100 words each, sampling for slowdown. +With the O(1) word-vector access and lazy key rebuild, per-chunk time should +stay flat across the run; a growing first-10-vs-last-10 gap means a +regression to quadratic behavior." + :tags '(:perf) (let ((chain (cj/markov-chain-create)) (times '()) (measurements '())) @@ -119,6 +116,8 @@ (push (cons i time) measurements)))) (let ((total (apply #'+ times)) (avg (/ (apply #'+ times) 100.0)) + ;; `times' is built with `push', so the tail holds the first + ;; iterations and the head holds the last ones. (first-10-avg (/ (apply #'+ (last times 10)) 10.0)) (last-10-avg (/ (apply #'+ (seq-take times 10)) 10.0))) (benchmark-report "100x learn 100 words - TOTAL" total) @@ -128,39 +127,36 @@ (message "Sampled times (iteration, ms): %S" (nreverse measurements)) (message "Hash table size: %d bigrams" (hash-table-count (cj/markov-chain-map chain))) - ;; This exposes the quadratic behavior: last operations much slower (when (> last-10-avg (* 2.0 first-10-avg)) - (message "WARNING: Learning slows down significantly over time!") - (message " First 10 avg: %.2f ms" first-10-avg) - (message " Last 10 avg: %.2f ms" last-10-avg) - (message " Ratio: %.1fx slower" (/ last-10-avg first-10-avg)))))) + (message "WARNING: learning slows down over time! %.1fx (%.2f -> %.2f ms)" + (/ last-10-avg first-10-avg) first-10-avg last-10-avg))))) ;;; Generation Performance Tests (ert-deftest benchmark-generate-100-words () - "Benchmark generating 100 words." - (let* ((text (generate-test-text 1000)) - (chain (cj/markov-chain-create))) - (cj/markov-learn chain text) - (let ((time (benchmark-time - (lambda () (cj/markov-generate chain 100))))) - (benchmark-report "Generate 100 words" time) - (should (< time 30.0))))) ; Should be < 30ms + "Report time to generate 100 words." + :tags '(:perf) + (let ((chain (cj/markov-chain-create))) + (cj/markov-learn chain (generate-test-text 1000)) + (benchmark-report "Generate 100 words" + (benchmark-time + (lambda () (cj/markov-generate chain 100)))))) ;;; Tokenization Performance Tests (ert-deftest benchmark-tokenize-10k-words () - "Benchmark tokenizing 10,000 words." - (let* ((text (generate-test-text 10000)) - (time (benchmark-time - (lambda () (cj/markov-tokenize text))))) - (benchmark-report "Tokenize 10K words" time) - (should (< time 50.0)))) ; Tokenization should be fast + "Report time to tokenize 10,000 words." + :tags '(:perf) + (let ((text (generate-test-text 10000))) + (benchmark-report "Tokenize 10K words" + (benchmark-time + (lambda () (cj/markov-tokenize text)))))) ;;; Memory/Size Tests (ert-deftest benchmark-chain-growth () - "Measure hash table growth with increasing input." + "Report hash-table growth with increasing input." + :tags '(:perf) (let ((chain (cj/markov-chain-create)) (sizes '())) (dolist (word-count '(100 500 1000 5000 10000)) @@ -174,7 +170,8 @@ ;;; Comparison: Tokenization vs Learning (ert-deftest benchmark-tokenize-vs-learn () - "Compare tokenization time to total learning time." + "Report tokenization time as a fraction of total learning time." + :tags '(:perf) (let* ((text (generate-test-text 5000)) (tokenize-time (benchmark-time (lambda () (cj/markov-tokenize text)))) @@ -189,23 +186,22 @@ ;;; Real-world Scenario (ert-deftest benchmark-realistic-usage () - "Benchmark realistic usage: learn from multiple sources, generate paragraphs." + "Report a realistic mix: learn from several sources, then generate paragraphs." + :tags '(:perf) (let ((chain (cj/markov-chain-create)) (learn-total 0.0) (gen-total 0.0)) ;; Simulate learning from 10 different sources - (dotimes (i 10) + (dotimes (_ 10) (let ((text (generate-test-text 500))) (setq learn-total (+ learn-total (benchmark-time (lambda () (cj/markov-learn chain text))))))) - ;; Generate 5 paragraphs - (dotimes (i 5) + (dotimes (_ 5) (setq gen-total (+ gen-total (benchmark-time (lambda () (cj/markov-generate chain 50)))))) - (benchmark-report "Realistic: 10 learns (500 words each)" learn-total) (benchmark-report "Realistic: 5 generations (50 words each)" gen-total) (benchmark-report "Realistic: TOTAL TIME" (+ learn-total gen-total)) |
