From 1abae87acaba85453ef9b7e1eafe0d6e8e22c4e5 Mon Sep 17 00:00:00 2001
From: Craig Jennings <c@cjennings.net>
Date: Fri, 8 May 2026 08:10:39 -0500
Subject: feat(scripts): add readability tool + pre-warm textstat in deps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds scripts/readability — a Python tool that prints standard readability metrics (Flesch Reading Ease, Flesch-Kincaid Grade, Gunning Fog, SMOG, Coleman-Liau, ARI, Dale-Chall, Linsear-Write) for one input file or as a side-by-side comparison of two.

Self-contained via PEP 723 inline metadata: textstat is declared as the script's only dependency, and the `#!/usr/bin/env -S uv run --quiet --script` shebang lets uv resolve it on each invocation.

The Makefile `deps` target now also pre-warms textstat in uv's cache so the first interactive run is fast.
---
 scripts/readability | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100755 scripts/readability

(limited to 'scripts')

diff --git a/scripts/readability b/scripts/readability
new file mode 100755
index 0000000..cdae627
--- /dev/null
+++ b/scripts/readability
@@ -0,0 +1,109 @@
+#!/usr/bin/env -S uv run --quiet --script
+# /// script
+# requires-python = ">=3.10"
+# dependencies = ["textstat"]
+# ///
+"""Compute readability metrics for one or two text inputs.
+
+Usage:
+    readability FILE                  # single-file metrics
+    readability FILE1 FILE2           # side-by-side comparison
+
+Notes:
+    Each input is read as plain text. PDF/HTML/org-mode markup is passed
+    through unchanged — strip first if you want a clean reading.
+    First run downloads textstat (~2 MB) via uv's cache; subsequent runs
+    are fast.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import sys
+
+import textstat  # type: ignore[import-not-found]
+
+
+METRICS = [  # (row label, callable taking the full text) pairs; list order is the display order
+    ("Words", lambda t: textstat.lexicon_count(t, removepunct=True)),
+    ("Sentences", textstat.sentence_count),
+    ("Avg sentence length", lambda t: round(textstat.words_per_sentence(t), 1)),
+    ("Avg syllables/word", lambda t: round(textstat.avg_syllables_per_word(t), 2)),
+    ("Flesch Reading Ease", lambda t: round(textstat.flesch_reading_ease(t), 1)),
+    ("Flesch-Kincaid Grade", lambda t: round(textstat.flesch_kincaid_grade(t), 1)),
+    ("Gunning Fog", lambda t: round(textstat.gunning_fog(t), 1)),
+    ("SMOG Index", lambda t: round(textstat.smog_index(t), 1)),
+    ("Coleman-Liau", lambda t: round(textstat.coleman_liau_index(t), 1)),
+    ("ARI", lambda t: round(textstat.automated_readability_index(t), 1)),
+    ("Dale-Chall", lambda t: round(textstat.dale_chall_readability_score(t), 1)),
+    ("Linsear-Write", lambda t: round(textstat.linsear_write_formula(t), 1)),
+    ("Difficult words", textstat.difficult_words),
+]
+
+SCALE_NOTES = """
+Scale notes:
+  Flesch Reading Ease: 100 (very easy) → 0 (very confusing); academic prose ~30
+  Flesch-Kincaid Grade / Gunning Fog / SMOG / Coleman-Liau / ARI / Linsear-Write
+    → years of formal education needed to comprehend (US grade level)
+  Dale-Chall: <5 = grade 4 reader; 7-8 = avg adult; 9+ = college; 10+ = grad
+""".strip()  # strip() drops the literal's leading/trailing newline; printed after both table layouts
+
+
+def compute(path: str) -> dict:
+    """Return {metric_name: value} for the file at path, read as UTF-8 (undecodable bytes replaced)."""
+    text = pathlib.Path(path).read_text(encoding="utf-8", errors="replace")  # explicit: never depend on the locale
+    return {name: fn(text) for name, fn in METRICS}
+
+
+def label_for(path: str) -> str:
+    """Return the final path component minus its last suffix, for use as a column heading."""
+    return pathlib.PurePath(path).stem
+
+
+def print_single(path: str) -> None:
+    """Print a two-column metric table for the file at path, followed by SCALE_NOTES."""
+    results = compute(path)
+    heading = label_for(path)
+    name_w, value_w = max(map(len, results)), max(14, len(heading))
+    row = f"{{:<{name_w}}}  {{:>{value_w}}}".format  # left-aligned label, right-aligned value
+    print(row("Metric", heading))
+    print("-" * (name_w + 2 + value_w))
+    print("\n".join(row(name, str(value)) for name, value in results.items()))
+    print()
+    print(SCALE_NOTES)
+
+
+def print_compare(path_a: str, path_b: str) -> None:
+    """Print metrics for two files side by side with a signed Δ column (path_b minus path_a)."""
+    a, b = compute(path_a), compute(path_b)
+    label_w = max(len(k) for k in a)
+    name_a, name_b = label_for(path_a), label_for(path_b)
+    col_w = max(12, len(name_a), len(name_b))
+    print(f"{'Metric':<{label_w}}  {name_a:>{col_w}}  {name_b:>{col_w}}  {'Δ':>10}")
+    print("-" * (label_w + 2 + col_w + 2 + col_w + 2 + 10))
+    for name in a:
+        va, vb = a[name], b[name]
+        if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
+            # 2 dp matches the finest displayed precision, so 1.45 vs 1.49 is not shown as +0.0.
+            delta_str = f"{round(vb - va, 2):+}"
+        else:
+            delta_str = "—"
+        print(f"{name:<{label_w}}  {str(va):>{col_w}}  {str(vb):>{col_w}}  {delta_str:>10}")
+    print()
+    print(SCALE_NOTES)
+
+
+def main() -> int:
+    """Dispatch on argument count; return 0 on success or 2 after writing usage to stderr."""
+    paths = sys.argv[1:]
+    handlers = {1: print_single, 2: print_compare}
+    if len(paths) not in handlers:
+        # Anything other than one or two paths is a usage error.
+        sys.stderr.write(__doc__ or "")
+        return 2
+    handlers[len(paths)](*paths)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
-- 
cgit v1.2.3