#!/usr/bin/env -S uv run --quiet --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["textstat"]
# ///
#
# Provenance: added as scripts/readability (mode 100755) in commit
# 1abae87acaba85453ef9b7e1eafe0d6e8e22c4e5 by Craig Jennings,
# Fri, 8 May 2026 08:10:39 -0500:
#   "feat(scripts): add readability tool + pre-warm textstat in deps"
#
# From the commit message: the tool prints standard readability metrics
# (Flesch Reading Ease, Flesch-Kincaid Grade, Gunning Fog, SMOG,
# Coleman-Liau, ARI, Dale-Chall, Linsear-Write) for one input file or as a
# side-by-side comparison of two. It is self-contained via PEP 723 inline
# metadata: textstat is declared as the script's only dependency, and the
# `uv run --script` shebang lets uv resolve it on each invocation. The
# Makefile `deps` target also pre-warms textstat in uv's cache so the first
# interactive run is fast.
"""Compute readability metrics for one or two text inputs.

Usage:
    readability FILE              # single-file metrics
    readability FILE1 FILE2       # side-by-side comparison
    readability -h | --help       # show this message

Notes:
    Each input is read as UTF-8 plain text. PDF/HTML/org-mode markup is
    passed through unchanged — strip first if you want a clean reading.
    First run downloads textstat (~2 MB) via uv's cache; subsequent runs
    are fast.

Exit status:
    0 on success, 1 if an input cannot be read, 2 on a usage error.
"""

from __future__ import annotations

import pathlib
import sys

import textstat  # type: ignore[import-not-found]


# Ordered (display name, function) pairs. Each function takes the full text
# and returns an int or float; the order here is the row order in the output.
METRICS = [
    ("Words", lambda t: textstat.lexicon_count(t, removepunct=True)),
    ("Sentences", textstat.sentence_count),
    ("Avg sentence length", lambda t: round(textstat.words_per_sentence(t), 1)),
    ("Avg syllables/word", lambda t: round(textstat.avg_syllables_per_word(t), 2)),
    ("Flesch Reading Ease", lambda t: round(textstat.flesch_reading_ease(t), 1)),
    ("Flesch-Kincaid Grade", lambda t: round(textstat.flesch_kincaid_grade(t), 1)),
    ("Gunning Fog", lambda t: round(textstat.gunning_fog(t), 1)),
    ("SMOG Index", lambda t: round(textstat.smog_index(t), 1)),
    ("Coleman-Liau", lambda t: round(textstat.coleman_liau_index(t), 1)),
    ("ARI", lambda t: round(textstat.automated_readability_index(t), 1)),
    ("Dale-Chall", lambda t: round(textstat.dale_chall_readability_score(t), 1)),
    ("Linsear-Write", lambda t: round(textstat.linsear_write_formula(t), 1)),
    ("Difficult words", textstat.difficult_words),
]

SCALE_NOTES = """
Scale notes:
  Flesch Reading Ease: 100 (very easy) → 0 (very confusing); academic prose ~30
  Flesch-Kincaid Grade / Gunning Fog / SMOG / Coleman-Liau / ARI / Linsear-Write
      → years of formal education needed to comprehend (US grade level)
  Dale-Chall: <5 = grade 4 reader; 7-8 = avg adult; 9+ = college; 10+ = grad
""".strip()

# Table layout. The separator-width arithmetic below relies on the gap
# between columns being exactly len(_GAP) characters.
_GAP = "  "
_DELTA_W = 10
# Finest precision any metric in METRICS is rounded to ("Avg syllables/word"
# uses 2 places); deltas are rounded to the same precision so a real
# difference is never displayed as +0.0.
_DELTA_PRECISION = 2


def compute(path: str) -> dict[str, int | float]:
    """Return {metric_name: value} for the file at path.

    The file is decoded as UTF-8 regardless of the locale so results are
    reproducible across platforms.

    Raises:
        OSError: the file is missing or cannot be read.
        UnicodeDecodeError: the file is not valid UTF-8.
    """
    text = pathlib.Path(path).read_text(encoding="utf-8")
    return {name: fn(text) for name, fn in METRICS}


def label_for(path: str) -> str:
    """Use the file's stem (no extension) as a short column label."""
    return pathlib.Path(path).stem


def _format_delta(va: object, vb: object) -> str:
    """Return the signed difference vb - va, or an em dash if non-numeric."""
    if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
        # Rounding also strips float noise such as 1.2000000000000002.
        return f"{round(vb - va, _DELTA_PRECISION):+}"
    return "—"


def _print_table(header: list[str], rows: list[list[str]], widths: list[int]) -> None:
    """Print header, dashed rule and rows; first column left-aligned, rest right."""

    def render(cells: list[str]) -> str:
        first = f"{cells[0]:<{widths[0]}}"
        rest = [f"{cell:>{w}}" for cell, w in zip(cells[1:], widths[1:])]
        return _GAP.join([first, *rest])

    print(render(header))
    print("-" * (sum(widths) + len(_GAP) * (len(widths) - 1)))
    for row in rows:
        print(render(row))


def print_single(path: str) -> None:
    """Print a two-column metrics table for one file, then the scale notes."""
    metrics = compute(path)
    label_w = max(len(k) for k in metrics)
    col = label_for(path)
    col_w = max(14, len(col))
    _print_table(
        ["Metric", col],
        [[name, str(value)] for name, value in metrics.items()],
        [label_w, col_w],
    )
    print()
    print(SCALE_NOTES)


def print_compare(path_a: str, path_b: str) -> None:
    """Print metrics for two files side by side with a Δ (b - a) column."""
    a = compute(path_a)
    b = compute(path_b)
    label_w = max(len(k) for k in a)
    name_a = label_for(path_a)
    name_b = label_for(path_b)
    col_w = max(12, len(name_a), len(name_b))
    _print_table(
        ["Metric", name_a, name_b, "Δ"],
        [[name, str(a[name]), str(b[name]), _format_delta(a[name], b[name])] for name in a],
        [label_w, col_w, col_w, _DELTA_W],
    )
    print()
    print(SCALE_NOTES)


def main() -> int:
    """Dispatch on argument count; return the process exit status."""
    args = sys.argv[1:]
    if any(arg in ("-h", "--help") for arg in args):
        # Explicitly requested help is not an error: stdout, status 0.
        sys.stdout.write(__doc__ or "")
        return 0
    if len(args) not in (1, 2):
        sys.stderr.write(__doc__ or "")
        return 2
    try:
        if len(args) == 1:
            print_single(args[0])
        else:
            print_compare(args[0], args[1])
    except UnicodeDecodeError as err:
        sys.stderr.write(f"readability: input is not valid UTF-8 text: {err}\n")
        return 1
    except OSError as err:
        # str(err) already carries the errno text and the offending path.
        sys.stderr.write(f"readability: {err}\n")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())