diff options
| author | Craig Jennings <c@cjennings.net> | 2026-05-08 08:10:39 -0500 |
|---|---|---|
| committer | Craig Jennings <c@cjennings.net> | 2026-05-08 08:10:39 -0500 |
| commit | 1abae87acaba85453ef9b7e1eafe0d6e8e22c4e5 (patch) | |
| tree | 9b07ac98e18b82fbacae20540fb7dd6b9681a4d8 | |
| parent | 0f6ed33ab7f183f86763866f35dcf25c8ba27330 (diff) | |
| download | rulesets-1abae87acaba85453ef9b7e1eafe0d6e8e22c4e5.tar.gz rulesets-1abae87acaba85453ef9b7e1eafe0d6e8e22c4e5.zip | |
feat(scripts): add readability tool + pre-warm textstat in deps
Adds scripts/readability — a Python tool that prints standard readability metrics (Flesch Reading Ease, Flesch-Kincaid Grade, Gunning Fog, SMOG, Coleman-Liau, ARI, Dale-Chall, Linsear-Write) for one input file or as a side-by-side comparison of two.
Self-contained via PEP 723 inline metadata: textstat is declared as the script's only dependency, and the `#!/usr/bin/env -S uv run --quiet --script` shebang lets uv resolve it on each invocation.
The Makefile `deps` target now also pre-warms textstat in uv's cache so the first interactive run is fast.
| -rw-r--r-- | Makefile | 7 | ||||
| -rwxr-xr-x | scripts/readability | 135 |
2 files changed, 142 insertions, 0 deletions
@@ -92,6 +92,13 @@ deps: ## Install required tools (claude, node, jq, fzf, ripgrep, emacs, playwrig
 		{ echo " emacs: installing..."; $(call install_pkg,emacs); }
 	@command -v uv >/dev/null 2>&1 && echo " uv: installed ($$(uv --version | awk '{print $$NF}'))" || \
 		{ echo " uv: installing..."; $(call install_pkg,uv); }
+	@# Pre-warm script-level Python deps (PEP 723 inline metadata in scripts/).
+	@# First-time invocations otherwise download their deps on demand; warming
+	@# the cache here keeps interactive use snappy.
+	@command -v uv >/dev/null 2>&1 \
+		&& uv run --quiet --with textstat python -c "" >/dev/null 2>&1 \
+		&& echo " textstat: cached (scripts/readability)" \
+		|| echo " textstat: skipped (uv missing or offline)"
 	@if [ -d "$(CURDIR)/playwright-js" ]; then \
 		if [ -d "$(CURDIR)/playwright-js/node_modules/playwright" ]; then \
 			echo " playwright (js): installed (skill node_modules present)"; \
diff --git a/scripts/readability b/scripts/readability
new file mode 100755
index 0000000..cdae627
--- /dev/null
+++ b/scripts/readability
@@ -0,0 +1,135 @@
+#!/usr/bin/env -S uv run --quiet --script
+# /// script
+# requires-python = ">=3.10"
+# dependencies = ["textstat"]
+# ///
+"""Compute readability metrics for one or two text inputs.
+
+Usage:
+    readability FILE            # single-file metrics
+    readability FILE1 FILE2     # side-by-side comparison
+    readability -h | --help     # show this help
+
+Notes:
+    Each input is read as UTF-8 plain text. PDF/HTML/org-mode markup is
+    passed through unchanged — strip first if you want a clean reading.
+    First run downloads textstat (~2 MB) via uv's cache; subsequent runs
+    are fast.
+
+Exit status:
+    0 on success, 1 if an input cannot be read, 2 on a usage error.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import sys
+
+import textstat  # type: ignore[import-not-found]
+
+
+# (label, function) pairs in display order; each function receives the full
+# input text and its result is shown as that row's value.
+METRICS = [
+    ("Words", lambda t: textstat.lexicon_count(t, removepunct=True)),
+    ("Sentences", textstat.sentence_count),
+    ("Avg sentence length", lambda t: round(textstat.words_per_sentence(t), 1)),
+    ("Avg syllables/word", lambda t: round(textstat.avg_syllables_per_word(t), 2)),
+    ("Flesch Reading Ease", lambda t: round(textstat.flesch_reading_ease(t), 1)),
+    ("Flesch-Kincaid Grade", lambda t: round(textstat.flesch_kincaid_grade(t), 1)),
+    ("Gunning Fog", lambda t: round(textstat.gunning_fog(t), 1)),
+    ("SMOG Index", lambda t: round(textstat.smog_index(t), 1)),
+    ("Coleman-Liau", lambda t: round(textstat.coleman_liau_index(t), 1)),
+    ("ARI", lambda t: round(textstat.automated_readability_index(t), 1)),
+    ("Dale-Chall", lambda t: round(textstat.dale_chall_readability_score(t), 1)),
+    ("Linsear-Write", lambda t: round(textstat.linsear_write_formula(t), 1)),
+    ("Difficult words", textstat.difficult_words),
+]
+
+SCALE_NOTES = """
+Scale notes:
+  Flesch Reading Ease: 100 (very easy) → 0 (very confusing); academic prose ~30
+  Flesch-Kincaid Grade / Gunning Fog / SMOG / Coleman-Liau / ARI / Linsear-Write
+      → years of formal education needed to comprehend (US grade level)
+  Dale-Chall: <5 = grade 4 reader; 7-8 = avg adult; 9+ = college; 10+ = grad
+""".strip()
+
+# Deltas are rounded to the finest precision any metric uses (two decimals,
+# for "Avg syllables/word") so small differences are not flattened to 0.0.
+DELTA_PRECISION = 2
+
+
+def compute(path: str) -> dict:
+    """Return {metric_name: value} for the file at path.
+
+    The file is decoded as UTF-8 rather than the locale's default encoding,
+    so results do not depend on the environment the script runs in.
+    """
+    text = pathlib.Path(path).read_text(encoding="utf-8")
+    return {name: fn(text) for name, fn in METRICS}
+
+
+def label_for(path: str) -> str:
+    """Use the file's stem (no extension) as a short column label."""
+    return pathlib.Path(path).stem
+
+
+def print_single(path: str) -> None:
+    """Print a two-column metric table for one file, then the scale notes."""
+    metrics = compute(path)
+    label_w = max(len(k) for k in metrics)
+    col = label_for(path)
+    col_w = max(14, len(col))
+    print(f"{'Metric':<{label_w}}  {col:>{col_w}}")
+    print("-" * (label_w + 2 + col_w))
+    for name, value in metrics.items():
+        print(f"{name:<{label_w}}  {str(value):>{col_w}}")
+    print()
+    print(SCALE_NOTES)
+
+
+def print_compare(path_a: str, path_b: str) -> None:
+    """Print both files' metrics side by side with a signed delta (b - a)."""
+    a = compute(path_a)
+    b = compute(path_b)
+    label_w = max(len(k) for k in a)
+    name_a = label_for(path_a)
+    name_b = label_for(path_b)
+    col_w = max(12, len(name_a), len(name_b))
+    print(f"{'Metric':<{label_w}}  {name_a:>{col_w}}  {name_b:>{col_w}}  {'Δ':>10}")
+    print("-" * (label_w + 2 + col_w + 2 + col_w + 2 + 10))
+    for name in a:
+        va, vb = a[name], b[name]
+        if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
+            delta_str = f"{round(vb - va, DELTA_PRECISION):+}"
+        else:
+            delta_str = "—"
+        print(f"{name:<{label_w}}  {str(va):>{col_w}}  {str(vb):>{col_w}}  {delta_str:>10}")
+    print()
+    print(SCALE_NOTES)
+
+
+def main() -> int:
+    """Run the CLI and return the exit status (0 ok, 1 read error, 2 usage)."""
+    args = sys.argv[1:]
+    if any(arg in ("-h", "--help") for arg in args):
+        # Without this, "--help" would be treated as a file name.
+        print(__doc__ or "", end="")
+        return 0
+    if len(args) not in (1, 2):
+        sys.stderr.write(__doc__ or "")
+        return 2
+    try:
+        if len(args) == 1:
+            print_single(args[0])
+        else:
+            print_compare(args[0], args[1])
+    except (OSError, UnicodeDecodeError) as err:
+        # Report unreadable input as one line instead of a traceback.
+        sys.stderr.write(f"readability: {err}\n")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
