aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 7
-rwxr-xr-x scripts/readability 109
2 files changed, 116 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 6e859c8..7df7f13 100644
--- a/Makefile
+++ b/Makefile
@@ -92,6 +92,13 @@ deps: ## Install required tools (claude, node, jq, fzf, ripgrep, emacs, playwrig
{ echo " emacs: installing..."; $(call install_pkg,emacs); }
@command -v uv >/dev/null 2>&1 && echo " uv: installed ($$(uv --version | awk '{print $$NF}'))" || \
{ echo " uv: installing..."; $(call install_pkg,uv); }
+ @# Pre-warm script-level Python deps (PEP 723 inline metadata in scripts/).
+ @# First-time invocations otherwise download their deps on demand; warming
+ @# the cache here keeps interactive use snappy.
+ @command -v uv >/dev/null 2>&1 \
+ && uv run --quiet --with textstat python -c "" >/dev/null 2>&1 \
+ && echo " textstat: cached (scripts/readability)" \
+ || echo " textstat: skipped (uv missing or offline)"
@if [ -d "$(CURDIR)/playwright-js" ]; then \
if [ -d "$(CURDIR)/playwright-js/node_modules/playwright" ]; then \
echo " playwright (js): installed (skill node_modules present)"; \
diff --git a/scripts/readability b/scripts/readability
new file mode 100755
index 0000000..cdae627
--- /dev/null
+++ b/scripts/readability
@@ -0,0 +1,109 @@
+#!/usr/bin/env -S uv run --quiet --script
+# /// script
+# requires-python = ">=3.10"
+# dependencies = ["textstat"]
+# ///
+"""Compute readability metrics for one or two text inputs.
+
+Usage:
+ readability FILE # single-file metrics
+ readability FILE1 FILE2 # side-by-side comparison
+
+Notes:
+ Each input is read as plain text. PDF/HTML/org-mode markup is passed
+ through unchanged — strip first if you want a clean reading.
+ First run downloads textstat (~2 MB) via uv's cache; subsequent runs
+ are fast.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import sys
+
+import textstat # type: ignore[import-not-found]
+
+
+def _rounded(metric: str, digits: int):
+    """Build a callable that evaluates ``textstat.<metric>`` and rounds it.
+
+    The attribute is looked up when the callable runs, not when it is built,
+    so a missing textstat function only surfaces once that metric is computed.
+    """
+    return lambda text: round(getattr(textstat, metric)(text), digits)
+
+
+# Ordered (label, callable) pairs. Each callable takes the full text and
+# returns the number shown in that row; rows are printed in this order.
+METRICS = [
+    ("Words", lambda text: textstat.lexicon_count(text, removepunct=True)),
+    ("Sentences", textstat.sentence_count),
+    ("Avg sentence length", _rounded("words_per_sentence", 1)),
+    ("Avg syllables/word", _rounded("avg_syllables_per_word", 2)),
+    ("Flesch Reading Ease", _rounded("flesch_reading_ease", 1)),
+    ("Flesch-Kincaid Grade", _rounded("flesch_kincaid_grade", 1)),
+    ("Gunning Fog", _rounded("gunning_fog", 1)),
+    ("SMOG Index", _rounded("smog_index", 1)),
+    ("Coleman-Liau", _rounded("coleman_liau_index", 1)),
+    ("ARI", _rounded("automated_readability_index", 1)),
+    ("Dale-Chall", _rounded("dale_chall_readability_score", 1)),
+    ("Linsear-Write", _rounded("linsear_write_formula", 1)),
+    ("Difficult words", textstat.difficult_words),
+]
+
+# Legend printed beneath every table so the raw scores can be interpreted.
+SCALE_NOTES = "\n".join(
+    (
+        "Scale notes:",
+        " Flesch Reading Ease: 100 (very easy) → 0 (very confusing); academic prose ~30",
+        " Flesch-Kincaid Grade / Gunning Fog / SMOG / Coleman-Liau / ARI / Linsear-Write",
+        " → years of formal education needed to comprehend (US grade level)",
+        " Dale-Chall: <5 = grade 4 reader; 7-8 = avg adult; 9+ = college; 10+ = grad",
+    )
+)
+
+
+def compute(path: str) -> dict[str, int | float]:
+    """Return {metric_name: value} for the file at path.
+
+    The file is always decoded as UTF-8 rather than with the platform's
+    locale encoding, so results do not vary between machines. Bytes that
+    are not valid UTF-8 become U+FFFD instead of raising, which lets any
+    input be read as plain text, as the module docstring describes.
+
+    Raises:
+        OSError: if the file cannot be opened or read.
+    """
+    text = pathlib.Path(path).read_text(encoding="utf-8", errors="replace")
+    # METRICS is an ordered list of (label, callable) pairs; dict insertion
+    # order keeps the rows in that order when the result is printed.
+    return {name: fn(text) for name, fn in METRICS}
+
+
+def label_for(path: str) -> str:
+    """Derive a short column heading: the final path component minus its last suffix."""
+    as_path = pathlib.PurePath(path)
+    return as_path.stem
+
+
+def print_single(path: str) -> None:
+    """Print a two-column metric table for one file, followed by the scale notes.
+
+    The left column lists metric names; the right column, headed by the
+    file's stem, holds the values right-aligned.
+    """
+    metrics = compute(path)
+    heading = label_for(path)
+    label_w = max(len("Metric"), *(len(name) for name in metrics))
+    col_w = max(14, len(heading))
+    gutter = "  "
+
+    def row(left: str, right: str) -> str:
+        # Single place that defines the column layout for header and body.
+        return f"{left:<{label_w}}{gutter}{right:>{col_w}}"
+
+    header = row("Metric", heading)
+    print(header)
+    # Size the rule from the rendered header so it cannot drift out of step
+    # with the gutter width used in the rows.
+    print("-" * len(header))
+    for name, value in metrics.items():
+        print(row(name, str(value)))
+    print()
+    print(SCALE_NOTES)
+
+
+def print_compare(path_a: str, path_b: str) -> None:
+    """Print metrics for two files side by side with a delta column (B minus A).
+
+    Columns are headed by each file's stem. When both stems are identical
+    the paths as given are used instead, so the columns stay distinguishable.
+    """
+    a = compute(path_a)
+    b = compute(path_b)
+    name_a = label_for(path_a)
+    name_b = label_for(path_b)
+    if name_a == name_b:
+        # e.g. v1/draft.txt vs v2/draft.txt would both be headed "draft".
+        name_a, name_b = path_a, path_b
+    label_w = max(len("Metric"), *(len(name) for name in a))
+    col_w = max(12, len(name_a), len(name_b))
+    delta_w = 10
+    gutter = "  "
+
+    def row(label: str, left: str, right: str, delta: str) -> str:
+        # Single place that defines the column layout for header and body.
+        cells = (
+            f"{label:<{label_w}}",
+            f"{left:>{col_w}}",
+            f"{right:>{col_w}}",
+            f"{delta:>{delta_w}}",
+        )
+        return gutter.join(cells)
+
+    header = row("Metric", name_a, name_b, "Δ")
+    print(header)
+    # Size the rule from the rendered header so it always matches the rows.
+    print("-" * len(header))
+    for name, va in a.items():
+        vb = b[name]
+        if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
+            # Values carry at most two decimals, so rounding the difference
+            # to two removes float noise without hiding small changes in
+            # two-decimal metrics such as "Avg syllables/word".
+            delta_str = f"{round(vb - va, 2):+}"
+        else:
+            delta_str = "—"
+        print(row(name, str(va), str(vb), delta_str))
+    print()
+    print(SCALE_NOTES)
+
+
+def main() -> int:
+    """Dispatch on the command-line arguments and return the exit status.
+
+    Returns:
+        0 on success or when -h/--help was given, 1 when an input file
+        could not be read, 2 when the number of arguments is not 1 or 2.
+    """
+    args = sys.argv[1:]
+    usage = __doc__ or ""
+    if any(arg in ("-h", "--help") for arg in args):
+        # Without this, "--help" would be treated as a file name.
+        sys.stdout.write(usage)
+        return 0
+    if len(args) not in (1, 2):
+        sys.stderr.write(usage)
+        return 2
+    try:
+        if len(args) == 1:
+            print_single(args[0])
+        else:
+            print_compare(args[0], args[1])
+    except (OSError, UnicodeDecodeError) as exc:
+        # Missing, unreadable or undecodable input: report it briefly
+        # instead of dumping a traceback.
+        sys.stderr.write(f"readability: {exc}\n")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())