1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#!/usr/bin/env -S uv run --quiet --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["textstat"]
# ///
"""Compute readability metrics for one or two text inputs.
Usage:
readability FILE # single-file metrics
readability FILE1 FILE2 # side-by-side comparison
Notes:
Each input is read as plain text. PDF/HTML/org-mode markup is passed
through unchanged — strip first if you want a clean reading.
First run downloads textstat (~2 MB) via uv's cache; subsequent runs
are fast.
"""
from __future__ import annotations
import pathlib
import sys
import textstat # type: ignore[import-not-found]
# Ordered table of (row label, callable) pairs. Each callable takes the full
# document text and returns the number shown in that row. The list order is
# the row order of the printed tables.
METRICS = [
    # Raw counts and averages.
    ("Words", lambda text: textstat.lexicon_count(text, removepunct=True)),
    ("Sentences", textstat.sentence_count),
    (
        "Avg sentence length",
        lambda text: round(textstat.words_per_sentence(text), 1),
    ),
    (
        "Avg syllables/word",
        lambda text: round(textstat.avg_syllables_per_word(text), 2),
    ),
    # Readability formulas, each rounded to one decimal place.
    (
        "Flesch Reading Ease",
        lambda text: round(textstat.flesch_reading_ease(text), 1),
    ),
    (
        "Flesch-Kincaid Grade",
        lambda text: round(textstat.flesch_kincaid_grade(text), 1),
    ),
    ("Gunning Fog", lambda text: round(textstat.gunning_fog(text), 1)),
    ("SMOG Index", lambda text: round(textstat.smog_index(text), 1)),
    ("Coleman-Liau", lambda text: round(textstat.coleman_liau_index(text), 1)),
    ("ARI", lambda text: round(textstat.automated_readability_index(text), 1)),
    (
        "Dale-Chall",
        lambda text: round(textstat.dale_chall_readability_score(text), 1),
    ),
    ("Linsear-Write", lambda text: round(textstat.linsear_write_formula(text), 1)),
    # Vocabulary difficulty, passed through unrounded.
    ("Difficult words", textstat.difficult_words),
]
# Legend printed beneath every metrics table, explaining how to read the
# scores. The surrounding newlines of the literal are removed by .strip().
SCALE_NOTES = """
Scale notes:
Flesch Reading Ease: 100 (very easy) → 0 (very confusing); academic prose ~30
Flesch-Kincaid Grade / Gunning Fog / SMOG / Coleman-Liau / ARI / Linsear-Write
→ years of formal education needed to comprehend (US grade level)
Dale-Chall: <5 = grade 4 reader; 7-8 = avg adult; 9+ = college; 10+ = grad
""".strip()
def compute(path: str) -> dict[str, float]:
    """Return ``{metric_name: value}`` for the text file at *path*.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's locale encoding, so results do not vary between machines.
    Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    aborting the run.

    Args:
        path: Path of the plain-text file to analyse.

    Returns:
        A dict with one entry per ``METRICS`` row, in ``METRICS`` order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    text = pathlib.Path(path).read_text(encoding="utf-8", errors="replace")
    return {name: fn(text) for name, fn in METRICS}
def label_for(path: str) -> str:
    """Return a short column heading: the final path component minus its last suffix."""
    source = pathlib.Path(path)
    return source.stem
def print_single(path: str) -> None:
    """Print a two-column metrics table for one file, then the scale notes.

    Args:
        path: File to analyse; its stem becomes the value-column heading.
    """
    metrics = compute(path)
    heading = label_for(path)

    # One gutter string is shared by the header, the rows and the rule-width
    # arithmetic so the dashed rule always matches the table width.
    gap = "  "
    label_w = max(len(name) for name in metrics)
    col_w = max(14, len(heading))

    print(f"{'Metric':<{label_w}}{gap}{heading:>{col_w}}")
    print("-" * (label_w + len(gap) + col_w))
    for name, value in metrics.items():
        print(f"{name:<{label_w}}{gap}{value!s:>{col_w}}")
    print()
    print(SCALE_NOTES)
def print_compare(path_a: str, path_b: str) -> None:
    """Print a side-by-side metrics table for two files, then the scale notes.

    The last column shows the signed difference ``second - first``, rounded
    to one decimal place; a dash is shown when either value is not numeric.

    Args:
        path_a: First (baseline) file.
        path_b: Second file, compared against the first.
    """
    first = compute(path_a)
    second = compute(path_b)
    name_a = label_for(path_a)
    name_b = label_for(path_b)

    # One gutter string is shared by the header, the rows and the rule-width
    # arithmetic so the dashed rule always matches the table width.
    gap = "  "
    delta_w = 10
    label_w = max(len(name) for name in first)
    col_w = max(12, len(name_a), len(name_b))

    print(
        f"{'Metric':<{label_w}}{gap}{name_a:>{col_w}}"
        f"{gap}{name_b:>{col_w}}{gap}{'Δ':>{delta_w}}"
    )
    print("-" * (label_w + 2 * col_w + delta_w + 3 * len(gap)))
    for name, va in first.items():
        vb = second[name]
        if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
            delta_str = f"{round(vb - va, 1):+}"
        else:
            delta_str = "—"
        print(
            f"{name:<{label_w}}{gap}{va!s:>{col_w}}"
            f"{gap}{vb!s:>{col_w}}{gap}{delta_str:>{delta_w}}"
        )
    print()
    print(SCALE_NOTES)
def main() -> int:
args = sys.argv[1:]
if len(args) == 1:
print_single(args[0])
elif len(args) == 2:
print_compare(args[0], args[1])
else:
sys.stderr.write(__doc__ or "")
return 2
return 0
if __name__ == "__main__":
sys.exit(main())
|