aboutsummaryrefslogtreecommitdiff
path: root/.ai/scripts/wrap-org-table.el
blob: ddbea656d2748c16a89697d36f486df4b90642ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
;;; wrap-org-table.el --- reflow org tables to the width standard  -*- lexical-binding: t; -*-
;;
;; Reformats org tables to the org-table standard (claude-rules/org-tables.md):
;;
;;   1. Max 120 columns wide, measured at RENDER width — an org link counts as
;;      its visible label, not its [[target][label]] source. Links are never
;;      split to chase a source-width number.
;;   2. Cells that would push a row past the budget wrap onto continuation
;;      rows (the other columns left blank).
;;   3. A horizontal rule under the header and under every logical data row,
;;      closing rule included.
;;
;; Usage:
;;   emacs --batch -q -l wrap-org-table.el [--width=120] FILE.org [FILE.org ...]
;;     reformat every table in each file, in place. A backup of each file is
;;     copied to /tmp/<basename>.before-table-wrap.<YYYYMMDD-HHMMSS> first.
;;
;; As a library: (wot-reformat-table-string STRING &optional BUDGET) is the
;; pure core; (wot-process-file FILE &optional BUDGET) is the file layer.
;;
;; Column widths: each column starts at its natural width (the widest cell it
;; holds, render-measured). When the row total exceeds the budget, the widest
;; columns shrink first, never below the column's floor — its longest atomic
;; token (a word, or a whole link) — because going lower would force a
;; mid-word or mid-link split. A table whose floors alone exceed the budget is
;; reflowed to the floors (best effort): the source stays over budget and the
;; lint check keeps flagging it for a human to restructure (merge or drop
;; columns — a judgment call this helper doesn't make).

(require 'cl-lib)
(require 'subr-x)

(defconst wot-default-budget 120
  "Default table width budget in render columns, pipes included.")

;;; ---------------------------------------------------------------------------
;;; pure core

(defun wot-render-width (s)
  "Render width of cell text S: org links count as their visible label.
A descriptive link [[target][label]] measures as its label; a bare [[target]]
measures as the target text. Everything else is literal."
  (let ((rendered (replace-regexp-in-string
                   "\\[\\[\\([^][]*\\)\\]\\(?:\\[\\([^][]*\\)\\]\\)?\\]"
                   (lambda (m)
                     (save-match-data
                       (if (string-match
                            "\\[\\[\\([^][]*\\)\\]\\[\\([^][]*\\)\\]\\]" m)
                           (match-string 2 m)
                         (string-match "\\[\\[\\([^][]*\\)\\]\\]" m)
                         (match-string 1 m))))
                   s t t)))
    (length rendered)))

(defun wot-tokenize (s)
  "Split cell text S into tokens; org links are atomic tokens."
  (let ((tokens nil)
        (pos 0)
        (link-re "\\[\\[[^][]*\\]\\(?:\\[[^][]*\\]\\)?\\]"))
    (while (string-match link-re s pos)
      ;; Capture the bounds first: split-string below runs its own matches
      ;; and clobbers the global match data.
      (let ((mb (match-beginning 0))
            (me (match-end 0)))
        (dolist (w (split-string (substring s pos mb) nil t)) (push w tokens))
        (push (substring s mb me) tokens)
        (setq pos me)))
    (dolist (w (split-string (substring s pos) nil t)) (push w tokens))
    (nreverse tokens)))

(defun wot-wrap-cell (s width)
  "Greedy-wrap cell text S into lines of at most WIDTH render columns.
Tokens (words and whole links) are never split; a token wider than WIDTH sits
alone on its own over-width line."
  (let ((tokens (wot-tokenize s))
        (lines nil)
        (current ""))
    (dolist (tok tokens)
      (cond
       ((string-empty-p current)
        (setq current tok))
       ((<= (+ (wot-render-width current) 1 (wot-render-width tok)) width)
        (setq current (concat current " " tok)))
       (t
        (push current lines)
        (setq current tok))))
    (push current lines)
    (nreverse lines)))

(defun wot--column-floor (cells)
  "Floor width for a column holding CELLS: its widest atomic token."
  (let ((floor 1))
    (dolist (cell cells)
      (dolist (tok (wot-tokenize cell))
        (setq floor (max floor (wot-render-width tok)))))
    floor))

(defun wot-allocate-widths (rows budget)
  "Column widths for ROWS (lists of cell strings) under BUDGET total width.
Row overhead is `| ' + ` | ' separators + ` |', i.e. 3*ncols + 1. Columns
start at natural width; the widest shrink first, never below their floor."
  (let* ((ncols (apply #'max (mapcar #'length rows)))
         (cols (cl-loop for i below ncols
                        collect (mapcar (lambda (r) (or (nth i r) "")) rows)))
         (widths (mapcar (lambda (col)
                           (apply #'max 1 (mapcar #'wot-render-width col)))
                         cols))
         (floors (mapcar #'wot--column-floor cols))
         (cell-budget (- budget (+ (* 3 ncols) 1))))
    (cl-loop while (> (apply #'+ widths) cell-budget)
             for idx = (cl-loop with best = nil with best-w = -1
                                for i below ncols
                                when (and (> (nth i widths) (nth i floors))
                                          (> (nth i widths) best-w))
                                do (setq best i best-w (nth i widths))
                                finally return best)
             while idx
             do (setf (nth idx widths) (1- (nth idx widths))))
    widths))

(defun wot--pad (cell width)
  "Pad CELL source text with spaces so its render width is at least WIDTH."
  (concat cell (make-string (max 0 (- width (wot-render-width cell))) ?\s)))

(defun wot--hline (widths indent)
  (concat indent "|"
          (mapconcat (lambda (w) (make-string (+ w 2) ?-)) widths "+")
          "|"))

(defun wot--emit-row (cells widths indent)
  "Physical lines for one logical row: CELLS wrapped to WIDTHS, link-safe."
  (let* ((wrapped (cl-loop for i below (length widths)
                           collect (wot-wrap-cell (or (nth i cells) "")
                                                  (nth i widths))))
         (height (apply #'max (mapcar #'length wrapped))))
    (cl-loop for line below height
             collect (concat indent "| "
                             (mapconcat
                              (lambda (i)
                                (wot--pad (or (nth line (nth i wrapped)) "")
                                          (nth i widths)))
                              (number-sequence 0 (1- (length widths)))
                              " | ")
                             " |"))))

(defun wot--parse-row (line)
  "Cell strings of table LINE, or the symbol `hline'."
  (let ((trimmed (string-trim line)))
    (if (string-prefix-p "|-" trimmed)
        'hline
      (mapcar #'string-trim
              (split-string (string-remove-suffix "|"
                             (string-remove-prefix "|" trimmed))
                            "|")))))

(defun wot--merge-group (group)
  "Merge GROUP (a list of cell-lists) into one logical row.
Each column's non-empty values join with a space — the inverse of the
continuation-row split `wot--emit-row' produces."
  (let ((ncols (apply #'max (mapcar #'length group))))
    (cl-loop for i below ncols
             collect (string-join
                      (cl-remove-if #'string-empty-p
                                    (mapcar (lambda (r) (or (nth i r) ""))
                                            group))
                      " "))))

(defun wot--continuation-group-p (group)
  "Non-nil when GROUP's lines after the first read as continuation rows.
A continuation row carries overflow text in some columns and leaves the rest
empty, so every line past the first must have at least one empty cell. A
group of fully-populated lines is distinct rows that merely share a rule."
  (and (> (length group) 1)
       (cl-every (lambda (r) (cl-some #'string-empty-p r))
                 (cdr group))))

(defun wot--logical-rows (elems)
  "Logical rows from ELEMS, a list of cell-lists and `hline' symbols.
With no hlines, every line is its own row. With hlines, lines group between
rules; a group whose trailing lines look like continuations (each has an
empty cell) merges into one logical row — that makes re-running on
already-conformant output a no-op — while fully-populated groups keep their
line-per-row meaning."
  (if (not (memq 'hline elems))
      elems
    (let ((groups nil) (current nil))
      (dolist (e elems)
        (if (eq e 'hline)
            (when current
              (push (nreverse current) groups)
              (setq current nil))
          (push e current)))
      (when current (push (nreverse current) groups))
      (cl-loop for g in (nreverse groups)
               if (wot--continuation-group-p g)
               collect (wot--merge-group g)
               else append g))))

(defun wot-reformat-table-string (table-string &optional budget)
  "Reformat TABLE-STRING to the org-table standard at BUDGET width.
Wraps over-budget cells onto continuation rows, puts a rule under the header
and under every logical data row, and preserves the table's indentation.
Re-running on already-conformant output is a no-op: rule-delimited
continuation lines merge back into their logical row before re-wrapping."
  (let* ((budget (or budget wot-default-budget))
         (lines (split-string (string-remove-suffix "\n" table-string) "\n"))
         (indent (if (string-match "^[ \t]*" (car lines))
                     (match-string 0 (car lines))
                   ""))
         (parsed (mapcar #'wot--parse-row lines))
         ;; Header = first row when the source separates it with an hline.
         (header-p (and (listp (car parsed)) (eq (cadr parsed) 'hline)))
         (header (and header-p (car parsed)))
         (data-elems (if header-p (cddr parsed) parsed))
         (rows (wot--logical-rows data-elems))
         (widths (wot-allocate-widths (if header (cons header rows) rows)
                                      budget))
         (out nil))
    (when header
      (dolist (l (wot--emit-row header widths indent)) (push l out))
      (push (wot--hline widths indent) out))
    (dolist (row rows)
      (dolist (l (wot--emit-row row widths indent)) (push l out))
      (push (wot--hline widths indent) out))
    (concat (string-join (nreverse out) "\n") "\n")))

;;; ---------------------------------------------------------------------------
;;; file layer

(defun wot-process-file (file &optional budget)
  "Reformat every org table in FILE in place to BUDGET width."
  (with-temp-buffer
    (insert-file-contents file)
    (goto-char (point-min))
    (while (re-search-forward "^[ \t]*|" nil t)
      (let ((start (line-beginning-position)))
        (while (and (not (eobp))
                    (save-excursion (beginning-of-line)
                                    (looking-at "[ \t]*|")))
          (forward-line 1))
        (let* ((end (point))
               (table (buffer-substring-no-properties start end))
               (reformatted (wot-reformat-table-string table budget)))
          (delete-region start end)
          (goto-char start)
          (insert reformatted))))
    (write-region (point-min) (point-max) file)))

;;; ---------------------------------------------------------------------------
;;; CLI

(defun wot--backup (file)
  (copy-file file
             (format "/tmp/%s.before-table-wrap.%s"
                     (file-name-nondirectory file)
                     (format-time-string "%Y%m%d-%H%M%S"))
             t))

(defun wot-main ()
  (let ((budget wot-default-budget)
        (width-arg (cl-find-if (lambda (a) (string-prefix-p "--width=" a))
                               command-line-args-left)))
    (when width-arg
      (setq budget (string-to-number (substring width-arg (length "--width="))))
      (setq command-line-args-left (delete width-arg command-line-args-left)))
    (if (null command-line-args-left)
        (progn
          (princ "Usage: emacs --batch -q -l wrap-org-table.el [--width=120] FILE.org ...\n")
          (kill-emacs 1))
      (let ((files command-line-args-left))
        (setq command-line-args-left nil)
        (dolist (file files)
          (if (file-readable-p file)
              (progn
                (wot--backup file)
                (wot-process-file file budget)
                (princ (format ";; wrap-org-table: file=%s reformatted (budget %d)\n"
                               file budget)))
            (princ (format ";; wrap-org-table: file=%s not readable — skipping\n"
                           file))))))))

(defun wot--cli-invocation-p ()
  "Non-nil when the trailing args look like a real invocation (flags + files),
so the ERT suite can `require' this file without firing the CLI dispatch."
  (and command-line-args-left
       (cl-every (lambda (a)
                   (cond ((string-prefix-p "--width=" a) t)
                         ((string-prefix-p "-" a) nil)
                         (t (file-readable-p a))))
                 command-line-args-left)))

(when (and noninteractive (wot--cli-invocation-p))
  (wot-main))

(provide 'wrap-org-table)
;;; wrap-org-table.el ends here