1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
|
;;; titlecase.el --- Title-case phrases -*- lexical-binding: t; -*-
;; Author: Case Duckworth <acdw@acdw.net>
;; Maintainer: Case Duckworth <acdw@acdw.net>
;; Version: 0.3.0
;; URL: https://codeberg.org/acdw/titlecase.el
;; Package-Requires: ((emacs "25.1"))
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; This library strives to be the most accurate possible with title-casing
;; sentences, lines, and regions of text in English prose according to a number
;; of styles guides' capitalization rules. It is necessarily a best-effort; due
;; to the vaguaries of written English it's impossible to completely correctly
;; capitalize aribtrary titles. So be sure to proofread and copy-edit your
;; titles before sending them off to be published, and never trust a computer.
;; INSTALLATION and USE:
;; Make sure both titlecase.el and titlecase-data.el are in your `load-path',
;; and `require' titlecase. You should then be able to call the interactive
;; functions defined in this file.
;;; CUSTOMIZATION:
;; Only two customization options are probably going to be of any interest:
;; `titlecase-style' (the style to use for capitalizing titles), and
;; `titlecase-dwim-non-region-function', which determines what to do when
;; `titlecase-dwim' isn't acting on a region.
;; If you want to use your own title-casing code, or a third party, you can
;; customize `titlecase-command' to something other than its default. One
;; possibility is titlecase.pl, written John Gruber and Aristotle Pagaltzis:
;; https://github.com/ap/titlecase.
;;; Code:
(require 'browse-url) ; `browse-url-button-regexp'
(require 'cl-lib) ; `cl-loop'
(require 'seq) ; `seq-some'
(require 'thingatpt) ; `bounds-of-thing-at-point'
(require 'titlecase-data)
(defgroup titlecase nil
"Customization for title-casing phrases."
:prefix "titlecase-"
:group 'text)
(defvar titlecase-styles '((chicago . "Chicago Style")
(apa . "APA Style")
(mla . "MLA Style")
(ap . "AP Style")
(bluebook . "Bluebook Style")
(ama . "AMA Style")
(nyt . "New York Times Style")
(wikipedia . "Wikipedia Style")
(imdb . "IMDB Style")
(sentence . "Sentence style"))
"Available styles for title-casing.")
(defvar titlecase-default-case-function #'capitalize-word
"What to do to a word when a style doesn't specify what to do.")
(defcustom titlecase-normalize-functions '(titlecase--lowercase-all-caps)
"List of functions for normalizing input before title-casing.
Each function will be passed 3 arguments: the beginning and
ending points of the region to operate on, as well as the
title-casing style. They are called one after another in order
in a `save-excursion' block."
:type '(repeat function))
(defcustom titlecase-skip-words-regexps (list "\\b[[:upper:]]+\\b"
browse-url-button-regexp)
"Regexps of words to skip when titlecasing.
Each regexp in this list will be tested on each word considered
for title-casing, and if the regexp matches the entire word, the
word will be skipped.
NOTE: These regexps will be matched against the title-cased
region /after/ normalizing it, which means that, by default, if
the region is in all-caps before calling `titlecase-region', it
will be downcased before title-casing. Thus, some of these
regexps might not match when expected. This behavior is a
trade-off between possible user expectations. To change this
behavior, customize `titlecase-normalize-functions'."
:type '(repeat regexp))
(defcustom titlecase-style 'wikipedia
"Which style to use when title-casing."
:type (cons 'choice (cl-loop
for style in titlecase-styles
collect (list 'const :tag (cdr style) (car style))
into choices
finally return choices)))
(defcustom titlecase-force-cap-after-punc "[.?!\\/;:\n\r]"
"Regexp to force the next word capitalized."
:type 'regexp)
(defcustom titlecase-dwim-non-region-function #'titlecase-line
"What to do with `titlecase-dwim' when region isn't active.
Recommended: `titlecase-line' or `titlecase-sentence'."
:type 'function)
(defcustom titlecase-command #'titlecase--region-with-style
"Command to use for titlecasing titles.
This option can be one of two things:
A string value, or a list of string values, is interpreted as a
system command to run using `call-process-region' on a temp
buffer containing the text to titlecase. Just a string is
interpreted as the command to run, with no arguments. A list of
strings will pass those strings as aruguments to the command-line
program. In that list, the symbol `style' will be replaced with
the the string of the title-casing style that's passed.
A function value is interpreted as the function to call on the
region. The function will be called with three arguments: the
beginning and end of the region, and the style (see
`titlecase-style') to capitalize it in.")
(defcustom titlecase-downcase-sentences nil
"Whether to downcase words after the first in \"sentence\" style.
If nil, titlecasing using the \"sentence\" style will leave all
words as-is. If t, \"sentence\"-style titlecasing will downcase
words that don't begin a sentence."
:type 'boolean)
(defun titlecase--region-with-style-impl (begin end style)
"Title-case implementation.
`titlecase-force-cap-after-punc' must be handled by the caller.
This is expected to have run in a block that uses `save-excursion' and
`save-match-data'. See documentation for `titlecase--region-with-style'
for docs on BEGIN, END and STYLE."
(let ( ;; Constants during this function's runtime.
(case-fold-search nil)
(downcase-word-list (symbol-value
(intern (format "titlecase-lowercase-%s"
style)))))
;; Normalize the text in region by calling `titlecase-normalize-functions'
;; in order.
(dolist (fn titlecase-normalize-functions)
(save-excursion
(funcall fn begin end style)))
;; Skip blank lines & white-space (where `current-word' would return nil).
;; It's important this uses the same logic that `current-word' uses to scan
;; for words, or this may be nil when it's not expected. See #11.
(goto-char begin)
(skip-syntax-forward "^w" end)
(setq begin (point))
;; And loop over the rest.
(catch :done
(while (< (point) end)
(let ((this-word (current-word)))
(cond
;; Skip words matching `titlecase-skip-words-regexps'.
((looking-at (format "%s"
(mapconcat #'identity
titlecase-skip-words-regexps
"\\|")))
(goto-char (match-end 0))
;; TODO: Document what this does (it's late)
(when (>= (point) end)
(throw :done 'skipped)))
;; Phrasal verbs!
((and (memq style titlecase-styles-capitalize-phrasal-verbs)
(member (downcase this-word)
(mapcar #'car titlecase-phrasal-verbs)))
;; We need to do a little state machine thingy here.
(let ((next-words (assoc this-word titlecase-phrasal-verbs))
(bail-pt (point)))
;; Take care of the first word --- this is inelegant.
(capitalize-word 1)
(skip-syntax-forward "^w" end)
(setq this-word (current-word))
;; Loop through the rest
(while (and this-word
(member (downcase this-word)
(mapcar #'car-safe next-words)))
(capitalize-word 1)
(skip-syntax-forward "^w" end)
(setq this-word (current-word)
next-words (mapcar #'cdr-safe next-words)))
(unless (seq-some #'null next-words)
;; If it's not a phrasal verb, bail --- but still
;; capitalize the first word!
(downcase-region bail-pt (point))
(goto-char bail-pt)
(capitalize-word 1))))
;; Force capitalization if this is the first word.
((eq begin (point))
(capitalize-word 1))
;; AP capitalizes /all/ words longer than 3 letters.
((and (memq style titlecase-styles-capitalize-non-short-words)
(> (length this-word) titlecase-short-word-length))
(capitalize-word 1))
;; Sentence style just capitalizes the first word. Since we can't be
;; sure how the user has already capitalized anything, we just skip
;; the current word. HOWEVER, there are times when downcasing the
;; rest of the sentence is warranted. --- NOTE 2022-05-09: Now I'm
;; thinking about it, does `sentence' style need to do anything
;; whatsoever? Maybe I just need to include a test toward the top of
;; the enclosing function to make `titlecase-default-case-function'
;; be `downcase-word' if `titlecase-downcase-sentences' is true... or
;; something of that nature. I might be over-engineering this, is
;; what I'm saying. Curious, isn't it?
((eq style 'sentence)
(funcall (if titlecase-downcase-sentences
#'downcase-word
#'forward-word)
1))
;; Skip the next word if:
((or
;; None of the styles require a capital letter after an
;; apostrophe.
(memq (char-before (point)) '(?' ?’))
;; FIXME: Hyphens are a completely different story with
;; capitalization.
(eq (char-before (point)) ?-))
(forward-word 1))
;; Down-case words that should be.
((member (downcase this-word) downcase-word-list)
(downcase-word 1))
;; Otherwise, do the default function on the word.
(t
(funcall titlecase-default-case-function 1))))
;; Step over the loop.
(unless (= end (point))
(skip-syntax-forward "^w" end)))
;; Capitalize the last word, only in some styles and some conditions.
(when (and (memq style titlecase-styles-capitalize-last-word))
(save-excursion
(backward-word 1)
(when (and (>= (point) begin)
(not (seq-some (lambda (r) (looking-at r))
titlecase-skip-words-regexps)))
(capitalize-word 1)))))))
(defun titlecase--region-with-style (begin end style)
"Title-case the region of English text from BEGIN to END, using STYLE."
;; It doesn't makes sense for this function to be interactive;
;; `titlecase-region' can now specify a style interactively.
(save-match-data
(while (< begin end)
(goto-char begin)
(let ((end-step
(if (re-search-forward titlecase-force-cap-after-punc
end :noerror)
(point)
end)))
(if (memq (titlecase--region-with-style-impl begin end-step style)
'(skipped))
(setq begin (point))
(setq begin end-step))))))
(defun titlecase--read-style ()
"Read which title-case style to use from the minibuffer."
(let ((choice (completing-read
"Title-case style: "
(mapcar #'cdr titlecase-styles)
nil t nil nil
(alist-get titlecase-style titlecase-styles))))
(cl-loop for (s . n) in titlecase-styles
if (equal n choice) return s)))
(defun titlecase--arg (style interactivep)
"Process arguments to titlecase functions.
If STYLE is passed to a function in any way, use it; otherwise,
if INTERACTIVEP, prompt the user for a style to use. As a
fall-back, use `titlecase-style'."
(or style
(and interactivep (titlecase--read-style))
titlecase-style))
(defun titlecase--string (str style)
"Run `titlecase-command' on STR with STYLE and return the result.
See the docstring for `titlecase-command' for its possible
values."
(let (;; Remember the existing newlines
(str-ending-newlines (replace-regexp-in-string
"\\`\\([^z-a]*?\\)\n*\\'" "" str nil nil 1)))
(with-temp-buffer
(insert str)
(cond
((stringp titlecase-command)
(call-process-region (point-min) (point-max) titlecase-command t t nil))
((listp titlecase-command)
(apply #'call-process-region (point-min) (point-max)
(car titlecase-command) t t nil
(mapcar (lambda (s)
(format "%s" (if (or (null s)
(eq s 'style))
titlecase-style
s)))
(cdr titlecase-command))))
((functionp titlecase-command)
(funcall titlecase-command (point-min) (point-max)
(or style titlecase-style))))
;; Ensure that the string has no extra trailing whitespace.
(goto-char (point-max)) ; Go to the end of the buffer
(newline) ; Ensure at least one newline
(delete-blank-lines) ; Delete all but the last newline
(insert str-ending-newlines) ; Replace the pre-existing newlines
;; Delete the extra newline and return the buffer as a string
(buffer-substring (point-min) (1- (point-max))))))
(defun titlecase--lowercase-all-caps (begin end _style)
"If the text from BEGIN to END is all-caps, downcase it."
(goto-char begin)
(unless (re-search-forward "[[:lower:]]" end :noerror)
(downcase-region begin end)))
;;;###autoload
(defun titlecase-region (begin end &optional style interactivep)
"Title-case the region of English text from BEGIN to END.
Uses the style provided in `titlecase-style', unless optional
STYLE is provided.
When called interactively , or when INTERACTIVEP is non-nil,
\\[universal-argument] \\[titlecase-region] will prompt the user
for the style to use."
(interactive "*r\ni\nP")
(atomic-change-group
(save-excursion ; `replace-region-contents'
(save-restriction
(narrow-to-region begin end)
(insert (titlecase--string (delete-and-extract-region begin end)
style))))))
;;;###autoload
(defun titlecase-line (&optional point style interactivep)
"Title-case the line at POINT.
Uses the style provided in `titlecase-style', unless optional
STYLE is provided.
When called interactively , or when INTERACTIVEP is non-nil,
POINT is the current point, and calling with
\\[universal-argument] \\[titlecase-line] will prompt the user
for the style to use."
(interactive "d\ni\nP")
(goto-char (or point (point)))
(let ((style (titlecase--arg style interactivep))
(thing (bounds-of-thing-at-point 'line)))
(titlecase-region (car thing) (cdr thing) style)))
;;;###autoload
(defun titlecase-sentence (&optional point style interactivep)
"Title-case the sentence at POINT.
Uses the style provided in `titlecase-style', unless optional
STYLE is provided.
When called interactively , or when INTERACTIVEP is non-nil,
POINT is the current point, and calling with
\\[universal-argument] \\[titlecase-sentence] will prompt the
user for the style to use."
(interactive "d\ni\nP")
(goto-char (or point (point)))
(let ((style (titlecase--arg style interactivep))
(thing (bounds-of-thing-at-point 'sentence)))
(titlecase-region (car thing) (cdr thing) style)
(goto-char (cdr thing))))
;;;###autoload
(defun titlecase-dwim (&optional style interactivep)
"Title-case either the region, if active, or the current line.
Uses the style provided in `titlecase-style', unless optional
STYLE is provided.
When called interactively with \\[universal-argument]
\\[titlecase-dwim], or when INTERACTIVEP is non-nil, prompt the
user for the style to use."
(interactive "i\nP")
(let ((style (titlecase--arg style interactivep)))
(if (region-active-p)
(titlecase-region (region-beginning) (region-end) style)
(funcall titlecase-dwim-non-region-function (point) style))))
(provide 'titlecase)
;;; titlecase.el ends here
|