summaryrefslogtreecommitdiff
path: root/custom/titlecase.el
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2025-10-12 11:47:26 -0500
committerCraig Jennings <c@cjennings.net>2025-10-12 11:47:26 -0500
commit092304d9e0ccc37cc0ddaa9b136457e56a1cac20 (patch)
treeea81999b8442246c978b364dd90e8c752af50db5 /custom/titlecase.el
changing repositories
Diffstat (limited to 'custom/titlecase.el')
-rw-r--r--custom/titlecase.el396
1 files changed, 396 insertions, 0 deletions
diff --git a/custom/titlecase.el b/custom/titlecase.el
new file mode 100644
index 00000000..43947822
--- /dev/null
+++ b/custom/titlecase.el
@@ -0,0 +1,396 @@
+;;; titlecase.el --- Title-case phrases -*- lexical-binding: t; -*-
+
+;; Author: Case Duckworth <acdw@acdw.net>
+;; Maintainer: Case Duckworth <acdw@acdw.net>
+;; Version: 0.3.0
+;; URL: https://codeberg.org/acdw/titlecase.el
+;; Package-Requires: ((emacs "25.1"))
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This library strives to be the most accurate possible with title-casing
+;; sentences, lines, and regions of text in English prose according to a number
+;; of styles guides' capitalization rules. It is necessarily a best-effort; due
+;; to the vaguaries of written English it's impossible to completely correctly
+;; capitalize aribtrary titles. So be sure to proofread and copy-edit your
+;; titles before sending them off to be published, and never trust a computer.
+
+;; INSTALLATION and USE:
+
+;; Make sure both titlecase.el and titlecase-data.el are in your `load-path',
+;; and `require' titlecase. You should then be able to call the interactive
+;; functions defined in this file.
+
+;;; CUSTOMIZATION:
+
+;; Only two customization options are probably going to be of any interest:
+;; `titlecase-style' (the style to use for capitalizing titles), and
+;; `titlecase-dwim-non-region-function', which determines what to do when
+;; `titlecase-dwim' isn't acting on a region.
+
+;; If you want to use your own title-casing code, or a third party, you can
+;; customize `titlecase-command' to something other than its default. One
+;; possibility is titlecase.pl, written John Gruber and Aristotle Pagaltzis:
+;; https://github.com/ap/titlecase.
+
+;;; Code:
+
+(require 'browse-url) ; `browse-url-button-regexp'
+(require 'cl-lib) ; `cl-loop'
+(require 'seq) ; `seq-some'
+(require 'thingatpt) ; `bounds-of-thing-at-point'
+(require 'titlecase-data)
+
+(defgroup titlecase nil
+ "Customization for title-casing phrases."
+ :prefix "titlecase-"
+ :group 'text)
+
+(defvar titlecase-styles '((chicago . "Chicago Style")
+ (apa . "APA Style")
+ (mla . "MLA Style")
+ (ap . "AP Style")
+ (bluebook . "Bluebook Style")
+ (ama . "AMA Style")
+ (nyt . "New York Times Style")
+ (wikipedia . "Wikipedia Style")
+ (imdb . "IMDB Style")
+ (sentence . "Sentence style"))
+ "Available styles for title-casing.")
+
+(defvar titlecase-default-case-function #'capitalize-word
+ "What to do to a word when a style doesn't specify what to do.")
+
+(defcustom titlecase-normalize-functions '(titlecase--lowercase-all-caps)
+ "List of functions for normalizing input before title-casing.
+Each function will be passed 3 arguments: the beginning and
+ending points of the region to operate on, as well as the
+title-casing style. They are called one after another in order
+in a `save-excursion' block."
+ :type '(repeat function))
+
+(defcustom titlecase-skip-words-regexps (list "\\b[[:upper:]]+\\b"
+ browse-url-button-regexp)
+ "Regexps of words to skip when titlecasing.
+Each regexp in this list will be tested on each word considered
+for title-casing, and if the regexp matches the entire word, the
+word will be skipped.
+
+NOTE: These regexps will be matched against the title-cased
+region /after/ normalizing it, which means that, by default, if
+the region is in all-caps before calling `titlecase-region', it
+will be downcased before title-casing. Thus, some of these
+regexps might not match when expected. This behavior is a
+trade-off between possible user expectations. To change this
+behavior, customize `titlecase-normalize-functions'."
+ :type '(repeat regexp))
+
+(defcustom titlecase-style 'wikipedia
+ "Which style to use when title-casing."
+ :type (cons 'choice (cl-loop
+ for style in titlecase-styles
+ collect (list 'const :tag (cdr style) (car style))
+ into choices
+ finally return choices)))
+
+(defcustom titlecase-force-cap-after-punc "[.?!\\/;:\n\r]"
+ "Regexp to force the next word capitalized."
+ :type 'regexp)
+
+(defcustom titlecase-dwim-non-region-function #'titlecase-line
+ "What to do with `titlecase-dwim' when region isn't active.
+Recommended: `titlecase-line' or `titlecase-sentence'."
+ :type 'function)
+
+(defcustom titlecase-command #'titlecase--region-with-style
+ "Command to use for titlecasing titles.
+This option can be one of two things:
+
+A string value, or a list of string values, is interpreted as a
+system command to run using `call-process-region' on a temp
+buffer containing the text to titlecase. Just a string is
+interpreted as the command to run, with no arguments. A list of
+strings will pass those strings as aruguments to the command-line
+program. In that list, the symbol `style' will be replaced with
+the the string of the title-casing style that's passed.
+
+A function value is interpreted as the function to call on the
+region. The function will be called with three arguments: the
+beginning and end of the region, and the style (see
+`titlecase-style') to capitalize it in.")
+
+(defcustom titlecase-downcase-sentences nil
+ "Whether to downcase words after the first in \"sentence\" style.
+If nil, titlecasing using the \"sentence\" style will leave all
+words as-is. If t, \"sentence\"-style titlecasing will downcase
+words that don't begin a sentence."
+ :type 'boolean)
+
+(defun titlecase--region-with-style-impl (begin end style)
+ "Title-case implementation.
+`titlecase-force-cap-after-punc' must be handled by the caller.
+This is expected to have run in a block that uses `save-excursion' and
+`save-match-data'. See documentation for `titlecase--region-with-style'
+for docs on BEGIN, END and STYLE."
+ (let ( ;; Constants during this function's runtime.
+ (case-fold-search nil)
+ (downcase-word-list (symbol-value
+ (intern (format "titlecase-lowercase-%s"
+ style)))))
+
+ ;; Normalize the text in region by calling `titlecase-normalize-functions'
+ ;; in order.
+ (dolist (fn titlecase-normalize-functions)
+ (save-excursion
+ (funcall fn begin end style)))
+
+ ;; Skip blank lines & white-space (where `current-word' would return nil).
+ ;; It's important this uses the same logic that `current-word' uses to scan
+ ;; for words, or this may be nil when it's not expected. See #11.
+ (goto-char begin)
+ (skip-syntax-forward "^w" end)
+ (setq begin (point))
+
+ ;; And loop over the rest.
+ (catch :done
+ (while (< (point) end)
+ (let ((this-word (current-word)))
+ (cond
+ ;; Skip words matching `titlecase-skip-words-regexps'.
+ ((looking-at (format "%s"
+ (mapconcat #'identity
+ titlecase-skip-words-regexps
+ "\\|")))
+ (goto-char (match-end 0))
+ ;; TODO: Document what this does (it's late)
+ (when (>= (point) end)
+ (throw :done 'skipped)))
+ ;; Phrasal verbs!
+ ((and (memq style titlecase-styles-capitalize-phrasal-verbs)
+ (member (downcase this-word)
+ (mapcar #'car titlecase-phrasal-verbs)))
+ ;; We need to do a little state machine thingy here.
+ (let ((next-words (assoc this-word titlecase-phrasal-verbs))
+ (bail-pt (point)))
+ ;; Take care of the first word --- this is inelegant.
+ (capitalize-word 1)
+ (skip-syntax-forward "^w" end)
+ (setq this-word (current-word))
+ ;; Loop through the rest
+ (while (and this-word
+ (member (downcase this-word)
+ (mapcar #'car-safe next-words)))
+ (capitalize-word 1)
+ (skip-syntax-forward "^w" end)
+ (setq this-word (current-word)
+ next-words (mapcar #'cdr-safe next-words)))
+ (unless (seq-some #'null next-words)
+ ;; If it's not a phrasal verb, bail --- but still
+ ;; capitalize the first word!
+ (downcase-region bail-pt (point))
+ (goto-char bail-pt)
+ (capitalize-word 1))))
+ ;; Force capitalization if this is the first word.
+ ((eq begin (point))
+ (capitalize-word 1))
+ ;; AP capitalizes /all/ words longer than 3 letters.
+ ((and (memq style titlecase-styles-capitalize-non-short-words)
+ (> (length this-word) titlecase-short-word-length))
+ (capitalize-word 1))
+ ;; Sentence style just capitalizes the first word. Since we can't be
+ ;; sure how the user has already capitalized anything, we just skip
+ ;; the current word. HOWEVER, there are times when downcasing the
+ ;; rest of the sentence is warranted. --- NOTE 2022-05-09: Now I'm
+ ;; thinking about it, does `sentence' style need to do anything
+ ;; whatsoever? Maybe I just need to include a test toward the top of
+ ;; the enclosing function to make `titlecase-default-case-function'
+ ;; be `downcase-word' if `titlecase-downcase-sentences' is true... or
+ ;; something of that nature. I might be over-engineering this, is
+ ;; what I'm saying. Curious, isn't it?
+ ((eq style 'sentence)
+ (funcall (if titlecase-downcase-sentences
+ #'downcase-word
+ #'forward-word)
+ 1))
+ ;; Skip the next word if:
+ ((or
+ ;; None of the styles require a capital letter after an
+ ;; apostrophe.
+ (memq (char-before (point)) '(?' ?’))
+ ;; FIXME: Hyphens are a completely different story with
+ ;; capitalization.
+ (eq (char-before (point)) ?-))
+ (forward-word 1))
+ ;; Down-case words that should be.
+ ((member (downcase this-word) downcase-word-list)
+ (downcase-word 1))
+ ;; Otherwise, do the default function on the word.
+ (t
+ (funcall titlecase-default-case-function 1))))
+
+ ;; Step over the loop.
+ (unless (= end (point))
+ (skip-syntax-forward "^w" end)))
+ ;; Capitalize the last word, only in some styles and some conditions.
+ (when (and (memq style titlecase-styles-capitalize-last-word))
+ (save-excursion
+ (backward-word 1)
+ (when (and (>= (point) begin)
+ (not (seq-some (lambda (r) (looking-at r))
+ titlecase-skip-words-regexps)))
+ (capitalize-word 1)))))))
+
+(defun titlecase--region-with-style (begin end style)
+ "Title-case the region of English text from BEGIN to END, using STYLE."
+ ;; It doesn't makes sense for this function to be interactive;
+ ;; `titlecase-region' can now specify a style interactively.
+ (save-match-data
+ (while (< begin end)
+ (goto-char begin)
+ (let ((end-step
+ (if (re-search-forward titlecase-force-cap-after-punc
+ end :noerror)
+ (point)
+ end)))
+ (if (memq (titlecase--region-with-style-impl begin end-step style)
+ '(skipped))
+ (setq begin (point))
+ (setq begin end-step))))))
+
+(defun titlecase--read-style ()
+ "Read which title-case style to use from the minibuffer."
+ (let ((choice (completing-read
+ "Title-case style: "
+ (mapcar #'cdr titlecase-styles)
+ nil t nil nil
+ (alist-get titlecase-style titlecase-styles))))
+ (cl-loop for (s . n) in titlecase-styles
+ if (equal n choice) return s)))
+
+(defun titlecase--arg (style interactivep)
+ "Process arguments to titlecase functions.
+If STYLE is passed to a function in any way, use it; otherwise,
+if INTERACTIVEP, prompt the user for a style to use. As a
+fall-back, use `titlecase-style'."
+ (or style
+ (and interactivep (titlecase--read-style))
+ titlecase-style))
+
+(defun titlecase--string (str style)
+ "Run `titlecase-command' on STR with STYLE and return the result.
+See the docstring for `titlecase-command' for its possible
+values."
+ (let (;; Remember the existing newlines
+ (str-ending-newlines (replace-regexp-in-string
+ "\\`\\([^z-a]*?\\)\n*\\'" "" str nil nil 1)))
+ (with-temp-buffer
+ (insert str)
+ (cond
+ ((stringp titlecase-command)
+ (call-process-region (point-min) (point-max) titlecase-command t t nil))
+ ((listp titlecase-command)
+ (apply #'call-process-region (point-min) (point-max)
+ (car titlecase-command) t t nil
+ (mapcar (lambda (s)
+ (format "%s" (if (or (null s)
+ (eq s 'style))
+ titlecase-style
+ s)))
+ (cdr titlecase-command))))
+ ((functionp titlecase-command)
+ (funcall titlecase-command (point-min) (point-max)
+ (or style titlecase-style))))
+ ;; Ensure that the string has no extra trailing whitespace.
+ (goto-char (point-max)) ; Go to the end of the buffer
+ (newline) ; Ensure at least one newline
+ (delete-blank-lines) ; Delete all but the last newline
+ (insert str-ending-newlines) ; Replace the pre-existing newlines
+ ;; Delete the extra newline and return the buffer as a string
+ (buffer-substring (point-min) (1- (point-max))))))
+
+(defun titlecase--lowercase-all-caps (begin end _style)
+ "If the text from BEGIN to END is all-caps, downcase it."
+ (goto-char begin)
+ (unless (re-search-forward "[[:lower:]]" end :noerror)
+ (downcase-region begin end)))
+
+;;;###autoload
+(defun titlecase-region (begin end &optional style interactivep)
+ "Title-case the region of English text from BEGIN to END.
+Uses the style provided in `titlecase-style', unless optional
+STYLE is provided.
+
+When called interactively , or when INTERACTIVEP is non-nil,
+\\[universal-argument] \\[titlecase-region] will prompt the user
+for the style to use."
+ (interactive "*r\ni\nP")
+ (atomic-change-group
+ (save-excursion ; `replace-region-contents'
+ (save-restriction
+ (narrow-to-region begin end)
+ (insert (titlecase--string (delete-and-extract-region begin end)
+ style))))))
+
+;;;###autoload
+(defun titlecase-line (&optional point style interactivep)
+ "Title-case the line at POINT.
+Uses the style provided in `titlecase-style', unless optional
+STYLE is provided.
+
+When called interactively , or when INTERACTIVEP is non-nil,
+POINT is the current point, and calling with
+\\[universal-argument] \\[titlecase-line] will prompt the user
+for the style to use."
+ (interactive "d\ni\nP")
+ (goto-char (or point (point)))
+ (let ((style (titlecase--arg style interactivep))
+ (thing (bounds-of-thing-at-point 'line)))
+ (titlecase-region (car thing) (cdr thing) style)))
+
+;;;###autoload
+(defun titlecase-sentence (&optional point style interactivep)
+ "Title-case the sentence at POINT.
+Uses the style provided in `titlecase-style', unless optional
+STYLE is provided.
+
+When called interactively , or when INTERACTIVEP is non-nil,
+POINT is the current point, and calling with
+\\[universal-argument] \\[titlecase-sentence] will prompt the
+user for the style to use."
+ (interactive "d\ni\nP")
+ (goto-char (or point (point)))
+ (let ((style (titlecase--arg style interactivep))
+ (thing (bounds-of-thing-at-point 'sentence)))
+ (titlecase-region (car thing) (cdr thing) style)
+ (goto-char (cdr thing))))
+
+;;;###autoload
+(defun titlecase-dwim (&optional style interactivep)
+ "Title-case either the region, if active, or the current line.
+Uses the style provided in `titlecase-style', unless optional
+STYLE is provided.
+
+When called interactively with \\[universal-argument]
+\\[titlecase-dwim], or when INTERACTIVEP is non-nil, prompt the
+user for the style to use."
+ (interactive "i\nP")
+ (let ((style (titlecase--arg style interactivep)))
+ (if (region-active-p)
+ (titlecase-region (region-beginning) (region-end) style)
+ (funcall titlecase-dwim-non-region-function (point) style))))
+
+(provide 'titlecase)
+;;; titlecase.el ends here