diff options
Diffstat (limited to 'modules/transcription-config.el')
| -rw-r--r-- | modules/transcription-config.el | 390 |
1 files changed, 390 insertions, 0 deletions
diff --git a/modules/transcription-config.el b/modules/transcription-config.el new file mode 100644 index 00000000..fd2f4aaa --- /dev/null +++ b/modules/transcription-config.el @@ -0,0 +1,390 @@ +;;; transcription-config.el --- Audio transcription workflow -*- lexical-binding: t; -*- + +;; Author: Craig Jennings <c@cjennings.net> +;; Created: 2025-11-04 + +;;; Commentary: +;; +;; Audio transcription workflow with multiple backend options. +;; +;; USAGE: +;; In dired: Press `T` on an audio file to transcribe +;; Anywhere: M-x cj/transcribe-audio +;; View active: M-x cj/transcriptions-buffer +;; Switch backend: C-; T b (or M-x cj/transcription-switch-backend) +;; +;; OUTPUT FILES: +;; audio.m4a → audio.txt (transcript) +;; → audio.log (process logs, conditionally kept) +;; +;; BACKENDS: +;; - 'openai-api: Fast cloud transcription +;; API key retrieved from authinfo.gpg (machine api.openai.com) +;; - 'assemblyai: Cloud transcription with speaker diarization +;; API key retrieved from authinfo.gpg (machine api.assemblyai.com) +;; - 'local-whisper: Local transcription (requires whisper installed) +;; +;; NOTIFICATIONS: +;; - "Transcription started on <file>" +;; - "Transcription complete. Transcript in <file.txt>" +;; - "Transcription errored. Logs in <file.log>" +;; +;; MODELINE: +;; Shows active transcription count: ⏺2 +;; Click to view *Transcriptions* buffer +;; +;;; Code: + +(require 'dired) +(require 'notifications) +(require 'auth-source) + +;; ----------------------------- Configuration --------------------------------- + +(defvar cj/transcribe-backend 'assemblyai + "Transcription backend to use. +- `openai-api': Fast cloud transcription via OpenAI API +- `assemblyai': Cloud transcription with speaker diarization via AssemblyAI +- `local-whisper': Local transcription using installed Whisper") + +(defvar cj/transcription-keep-log-when-done nil + "Whether to keep log files after successful transcription. +If nil, log files are deleted after successful completion. +If t, log files are always kept. +Log files are always kept on error regardless of this setting.") + +(defvar cj/transcriptions-list '() + "List of active transcriptions. +Each entry: (process audio-file start-time status) +Status: running, complete, error") + +;; ----------------------------- Pure Functions -------------------------------- + +(defun cj/--audio-file-p (file) + "Return non-nil if FILE is an audio file based on extension." + (when (and file (stringp file)) + (when-let ((ext (file-name-extension file))) + (member (downcase ext) cj/audio-file-extensions)))) + +(defun cj/--transcription-output-files (audio-file) + "Return cons cell of (TXT-FILE . LOG-FILE) for AUDIO-FILE." + (let ((base (file-name-sans-extension audio-file))) + (cons (concat base ".txt") + (concat base ".log")))) + +(defun cj/--transcription-duration (start-time) + "Return duration string (MM:SS) since START-TIME." + (let* ((elapsed (float-time (time-subtract (current-time) start-time))) + (minutes (floor (/ elapsed 60))) + (seconds (floor (mod elapsed 60)))) + (format "%02d:%02d" minutes seconds))) + +(defun cj/--should-keep-log (success-p) + "Return non-nil if log file should be kept. +SUCCESS-P indicates whether transcription succeeded." + (or (not success-p) ; Always keep on error + cj/transcription-keep-log-when-done)) + +(defun cj/--transcription-script-path () + "Return absolute path to transcription script based on backend." + (let ((script-name (pcase cj/transcribe-backend + ('openai-api "oai-transcribe") + ('assemblyai "assemblyai-transcribe") + ('local-whisper "local-whisper")))) + (expand-file-name (concat "scripts/" script-name) user-emacs-directory))) + +(defun cj/--get-openai-api-key () + "Retrieve OpenAI API key from authinfo.gpg. +Expects entry in authinfo.gpg: + machine api.openai.com login api password sk-... +Returns the API key string, or nil if not found." + (when-let* ((auth-info (car (auth-source-search + :host "api.openai.com" + :require '(:secret)))) + (secret (plist-get auth-info :secret))) + (if (functionp secret) + (funcall secret) + secret))) + +(defun cj/--get-assemblyai-api-key () + "Retrieve AssemblyAI API key from authinfo.gpg. +Expects entry in authinfo.gpg: + machine api.assemblyai.com login api password <key> +Returns the API key string, or nil if not found." + (when-let* ((auth-info (car (auth-source-search + :host "api.assemblyai.com" + :require '(:secret)))) + (secret (plist-get auth-info :secret))) + (if (functionp secret) + (funcall secret) + secret))) + +;; ---------------------------- Process Management ----------------------------- + +(defun cj/--notify (title message &optional urgency) + "Send desktop notification and echo area message. +TITLE and MESSAGE are strings. URGENCY is normal or critical." + (message "%s: %s" title message) + (when (and (fboundp 'notifications-notify) + (getenv "DISPLAY")) + (notifications-notify + :title title + :body message + :urgency (or urgency 'normal)))) + +(defun cj/--start-transcription-process (audio-file) + "Start async transcription process for AUDIO-FILE. +Returns the process object." + (unless (file-exists-p audio-file) + (user-error "Audio file does not exist: %s" audio-file)) + + (unless (cj/--audio-file-p audio-file) + (user-error "Not an audio file: %s" audio-file)) + + (let* ((script (cj/--transcription-script-path)) + (outputs (cj/--transcription-output-files audio-file)) + (txt-file (car outputs)) + (log-file (cdr outputs)) + (buffer-name (format " *transcribe-%s*" (file-name-nondirectory audio-file))) + (process-name (format "transcribe-%s" (file-name-nondirectory audio-file)))) + + (unless (file-executable-p script) + (user-error "Transcription script not found or not executable: %s" script)) + + ;; Create log file + (with-temp-file log-file + (insert (format "Transcription started: %s\n" (current-time-string)) + (format "Backend: %s\n" cj/transcribe-backend) + (format "Audio file: %s\n" audio-file) + (format "Script: %s\n\n" script))) + + ;; Start process with environment + (let* ((process-environment + ;; Add API key to environment based on backend + (pcase cj/transcribe-backend + ('openai-api + (if-let ((api-key (cj/--get-openai-api-key))) + (cons (format "OPENAI_API_KEY=%s" api-key) + process-environment) + (user-error "OpenAI API key not found in authinfo.gpg for host api.openai.com"))) + ('assemblyai + (if-let ((api-key (cj/--get-assemblyai-api-key))) + (cons (format "ASSEMBLYAI_API_KEY=%s" api-key) + process-environment) + (user-error "AssemblyAI API key not found in authinfo.gpg for host api.assemblyai.com"))) + (_ process-environment))) + (process (make-process + :name process-name + :buffer (get-buffer-create buffer-name) + :command (list script audio-file) + :sentinel (lambda (proc event) + (cj/--transcription-sentinel proc event audio-file txt-file log-file)) + :stderr log-file))) + + ;; Track transcription + (push (list process audio-file (current-time) 'running) cj/transcriptions-list) + (force-mode-line-update t) + + ;; Notify user + (cj/--notify "Transcription" + (format "Started on %s" (file-name-nondirectory audio-file))) + + process))) + +(defun cj/--transcription-sentinel (process event audio-file txt-file log-file) + "Sentinel for transcription PROCESS. +EVENT is the process event string. +AUDIO-FILE, TXT-FILE, and LOG-FILE are the associated files." + (let* ((success-p (and (string-match-p "finished" event) + (= 0 (process-exit-status process)))) + (process-buffer (process-buffer process)) + (entry (assq process cj/transcriptions-list))) + + ;; Write process output to txt file + (when (and success-p (buffer-live-p process-buffer)) + (with-current-buffer process-buffer + (write-region (point-min) (point-max) txt-file nil 'silent))) + + ;; Append process output to log file + (when (buffer-live-p process-buffer) + (with-temp-buffer + (insert-file-contents log-file) + (goto-char (point-max)) + (insert "\n" (format-time-string "[%Y-%m-%d %H:%M:%S] ") event "\n") + (insert-buffer-substring process-buffer) + (write-region (point-min) (point-max) log-file nil 'silent))) + + ;; Update transcription status + (when entry + (setf (nth 3 entry) (if success-p 'complete 'error))) + + ;; Cleanup log file if successful and configured to do so + (when (and success-p (not (cj/--should-keep-log t))) + (delete-file log-file)) + + ;; Kill process buffer + (when (buffer-live-p process-buffer) + (kill-buffer process-buffer)) + + ;; Notify user + (if success-p + (cj/--notify "Transcription" + (format "Complete. Transcript in %s" (file-name-nondirectory txt-file))) + (cj/--notify "Transcription" + (format "Errored. Logs in %s" (file-name-nondirectory log-file)) + 'critical)) + + ;; Clean up completed transcriptions after 10 minutes + (run-at-time 600 nil #'cj/--cleanup-completed-transcriptions) + + ;; Update modeline + (force-mode-line-update t))) + +(defun cj/--cleanup-completed-transcriptions () + "Remove completed/errored transcriptions from tracking list." + (setq cj/transcriptions-list + (seq-filter (lambda (entry) + (eq (nth 3 entry) 'running)) + cj/transcriptions-list)) + (force-mode-line-update t)) + +(defun cj/--count-active-transcriptions () + "Return count of running transcriptions." + (length (seq-filter (lambda (entry) + (eq (nth 3 entry) 'running)) + cj/transcriptions-list))) + +;; ----------------------------- Modeline Integration -------------------------- + +(defun cj/--transcription-modeline-string () + "Return modeline string for active transcriptions." + (let ((count (cj/--count-active-transcriptions))) + (when (> count 0) + (propertize (format " ⏺%d " count) + 'face 'warning + 'help-echo (format "%d active transcription%s (click to view)" + count (if (= count 1) "" "s")) + 'mouse-face 'mode-line-highlight + 'local-map (let ((map (make-sparse-keymap))) + (define-key map [mode-line mouse-1] + #'cj/transcriptions-buffer) + map))))) + +;; Add to mode-line-format (will be activated when module loads) +(add-to-list 'mode-line-misc-info + '(:eval (cj/--transcription-modeline-string)) + t) + +;; --------------------------- Interactive Commands ---------------------------- + +;;;###autoload +(defun cj/transcribe-audio (audio-file) + "Transcribe AUDIO-FILE asynchronously. +Creates AUDIO.txt with transcript and AUDIO.log with process logs. +Uses backend specified by `cj/transcribe-backend'." + (interactive (list (read-file-name "Audio file to transcribe: " + nil nil t nil + #'cj/--audio-file-p))) + (cj/--start-transcription-process (expand-file-name audio-file))) + +;;;###autoload +(defun cj/transcribe-audio-at-point () + "Transcribe audio file at point in dired." + (interactive) + (unless (derived-mode-p 'dired-mode) + (user-error "Not in dired-mode")) + (let ((file (dired-get-filename nil t))) + (unless file + (user-error "No file at point")) + (cj/transcribe-audio file))) + +;;;###autoload +(defun cj/transcriptions-buffer () + "Show buffer with active transcriptions." + (interactive) + (let ((buffer (get-buffer-create "*Transcriptions*"))) + (with-current-buffer buffer + (let ((inhibit-read-only t)) + (erase-buffer) + (insert (propertize "Active Transcriptions\n" 'face 'bold) + (propertize (make-string 50 ?─) 'face 'shadow) + "\n\n") + (if (null cj/transcriptions-list) + (insert "No active transcriptions.\n") + (dolist (entry cj/transcriptions-list) + (let* ((process (nth 0 entry)) + (audio-file (nth 1 entry)) + (start-time (nth 2 entry)) + (status (nth 3 entry)) + (duration (cj/--transcription-duration start-time)) + (status-face (pcase status + ('running 'warning) + ('complete 'success) + ('error 'error)))) + (insert (propertize (format "%-10s" status) 'face status-face) + " " + (file-name-nondirectory audio-file) + (format " (%s)\n" duration)))))) + (goto-char (point-min)) + (special-mode)) + (display-buffer buffer))) + +;;;###autoload +(defun cj/transcription-kill (process) + "Kill transcription PROCESS." + (interactive + (list (let ((choices (mapcar (lambda (entry) + (cons (file-name-nondirectory (nth 1 entry)) + (nth 0 entry))) + cj/transcriptions-list))) + (unless choices + (user-error "No active transcriptions")) + (cdr (assoc (completing-read "Kill transcription: " choices nil t) + choices))))) + (when (process-live-p process) + (kill-process process) + (message "Killed transcription process"))) + +;;;###autoload +(defun cj/transcription-switch-backend () + "Switch transcription backend. +Prompts with completing-read to select from available backends." + (interactive) + (let* ((backends '(("assemblyai" . assemblyai) + ("openai-api" . openai-api) + ("local-whisper" . local-whisper))) + (current (symbol-name cj/transcribe-backend)) + (prompt (format "Transcription backend (current: %s): " current)) + (choice (completing-read prompt backends nil t)) + (new-backend (alist-get choice backends nil nil #'string=))) + (setq cj/transcribe-backend new-backend) + (message "Transcription backend: %s" choice))) + +;; ------------------------------- Dired Integration --------------------------- + +(with-eval-after-load 'dired + (define-key dired-mode-map (kbd "T") #'cj/transcribe-audio-at-point)) + +;; Dirvish inherits dired-mode-map, so T works automatically + +;; ------------------------------- Global Keybindings -------------------------- + +;; Transcription keymap +(defvar-keymap cj/transcribe-map + :doc "Keymap for transcription operations" + "a" #'cj/transcribe-audio + "b" #'cj/transcription-switch-backend + "v" #'cj/transcriptions-buffer + "k" #'cj/transcription-kill) +(keymap-set cj/custom-keymap "T" cj/transcribe-map) + +(with-eval-after-load 'which-key + (which-key-add-key-based-replacements + "C-; T" "transcription menu" + "C-; T a" "transcribe audio" + "C-; T b" "switch backend" + "C-; T v" "view transcriptions" + "C-; T k" "kill transcription")) + +(provide 'transcription-config) +;;; transcription-config.el ends here |
