diff options
| -rw-r--r-- | modules/transcription-config.el | 118 | ||||
| -rw-r--r-- | modules/user-constants.el | 6 | ||||
| -rw-r--r-- | tests/test-transcription-status-and-commands.el | 9 | ||||
| -rw-r--r-- | tests/test-transcription-video.el | 156 |
4 files changed, 270 insertions, 19 deletions
diff --git a/modules/transcription-config.el b/modules/transcription-config.el index 3736835d..d81ccba0 100644 --- a/modules/transcription-config.el +++ b/modules/transcription-config.el @@ -8,8 +8,9 @@ ;; Audio transcription workflow with multiple backend options. ;; ;; USAGE: -;; In dired: Press `T` on an audio file to transcribe -;; Anywhere: M-x cj/transcribe-audio +;; In dired: Press `T` on an audio OR video file to transcribe +;; (videos run through ffmpeg first to extract the audio) +;; Anywhere: M-x cj/transcribe-media (M-x cj/transcribe-audio still works) ;; View active: M-x cj/transcriptions-buffer ;; Switch backend: M-x cj/transcription-switch-backend ;; @@ -84,6 +85,17 @@ Status: running, complete, error") (when-let ((ext (file-name-extension file))) (member (downcase ext) cj/audio-file-extensions)))) +(defun cj/--video-file-p (file) + "Return non-nil if FILE is a video file based on extension." + (when (and file (stringp file)) + (when-let ((ext (file-name-extension file))) + (member (downcase ext) cj/video-file-extensions)))) + +(defun cj/--media-file-p (file) + "Return non-nil if FILE is an audio or video file." + (or (cj/--audio-file-p file) + (cj/--video-file-p file))) + (defun cj/--transcription-output-files (audio-file) "Return cons cell of (TXT-FILE . LOG-FILE) for AUDIO-FILE." (let ((base (file-name-sans-extension audio-file))) @@ -159,9 +171,13 @@ TITLE and MESSAGE are strings. URGENCY is normal or critical." :body message :urgency (or urgency 'normal)))) -(defun cj/--start-transcription-process (audio-file) +(defun cj/--start-transcription-process (audio-file &optional cleanup-file) "Start async transcription process for AUDIO-FILE. -Returns the process object." +Returns the process object. + +When CLEANUP-FILE is non-nil, delete that path once the transcription +sentinel fires (success or failure). Used by the video flow to drop +the temp audio file produced by ffmpeg after transcription completes." 
(unless (file-exists-p audio-file) (user-error "Audio file does not exist: %s" audio-file)) @@ -186,13 +202,57 @@ Returns the process object." :buffer (get-buffer-create buffer-name) :command (list script audio-file) :sentinel (lambda (proc event) - (cj/--transcription-sentinel proc event audio-file txt-file log-file)) + (cj/--transcription-sentinel proc event audio-file txt-file log-file) + (when cleanup-file + (ignore-errors (delete-file cleanup-file)))) :stderr log-file))) (cj/--track-transcription process audio-file) (cj/--notify "Transcription" (format "Started on %s" (file-name-nondirectory audio-file))) process))) +(defun cj/--video-extracted-audio-path (video-file) + "Return a temp .mp3 path to hold the extracted audio for VIDEO-FILE. +The basename hints at the source so a stuck file is easy to identify." + (make-temp-file (format "cj-tx-%s-" + (file-name-base video-file)) + nil ".mp3")) + +(defun cj/--extract-audio-from-video (video-file output-file on-success) + "Async-extract the audio track from VIDEO-FILE to OUTPUT-FILE via ffmpeg. + +On success, call ON-SUCCESS (no args). On failure, send a critical +notification via `cj/--notify' and delete OUTPUT-FILE. Signals +`user-error' synchronously if ffmpeg isn't on PATH. + +Uses libmp3lame at quality 4 (~165kbps VBR) -- good for speech, +universally accepted by the transcription backends." 
+ (let ((ffmpeg (cj/executable-find-or-warn + "ffmpeg" "video audio extraction" 'transcription-config))) + (unless ffmpeg + (user-error "ffmpeg not found on PATH -- install ffmpeg to transcribe videos")) + (let ((process-name (format "ffmpeg-extract-%s" + (file-name-nondirectory video-file)))) + (make-process + :name process-name + :buffer (get-buffer-create (format " *%s*" process-name)) + :command (list ffmpeg "-y" "-i" video-file + "-vn" "-acodec" "libmp3lame" "-q:a" "4" + output-file) + :sentinel (lambda (proc event) + (cond + ((and (string-match-p "finished" event) + (= 0 (process-exit-status proc))) + (let ((buf (process-buffer proc))) + (when (buffer-live-p buf) (kill-buffer buf))) + (funcall on-success)) + ((string-match-p "\\(?:exited\\|failed\\|signal\\)" event) + (cj/--notify "Transcription" + (format "ffmpeg failed on %s" + (file-name-nondirectory video-file)) + 'critical) + (ignore-errors (delete-file output-file))))))))) + (defun cj/--write-transcript-on-success (process-buffer success-p txt-file) "Write PROCESS-BUFFER contents to TXT-FILE when SUCCESS-P is non-nil. No-op if PROCESS-BUFFER is dead or SUCCESS-P is nil." @@ -284,25 +344,49 @@ associated output files." ;; --------------------------- Interactive Commands ---------------------------- ;;;###autoload -(defun cj/transcribe-audio (audio-file) - "Transcribe AUDIO-FILE asynchronously. -Creates AUDIO.txt with transcript and AUDIO.log with process logs. -Uses backend specified by `cj/transcribe-backend'." - (interactive (list (read-file-name "Audio file to transcribe: " +(defun cj/transcribe-media (file) + "Transcribe FILE asynchronously. Accepts audio or video. + +For audio: hands the file straight to the transcription pipeline. +For video: shells ffmpeg to extract the audio track to a temp .mp3, +then transcribes that. The temp audio is deleted after the +transcription sentinel fires. + +Creates FILE.txt (transcript) and FILE.log (process logs) beside the +pipeline's input (NOTE: for video, seemingly the temp .mp3). 
Uses the backend in +`cj/transcribe-backend'." + (interactive (list (read-file-name "Media file to transcribe: " nil nil t nil - #'cj/--audio-file-p))) - (cj/--start-transcription-process (expand-file-name audio-file))) + #'cj/--media-file-p))) + (let ((path (expand-file-name file))) + (unless (cj/--media-file-p path) + (user-error "Not an audio or video file: %s" path)) + (cond + ((cj/--audio-file-p path) + (cj/--start-transcription-process path)) + ((cj/--video-file-p path) + (let ((extracted (cj/--video-extracted-audio-path path))) + (cj/--extract-audio-from-video + path extracted + (lambda () + (cj/--start-transcription-process extracted extracted)))))))) ;;;###autoload -(defun cj/transcribe-audio-at-point () - "Transcribe audio file at point in dired." +(defun cj/transcribe-media-at-point () + "Transcribe the audio or video file at point in dired/dirvish." (interactive) (unless (derived-mode-p 'dired-mode) (user-error "Not in dired-mode")) (let ((file (dired-get-filename nil t))) (unless file (user-error "No file at point")) - (cj/transcribe-audio file))) + (cj/transcribe-media file))) + +;; Backwards-compat aliases. The audio-only names predate the +;; video-extension work; keep them as drop-in synonyms for anyone +;; with muscle memory or external scripts. +(defalias 'cj/transcribe-audio 'cj/transcribe-media) +(defalias 'cj/transcribe-audio-at-point 'cj/transcribe-media-at-point) (defun cj/--format-transcription-entry (entry) "Return a display string for a transcription ENTRY. @@ -374,11 +458,11 @@ Prompts with completing-read to select from available backends." 
;; ------------------------------- Dired Integration --------------------------- (with-eval-after-load 'dired - (define-key dired-mode-map (kbd "T") #'cj/transcribe-audio-at-point)) + (define-key dired-mode-map (kbd "T") #'cj/transcribe-media-at-point)) ;; Dirvish uses its own keymap, so bind T there too (with-eval-after-load 'dirvish - (define-key dirvish-mode-map (kbd "T") #'cj/transcribe-audio-at-point)) + (define-key dirvish-mode-map (kbd "T") #'cj/transcribe-media-at-point)) ;; Reach the transcription commands via M-x. The previous `C-; T' ;; menu was retired so the top-level slot could go to telega (which diff --git a/modules/user-constants.el b/modules/user-constants.el index b1f6016b..21e141a5 100644 --- a/modules/user-constants.el +++ b/modules/user-constants.el @@ -56,6 +56,12 @@ Used by cursor color, modeline, and other UI elements.") "File extensions recognized as audio files. Used by transcription module and other audio-related functionality.") +(defvar cj/video-file-extensions + '("mp4" "mkv" "mov" "webm" "avi" "m4v" "wmv" "flv" "mpg" "mpeg" "3gp" "ogv") + "File extensions recognized as video files. +Used by transcription to dispatch the audio-extract step in front of +the regular transcription pipeline.") + ;; ------------------------ Directory And File Constants ----------------------- ;; DIRECTORIES diff --git a/tests/test-transcription-status-and-commands.el b/tests/test-transcription-status-and-commands.el index 90841e70..7c796de0 100644 --- a/tests/test-transcription-status-and-commands.el +++ b/tests/test-transcription-status-and-commands.el @@ -179,13 +179,18 @@ (should-error (cj/transcribe-audio-at-point) :type 'user-error))) (ert-deftest test-tx-transcribe-audio-at-point-normal-delegates () - "Normal: with a file at point, delegates to `cj/transcribe-audio'." + "Normal: with a file at point, delegates to `cj/transcribe-media'. 
+ +`cj/transcribe-audio-at-point' is now a `defalias' for +`cj/transcribe-media-at-point', which hands off to +`cj/transcribe-media' (no longer the old audio-only command). The +stub still pins behavior by name." (let ((handed-off nil)) (cl-letf (((symbol-function 'derived-mode-p) (lambda (&rest modes) (memq 'dired-mode modes))) ((symbol-function 'dired-get-filename) (lambda (&rest _) "/tmp/recording.wav")) - ((symbol-function 'cj/transcribe-audio) + ((symbol-function 'cj/transcribe-media) (lambda (f) (setq handed-off f)))) (cj/transcribe-audio-at-point)) (should (equal handed-off "/tmp/recording.wav")))) diff --git a/tests/test-transcription-video.el b/tests/test-transcription-video.el new file mode 100644 index 00000000..8327fa32 --- /dev/null +++ b/tests/test-transcription-video.el @@ -0,0 +1,156 @@ +;;; test-transcription-video.el --- Tests for video transcription dispatch -*- lexical-binding: t; -*- + +;;; Commentary: +;; Tests for the video branch of the transcription pipeline. Audio +;; files keep flowing through `cj/--start-transcription-process' +;; unchanged (covered by sibling test files). Video files go through +;; ffmpeg audio extraction first, then into the same transcription +;; pipeline with the extracted file marked for cleanup once +;; transcription completes. + +;;; Code: + +(require 'ert) +(require 'cl-lib) + +(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory)) +(require 'transcription-config) + +;;; cj/--video-file-p + +(ert-deftest test-tx-video-file-p-recognizes-common-video-extensions () + "Normal: common video extensions are recognized." + (dolist (path '("clip.mp4" "talk.mkv" "demo.mov" "ad.webm" "old.avi" + "screencast.m4v" "promo.mpg")) + (should (cj/--video-file-p path)))) + +(ert-deftest test-tx-video-file-p-rejects-audio-and-non-media-extensions () + "Boundary: audio and unrelated extensions return nil." 
+ (dolist (path '("song.mp3" "notes.txt" "image.png" "archive.tar.gz")) + (should-not (cj/--video-file-p path)))) + +(ert-deftest test-tx-video-file-p-case-insensitive () + "Boundary: uppercase extensions count too." + (should (cj/--video-file-p "Clip.MP4")) + (should (cj/--video-file-p "TALK.MKV"))) + +(ert-deftest test-tx-video-file-p-handles-no-extension () + "Boundary: extensionless and nil/empty input returns nil." + (should-not (cj/--video-file-p "README")) + (should-not (cj/--video-file-p "")) + (should-not (cj/--video-file-p nil))) + +;;; cj/--media-file-p + +(ert-deftest test-tx-media-file-p-accepts-audio () + "Normal: audio passes." + (should (cj/--media-file-p "song.mp3"))) + +(ert-deftest test-tx-media-file-p-accepts-video () + "Normal: video passes." + (should (cj/--media-file-p "clip.mp4"))) + +(ert-deftest test-tx-media-file-p-rejects-non-media () + "Boundary: text, image, etc. fail." + (should-not (cj/--media-file-p "notes.txt")) + (should-not (cj/--media-file-p "image.png"))) + +;;; cj/--extract-audio-from-video + +(ert-deftest test-tx-extract-audio-invokes-ffmpeg-with-expected-args () + "Normal: extraction shells ffmpeg with -vn and the chosen MP3 encoder." + (let* ((video "/clips/demo.mp4") + (out "/tmp/cj-tx-extract.mp3") + make-process-kwargs) + (cl-letf (((symbol-function 'cj/executable-find-or-warn) + (lambda (&rest _) "/usr/bin/ffmpeg")) + ((symbol-function 'make-process) + (lambda (&rest kw) (setq make-process-kwargs kw) 'fake-process))) + (cj/--extract-audio-from-video video out #'ignore)) + (should make-process-kwargs) + (let ((cmd (plist-get make-process-kwargs :command))) + (should (equal (car cmd) "/usr/bin/ffmpeg")) + (should (member "-vn" cmd)) + (should (member video cmd)) + (should (member out cmd)) + (should (member "libmp3lame" cmd))))) + +(ert-deftest test-tx-extract-audio-errors-when-ffmpeg-missing () + "Error: ffmpeg not on PATH signals user-error before make-process." 
+ (cl-letf (((symbol-function 'cj/executable-find-or-warn) + (lambda (&rest _) nil)) + ((symbol-function 'make-process) + (lambda (&rest _) (error "make-process must not be called")))) + (should-error (cj/--extract-audio-from-video "/x.mp4" "/tmp/y.mp3" #'ignore) + :type 'user-error))) + +;;; cj/transcribe-media dispatcher + +(ert-deftest test-tx-transcribe-media-audio-routes-directly () + "Normal: audio paths go straight to the transcription worker, no ffmpeg." + (let* ((tmp (make-temp-file "cj-tx-aud-" nil ".mp3")) + worker-arg ffmpeg-called) + (unwind-protect + (cl-letf (((symbol-function 'cj/--start-transcription-process) + (lambda (file &rest _) (setq worker-arg file) 'fake-proc)) + ((symbol-function 'cj/--extract-audio-from-video) + (lambda (&rest _) (setq ffmpeg-called t)))) + (cj/transcribe-media tmp)) + (delete-file tmp)) + (should (equal worker-arg tmp)) + (should-not ffmpeg-called))) + +(ert-deftest test-tx-transcribe-media-video-extracts-then-transcribes () + "Normal: video paths invoke ffmpeg; on success the extracted audio +goes through `cj/--start-transcription-process' with a cleanup hint." + (let* ((tmp (make-temp-file "cj-tx-vid-" nil ".mp4")) + extract-args worker-call) + (unwind-protect + (cl-letf (((symbol-function 'cj/--extract-audio-from-video) + (lambda (vid out cb) + (setq extract-args (list vid out cb)) + ;; Simulate immediate ffmpeg success. + (funcall cb))) + ((symbol-function 'cj/--start-transcription-process) + (lambda (file &rest rest) + (setq worker-call (cons file rest)) + 'fake-proc))) + (cj/transcribe-media tmp)) + (delete-file tmp)) + ;; ffmpeg was asked to extract from tmp. + (should extract-args) + (should (equal (car extract-args) tmp)) + ;; The temp audio path passed to ffmpeg matches the path passed to + ;; the worker -- in other words the extraction output IS what the + ;; worker transcribes. 
+ (should (equal (nth 1 extract-args) (car worker-call))) + ;; The worker got the temp-audio as cleanup-file (so it gets + ;; deleted after transcription completes). + (should (equal (nth 1 extract-args) (cadr worker-call))))) + +(ert-deftest test-tx-transcribe-media-rejects-non-media () + "Error: non-media paths get rejected up front." + (should-error (cj/transcribe-media "/notes/readme.txt") :type 'user-error)) + +;;; Aliases + +(ert-deftest test-tx-old-transcribe-audio-aliases-new-media-command () + "Backwards compat: `cj/transcribe-audio' still resolves to the new +media dispatcher via defalias." + (should (eq (symbol-function 'cj/transcribe-audio) 'cj/transcribe-media))) + +(ert-deftest test-tx-old-at-point-aliases-new-media-at-point () + "Backwards compat: `cj/transcribe-audio-at-point' still resolves." + (should (eq (symbol-function 'cj/transcribe-audio-at-point) + 'cj/transcribe-media-at-point))) + +;;; Keybinding + +(ert-deftest test-tx-dired-T-binds-media-at-point () + "Normal: T in dired-mode-map invokes `cj/transcribe-media-at-point'." + (require 'dired) + (should (eq (lookup-key dired-mode-map (kbd "T")) + #'cj/transcribe-media-at-point))) + +(provide 'test-transcription-video) +;;; test-transcription-video.el ends here |
