aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--modules/transcription-config.el118
-rw-r--r--modules/user-constants.el6
-rw-r--r--tests/test-transcription-status-and-commands.el9
-rw-r--r--tests/test-transcription-video.el156
4 files changed, 270 insertions, 19 deletions
diff --git a/modules/transcription-config.el b/modules/transcription-config.el
index 3736835d..d81ccba0 100644
--- a/modules/transcription-config.el
+++ b/modules/transcription-config.el
@@ -8,8 +8,9 @@
;; Audio transcription workflow with multiple backend options.
;;
;; USAGE:
-;; In dired: Press `T` on an audio file to transcribe
-;; Anywhere: M-x cj/transcribe-audio
+;; In dired: Press `T` on an audio OR video file to transcribe
+;; (videos run through ffmpeg first to extract the audio)
+;; Anywhere: M-x cj/transcribe-media (M-x cj/transcribe-audio still works)
;; View active: M-x cj/transcriptions-buffer
;; Switch backend: M-x cj/transcription-switch-backend
;;
@@ -84,6 +85,17 @@ Status: running, complete, error")
(when-let ((ext (file-name-extension file)))
(member (downcase ext) cj/audio-file-extensions))))
+(defun cj/--video-file-p (file)
+  "Return non-nil when FILE has a recognized video extension.
+FILE must be a string; nil or any other non-string yields nil, as
+does a name without an extension.  The extension is downcased
+before it is looked up in `cj/video-file-extensions'."
+  (and (stringp file)
+       (let ((extension (file-name-extension file)))
+         (and extension
+              (member (downcase extension) cj/video-file-extensions)))))
+
+(defun cj/--media-file-p (file)
+  "Return non-nil when FILE is transcribable media.
+That is, when either `cj/--audio-file-p' or `cj/--video-file-p'
+accepts FILE; the audio check runs first."
+  (cond ((cj/--audio-file-p file))
+        ((cj/--video-file-p file))))
+
(defun cj/--transcription-output-files (audio-file)
"Return cons cell of (TXT-FILE . LOG-FILE) for AUDIO-FILE."
(let ((base (file-name-sans-extension audio-file)))
@@ -159,9 +171,13 @@ TITLE and MESSAGE are strings. URGENCY is normal or critical."
:body message
:urgency (or urgency 'normal))))
-(defun cj/--start-transcription-process (audio-file)
+(defun cj/--start-transcription-process (audio-file &optional cleanup-file)
"Start async transcription process for AUDIO-FILE.
-Returns the process object."
+Returns the process object.
+
+When CLEANUP-FILE is non-nil, delete that path once the transcription
+sentinel fires (success or failure). Used by the video flow to drop
+the temp audio file produced by ffmpeg after transcription completes."
(unless (file-exists-p audio-file)
(user-error "Audio file does not exist: %s" audio-file))
@@ -186,13 +202,57 @@ Returns the process object."
:buffer (get-buffer-create buffer-name)
:command (list script audio-file)
:sentinel (lambda (proc event)
- (cj/--transcription-sentinel proc event audio-file txt-file log-file))
+ (cj/--transcription-sentinel proc event audio-file txt-file log-file)
+ (when cleanup-file
+ (ignore-errors (delete-file cleanup-file))))
:stderr log-file)))
(cj/--track-transcription process audio-file)
(cj/--notify "Transcription"
(format "Started on %s" (file-name-nondirectory audio-file)))
process)))
+(defun cj/--video-extracted-audio-path (video-file)
+  "Create and return a temp .mp3 file for VIDEO-FILE's extracted audio.
+`make-temp-file' creates the (empty) file, so the caller owns its
+cleanup.  The name embeds VIDEO-FILE's base name so a leftover
+file is easy to trace back to its source."
+  (let ((prefix (concat "cj-tx-" (file-name-base video-file) "-")))
+    (make-temp-file prefix nil ".mp3")))
+
+(defun cj/--extract-audio-from-video (video-file output-file on-success)
+  "Async-extract the audio track from VIDEO-FILE to OUTPUT-FILE via ffmpeg.
+
+Return the ffmpeg process object.  When ffmpeg exits with status 0,
+kill the process buffer and call ON-SUCCESS with no arguments.  On
+any other termination (non-zero exit or death by signal), delete
+OUTPUT-FILE and post a critical notification via `cj/--notify';
+the process buffer is left alive in that case.  Signals
+`user-error' synchronously if ffmpeg isn't on PATH.
+
+Uses libmp3lame at quality 4 (~165kbps VBR) -- good for speech,
+universally accepted by the transcription backends."
+  (let ((ffmpeg (cj/executable-find-or-warn
+                 "ffmpeg" "video audio extraction" 'transcription-config)))
+    (unless ffmpeg
+      (user-error "ffmpeg not found on PATH -- install ffmpeg to transcribe videos"))
+    (let ((process-name (format "ffmpeg-extract-%s"
+                                (file-name-nondirectory video-file))))
+      (make-process
+       :name process-name
+       :buffer (get-buffer-create (format " *%s*" process-name))
+       ;; -y: OUTPUT-FILE may already exist (e.g. an empty temp file).
+       ;; -vn: drop the video stream, keep only the audio.
+       :command (list ffmpeg "-y" "-i" video-file
+                      "-vn" "-acodec" "libmp3lame" "-q:a" "4"
+                      output-file)
+       :sentinel
+       (lambda (proc _event)
+         ;; Decide on the process state, not the event text: a
+         ;; process killed by a signal reports the signal's
+         ;; description (e.g. "killed"), which matches none of
+         ;; "exited", "failed" or "signal".
+         (let ((status (process-status proc)))
+           (when (memq status '(exit signal))
+             (if (and (eq status 'exit)
+                      (= 0 (process-exit-status proc)))
+                 (let ((buf (process-buffer proc)))
+                   (when (buffer-live-p buf) (kill-buffer buf))
+                   (funcall on-success))
+               ;; Remove the partial output first so a failing
+               ;; notifier cannot strand it.
+               (ignore-errors (delete-file output-file))
+               (cj/--notify "Transcription"
+                            (format "ffmpeg failed on %s"
+                                    (file-name-nondirectory video-file))
+                            'critical)))))))))
+
+
(defun cj/--write-transcript-on-success (process-buffer success-p txt-file)
"Write PROCESS-BUFFER contents to TXT-FILE when SUCCESS-P is non-nil.
No-op if PROCESS-BUFFER is dead or SUCCESS-P is nil."
@@ -284,25 +344,49 @@ associated output files."
;; --------------------------- Interactive Commands ----------------------------
;;;###autoload
-(defun cj/transcribe-audio (audio-file)
- "Transcribe AUDIO-FILE asynchronously.
-Creates AUDIO.txt with transcript and AUDIO.log with process logs.
-Uses backend specified by `cj/transcribe-backend'."
- (interactive (list (read-file-name "Audio file to transcribe: "
+(defun cj/transcribe-media (file)
+  "Transcribe FILE asynchronously.  Accepts audio or video.
+
+For audio: hands the file straight to the transcription pipeline.
+For video: shells ffmpeg to extract the audio track to a temp .mp3,
+then transcribes that.  The temp audio is deleted after the
+transcription sentinel fires, or right away if extraction cannot
+be started (for example when ffmpeg is missing).
+
+Creates FILE.txt with the transcript (alongside the source) and
+FILE.log with process logs.  Uses the backend in
+`cj/transcribe-backend'.
+
+Signals `user-error' when FILE is neither audio nor video."
+  (interactive (list (read-file-name "Media file to transcribe: "
nil nil t nil
- #'cj/--audio-file-p)))
- (cj/--start-transcription-process (expand-file-name audio-file)))
+                                     #'cj/--media-file-p)))
+  (let ((path (expand-file-name file)))
+    (cond
+     ((cj/--audio-file-p path)
+      (cj/--start-transcription-process path))
+     ((cj/--video-file-p path)
+      ;; NOTE(review): the worker is handed the temp audio path, so any
+      ;; output names it derives from its argument will follow the temp
+      ;; file rather than FILE -- confirm where video transcripts land.
+      (let ((extracted (cj/--video-extracted-audio-path path))
+            (started nil))
+        (unwind-protect
+            (prog1 (cj/--extract-audio-from-video
+                    path extracted
+                    (lambda ()
+                      (cj/--start-transcription-process extracted extracted)))
+              (setq started t))
+          ;; The temp file already exists at this point.  If extraction
+          ;; exited non-locally (e.g. ffmpeg missing), nobody else will
+          ;; ever delete it, so drop it here.
+          (unless started
+            (ignore-errors (delete-file extracted))))))
+     (t
+      (user-error "Not an audio or video file: %s" path)))))
;;;###autoload
-(defun cj/transcribe-audio-at-point ()
- "Transcribe audio file at point in dired."
+(defun cj/transcribe-media-at-point ()
+ "Transcribe the audio or video file at point in dired/dirvish.
+Signals `user-error' when the current buffer is not derived from
+`dired-mode' or when there is no file at point; otherwise hands the
+file to `cj/transcribe-media'."
(interactive)
(unless (derived-mode-p 'dired-mode)
(user-error "Not in dired-mode"))
(let ((file (dired-get-filename nil t)))
(unless file
(user-error "No file at point"))
- (cj/transcribe-audio file)))
+ (cj/transcribe-media file)))
+
+;; Backwards-compat aliases. The audio-only names predate the
+;; video-extension work; keep them as drop-in synonyms for anyone
+;; with muscle memory or external scripts.  The old definitions were
+;; autoloaded, so the aliases carry the cookie too; otherwise the old
+;; names would drop out of any generated autoloads file.
+;;;###autoload
+(defalias 'cj/transcribe-audio 'cj/transcribe-media)
+;;;###autoload
+(defalias 'cj/transcribe-audio-at-point 'cj/transcribe-media-at-point)
(defun cj/--format-transcription-entry (entry)
"Return a display string for a transcription ENTRY.
@@ -374,11 +458,11 @@ Prompts with completing-read to select from available backends."
;; ------------------------------- Dired Integration ---------------------------
+;; `T' transcribes the audio or video file at point.  Each binding is
+;; deferred until its package has loaded.
(with-eval-after-load 'dired
- (define-key dired-mode-map (kbd "T") #'cj/transcribe-audio-at-point))
+ (define-key dired-mode-map (kbd "T") #'cj/transcribe-media-at-point))
;; Dirvish uses its own keymap, so bind T there too
(with-eval-after-load 'dirvish
- (define-key dirvish-mode-map (kbd "T") #'cj/transcribe-audio-at-point))
+ (define-key dirvish-mode-map (kbd "T") #'cj/transcribe-media-at-point))
;; Reach the transcription commands via M-x. The previous `C-; T'
;; menu was retired so the top-level slot could go to telega (which
diff --git a/modules/user-constants.el b/modules/user-constants.el
index b1f6016b..21e141a5 100644
--- a/modules/user-constants.el
+++ b/modules/user-constants.el
@@ -56,6 +56,12 @@ Used by cursor color, modeline, and other UI elements.")
"File extensions recognized as audio files.
Used by transcription module and other audio-related functionality.")
+(defvar cj/video-file-extensions
+ '("mp4" "mkv" "mov" "webm" "avi" "m4v" "wmv" "flv" "mpg" "mpeg" "3gp" "ogv")
+ "File extensions recognized as video files.
+Entries are lowercase and carry no leading dot.
+Used by transcription to dispatch the audio-extract step in front of
+the regular transcription pipeline.")
+
;; ------------------------ Directory And File Constants -----------------------
;; DIRECTORIES
diff --git a/tests/test-transcription-status-and-commands.el b/tests/test-transcription-status-and-commands.el
index 90841e70..7c796de0 100644
--- a/tests/test-transcription-status-and-commands.el
+++ b/tests/test-transcription-status-and-commands.el
@@ -179,13 +179,18 @@
(should-error (cj/transcribe-audio-at-point) :type 'user-error)))
(ert-deftest test-tx-transcribe-audio-at-point-normal-delegates ()
- "Normal: with a file at point, delegates to `cj/transcribe-audio'."
+ "Normal: with a file at point, delegates to `cj/transcribe-media'.
+
+`cj/transcribe-audio-at-point' is now a `defalias' for
+`cj/transcribe-media-at-point', which hands off to
+`cj/transcribe-media' (no longer the old audio-only command). The
+test stubs `cj/transcribe-media' by name, so calling through the old
+alias still pins which function receives the file."
(let ((handed-off nil))
(cl-letf (((symbol-function 'derived-mode-p)
(lambda (&rest modes) (memq 'dired-mode modes)))
((symbol-function 'dired-get-filename)
(lambda (&rest _) "/tmp/recording.wav"))
- ((symbol-function 'cj/transcribe-audio)
+ ((symbol-function 'cj/transcribe-media)
(lambda (f) (setq handed-off f))))
(cj/transcribe-audio-at-point))
(should (equal handed-off "/tmp/recording.wav"))))
diff --git a/tests/test-transcription-video.el b/tests/test-transcription-video.el
new file mode 100644
index 00000000..8327fa32
--- /dev/null
+++ b/tests/test-transcription-video.el
@@ -0,0 +1,156 @@
+;;; test-transcription-video.el --- Tests for video transcription dispatch -*- lexical-binding: t; -*-
+
+;;; Commentary:
+;; Tests for the video branch of the transcription pipeline. Audio
+;; files keep flowing through `cj/--start-transcription-process'
+;; unchanged (covered by sibling test files). Video files go through
+;; ffmpeg audio extraction first, then into the same transcription
+;; pipeline with the extracted file marked for cleanup once
+;; transcription completes.
+
+;;; Code:
+
+(require 'ert)
+(require 'cl-lib)
+
+(add-to-list 'load-path (expand-file-name "modules" user-emacs-directory))
+(require 'transcription-config)
+
+;;; cj/--video-file-p
+
+(ert-deftest test-tx-video-file-p-recognizes-common-video-extensions ()
+  "Normal: every sampled video file name satisfies `cj/--video-file-p'."
+  (mapc (lambda (name)
+          (should (cj/--video-file-p name)))
+        '("clip.mp4" "talk.mkv" "demo.mov" "ad.webm" "old.avi"
+          "screencast.m4v" "promo.mpg")))
+
+(ert-deftest test-tx-video-file-p-rejects-audio-and-non-media-extensions ()
+  "Boundary: audio, text, image and archive names are not video."
+  (cl-loop for name in '("song.mp3" "notes.txt" "image.png" "archive.tar.gz")
+           do (should-not (cj/--video-file-p name))))
+
+(ert-deftest test-tx-video-file-p-case-insensitive ()
+  "Boundary: an uppercase extension still counts as video."
+  (dolist (name '("Clip.MP4" "TALK.MKV"))
+    (should (cj/--video-file-p name))))
+
+(ert-deftest test-tx-video-file-p-handles-no-extension ()
+  "Boundary: an extensionless name, the empty string and nil all yield nil."
+  (dolist (input '("README" "" nil))
+    (should-not (cj/--video-file-p input))))
+
+;;; cj/--media-file-p
+
+(ert-deftest test-tx-media-file-p-accepts-audio ()
+  "Normal: an audio file name counts as media."
+  (let ((audio-name "song.mp3"))
+    (should (cj/--media-file-p audio-name))))
+
+(ert-deftest test-tx-media-file-p-accepts-video ()
+  "Normal: a video file name counts as media."
+  (let ((video-name "clip.mp4"))
+    (should (cj/--media-file-p video-name))))
+
+(ert-deftest test-tx-media-file-p-rejects-non-media ()
+  "Boundary: text and image file names are not media."
+  (dolist (name '("notes.txt" "image.png"))
+    (should-not (cj/--media-file-p name))))
+
+;;; cj/--extract-audio-from-video
+
+(ert-deftest test-tx-extract-audio-invokes-ffmpeg-with-expected-args ()
+  "Normal: extraction shells ffmpeg with -vn and the chosen MP3 encoder.
+`make-process' is stubbed to capture its keyword arguments, so no
+real ffmpeg runs.  The buffer the function under test creates for
+the process is killed afterwards so the test leaves nothing behind."
+  (let* ((video "/clips/demo.mp4")
+         (out "/tmp/cj-tx-extract.mp3")
+         make-process-kwargs)
+    (unwind-protect
+        (progn
+          (cl-letf (((symbol-function 'cj/executable-find-or-warn)
+                     (lambda (&rest _) "/usr/bin/ffmpeg"))
+                    ((symbol-function 'make-process)
+                     (lambda (&rest kw) (setq make-process-kwargs kw) 'fake-process)))
+            (cj/--extract-audio-from-video video out #'ignore))
+          (should make-process-kwargs)
+          (let ((cmd (plist-get make-process-kwargs :command)))
+            (should (equal (car cmd) "/usr/bin/ffmpeg"))
+            (should (member "-vn" cmd))
+            (should (member video cmd))
+            (should (member out cmd))
+            (should (member "libmp3lame" cmd))))
+      ;; The :buffer argument is evaluated before the stubbed
+      ;; `make-process' runs, so a real buffer exists by now.
+      (let ((buf (plist-get make-process-kwargs :buffer)))
+        (when (buffer-live-p buf) (kill-buffer buf))))))
+
+(ert-deftest test-tx-extract-audio-errors-when-ffmpeg-missing ()
+  "Error: with no ffmpeg found, `user-error' fires before any process starts."
+  (cl-letf (((symbol-function 'make-process)
+             (lambda (&rest _) (error "make-process must not be called")))
+            ((symbol-function 'cj/executable-find-or-warn)
+             #'ignore))
+    (should-error (cj/--extract-audio-from-video "/x.mp4" "/tmp/y.mp3" #'ignore)
+                  :type 'user-error)))
+
+;;; cj/transcribe-media dispatcher
+
+(ert-deftest test-tx-transcribe-media-audio-routes-directly ()
+  "Normal: an audio file reaches the transcription worker without ffmpeg."
+  (let ((audio (make-temp-file "cj-tx-aud-" nil ".mp3"))
+        (received nil)
+        (extractor-ran nil))
+    (unwind-protect
+        (cl-letf (((symbol-function 'cj/--extract-audio-from-video)
+                   (lambda (&rest _) (setq extractor-ran t)))
+                  ((symbol-function 'cj/--start-transcription-process)
+                   (lambda (file &rest _) (setq received file) 'fake-proc)))
+          (cj/transcribe-media audio))
+      (delete-file audio))
+    (should-not extractor-ran)
+    (should (equal received audio))))
+
+(ert-deftest test-tx-transcribe-media-video-extracts-then-transcribes ()
+  "Normal: video paths invoke ffmpeg; on success the extracted audio
+goes through `cj/--start-transcription-process' with a cleanup hint.
+
+Both the extractor and the worker are stubbed, but the real
+`cj/--video-extracted-audio-path' still runs and creates a temp
+.mp3, so the test removes that file itself."
+  (let* ((tmp (make-temp-file "cj-tx-vid-" nil ".mp4"))
+         extract-args worker-call)
+    (unwind-protect
+        (cl-letf (((symbol-function 'cj/--extract-audio-from-video)
+                   (lambda (vid out cb)
+                     (setq extract-args (list vid out cb))
+                     ;; Simulate immediate ffmpeg success.
+                     (funcall cb)))
+                  ((symbol-function 'cj/--start-transcription-process)
+                   (lambda (file &rest rest)
+                     (setq worker-call (cons file rest))
+                     'fake-proc)))
+          (cj/transcribe-media tmp))
+      (delete-file tmp)
+      ;; The stubbed worker never performs the cleanup a real run
+      ;; would, so drop the extracted-audio temp file here.
+      (let ((extracted (nth 1 extract-args)))
+        (when (and (stringp extracted) (file-exists-p extracted))
+          (delete-file extracted))))
+    ;; ffmpeg was asked to extract from tmp.
+    (should extract-args)
+    (should (equal (car extract-args) tmp))
+    ;; The temp audio path passed to ffmpeg matches the path passed to
+    ;; the worker -- in other words the extraction output IS what the
+    ;; worker transcribes.
+    (should (equal (nth 1 extract-args) (car worker-call)))
+    ;; The worker got the temp-audio as cleanup-file (so it gets
+    ;; deleted after transcription completes).
+    (should (equal (nth 1 extract-args) (cadr worker-call)))))
+
+(ert-deftest test-tx-transcribe-media-rejects-non-media ()
+  "Error: a path that is neither audio nor video raises `user-error'."
+  (let ((text-path "/notes/readme.txt"))
+    (should-error (cj/transcribe-media text-path) :type 'user-error)))
+
+;;; Aliases
+
+(ert-deftest test-tx-old-transcribe-audio-aliases-new-media-command ()
+  "Backwards compat: the function cell of `cj/transcribe-audio' is the
+symbol `cj/transcribe-media', i.e. the old name is a `defalias'."
+  (let ((target (symbol-function 'cj/transcribe-audio)))
+    (should (eq target 'cj/transcribe-media))))
+
+(ert-deftest test-tx-old-at-point-aliases-new-media-at-point ()
+  "Backwards compat: the function cell of `cj/transcribe-audio-at-point'
+is the symbol `cj/transcribe-media-at-point'."
+  (let ((target (symbol-function 'cj/transcribe-audio-at-point)))
+    (should (eq target 'cj/transcribe-media-at-point))))
+
+;;; Keybinding
+
+(ert-deftest test-tx-dired-T-binds-media-at-point ()
+  "Normal: once dired is loaded, `T' in `dired-mode-map' is bound to
+`cj/transcribe-media-at-point'."
+  (require 'dired)
+  (let ((bound (lookup-key dired-mode-map (kbd "T"))))
+    (should (eq bound #'cj/transcribe-media-at-point))))
+
+(provide 'test-transcription-video)
+;;; test-transcription-video.el ends here