diff options
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/assemblyai-transcribe | 134 | ||||
| -rwxr-xr-x | scripts/delete-elisp-compiled-files.sh | 7 | ||||
| -rwxr-xr-x | scripts/install-whisper.sh | 103 | ||||
| -rwxr-xr-x | scripts/languagetool-flycheck | 82 | ||||
| -rwxr-xr-x | scripts/local-whisper | 60 | ||||
| -rwxr-xr-x | scripts/oai-transcribe | 45 | ||||
| -rwxr-xr-x | scripts/profile-dotemacs.sh | 20 | ||||
| -rwxr-xr-x | scripts/reset-to-first-launch.sh | 48 | ||||
| -rwxr-xr-x | scripts/uninstall-whisper.sh | 65 |
9 files changed, 489 insertions, 75 deletions
diff --git a/scripts/assemblyai-transcribe b/scripts/assemblyai-transcribe
new file mode 100755
index 00000000..22cbf538
--- /dev/null
+++ b/scripts/assemblyai-transcribe
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# assemblyai-transcribe - Transcribe audio files using AssemblyAI API with speaker diarization
+# Usage: assemblyai-transcribe <audio-file> [language]
+#
+# Requires: ASSEMBLYAI_API_KEY environment variable
+# Language: en, es, fr, etc. (default: en)
+# Features: Speaker diarization (up to 50 speakers)
+
+set -euo pipefail
+
+# Parse arguments (not named LANG: that is the locale variable child processes inherit)
+AUDIO="${1:-}"
+LANG_CODE="${2:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: assemblyai-transcribe <audio-file> [language]" >&2
+  echo "Example: assemblyai-transcribe meeting.m4a en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check API key is set
+if [[ -z "${ASSEMBLYAI_API_KEY:-}" ]]; then
+  echo "Error: ASSEMBLYAI_API_KEY environment variable not set" >&2
+  exit 1
+fi
+
+# Check curl is available
+if ! command -v curl &> /dev/null; then
+  echo "Error: curl command not found" >&2
+  exit 1
+fi
+
+# Check jq is available (for building and parsing JSON)
+if ! command -v jq &> /dev/null; then
+  echo "Error: jq command not found (required for JSON parsing)" >&2
+  echo "Install with: sudo pacman -S jq" >&2
+  exit 1
+fi
+
+API_BASE="https://api.assemblyai.com/v2"
+
+# Step 1: Upload audio file (-S keeps curl's own error text visible despite -s)
+echo "Uploading audio file..." >&2
+UPLOAD_RESPONSE=$(curl -sS -X POST "${API_BASE}/upload" \
+  -H "Authorization: ${ASSEMBLYAI_API_KEY}" \
+  --data-binary "@${AUDIO}")
+
+UPLOAD_URL=$(jq -r '.upload_url // empty' <<<"$UPLOAD_RESPONSE" 2>/dev/null) || UPLOAD_URL=""
+
+if [[ -z "$UPLOAD_URL" ]] || [[ "$UPLOAD_URL" == "null" ]]; then
+  echo "Error: Failed to upload audio file" >&2
+  echo "$UPLOAD_RESPONSE" >&2
+  exit 1
+fi
+
+echo "Upload complete. Submitting transcription..." >&2
+
+# Step 2: Submit transcription request with speaker labels (jq escapes the values)
+TRANSCRIPT_REQUEST=$(jq -n \
+  --arg audio_url "$UPLOAD_URL" \
+  --arg language_code "$LANG_CODE" \
+  '{
+    audio_url: $audio_url,
+    language_code: $language_code,
+    speech_model: "universal",
+    speaker_labels: true
+  }')
+
+TRANSCRIPT_RESPONSE=$(curl -sS -X POST "${API_BASE}/transcript" \
+  -H "Authorization: ${ASSEMBLYAI_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d "$TRANSCRIPT_REQUEST")
+
+TRANSCRIPT_ID=$(jq -r '.id // empty' <<<"$TRANSCRIPT_RESPONSE" 2>/dev/null) || TRANSCRIPT_ID=""
+
+if [[ -z "$TRANSCRIPT_ID" ]] || [[ "$TRANSCRIPT_ID" == "null" ]]; then
+  echo "Error: Failed to submit transcription" >&2
+  echo "$TRANSCRIPT_RESPONSE" >&2
+  exit 1
+fi
+
+echo "Transcription job submitted (ID: ${TRANSCRIPT_ID})" >&2
+echo "Waiting for completion..." >&2
+
+# Step 3: Poll until the job leaves the queued/processing states or MAX_WAIT passes
+STATUS="queued"
+POLL_INTERVAL=3
+MAX_WAIT=1800 # 30 minutes
+ELAPSED=0
+
+while [[ "$STATUS" == "queued" ]] || [[ "$STATUS" == "processing" ]]; do
+  if (( ELAPSED >= MAX_WAIT )); then
+    echo "Error: Transcription timed out after ${MAX_WAIT} seconds" >&2
+    exit 1
+  fi
+
+  sleep "$POLL_INTERVAL"
+  ELAPSED=$((ELAPSED + POLL_INTERVAL))
+
+  RESULT=$(curl -sS -X GET "${API_BASE}/transcript/${TRANSCRIPT_ID}" \
+    -H "Authorization: ${ASSEMBLYAI_API_KEY}")
+
+  STATUS=$(jq -r '.status' <<<"$RESULT")
+
+  if [[ "$STATUS" == "processing" ]]; then
+    echo "Processing... (${ELAPSED}s elapsed)" >&2
+  fi
+done
+
+# Any final status other than "completed" is reported as a failure
+if [[ "$STATUS" != "completed" ]]; then
+  ERROR_MSG=$(jq -r '.error // "Unknown error"' <<<"$RESULT")
+  echo "Error: Transcription failed with status: ${STATUS}" >&2
+  echo "Error message: ${ERROR_MSG}" >&2
+  exit 1
+fi
+
+echo "Transcription complete! (${ELAPSED}s total)" >&2
+
+# Step 4: Format output with speaker labels
+# Print utterances as "Speaker A: text"; fall back to the plain text without them
+jq -r '
+  if .utterances then
+    .utterances[] | "Speaker \(.speaker): \(.text)"
+  else
+    .text
+  end
+' <<<"$RESULT"
diff --git a/scripts/delete-elisp-compiled-files.sh b/scripts/delete-elisp-compiled-files.sh
deleted file mode 100755
index 5976c92b..00000000
--- a/scripts/delete-elisp-compiled-files.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-location=$HOME/.emacs.d/
-
-echo "Deleting emacs lisp compiled files (.eln and .elc) from $location..."
-find $location -type f \( -name "*.eln" -o -name "*.elc" \) -exec rm -f {} +
-echo "Done."
diff --git a/scripts/install-whisper.sh b/scripts/install-whisper.sh
new file mode 100755
index 00000000..e2ea4ac9
--- /dev/null
+++ b/scripts/install-whisper.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Install OpenAI Whisper for transcription on Arch Linux
+# Usage: install-whisper.sh [--yes] # --yes for non-interactive mode
+
+set -euo pipefail
+
+# Non-interactive mode: --yes / -y skips every confirmation prompt below
+ASSUME_YES=false
+if [[ "${1:-}" == "--yes" ]] || [[ "${1:-}" == "-y" ]]; then
+  ASSUME_YES=true
+fi
+
+echo "=== Whisper Installation for Arch Linux ==="
+echo
+
+# Check if running on Arch
+if [[ ! -f /etc/arch-release ]]; then
+  echo "Warning: This script is designed for Arch Linux"
+  if [[ "$ASSUME_YES" == false ]]; then
+    read -p "Continue anyway? [y/N] " -n 1 -r
+    echo
+    [[ $REPLY =~ ^[Yy]$ ]] || exit 1
+  else
+    echo "Continuing anyway (--yes mode)"
+  fi
+fi
+
+# 1. Install system dependencies
+echo "Step 1/3: Installing system dependencies (ffmpeg)..."
+if ! command -v ffmpeg &> /dev/null; then
+  sudo pacman -S --needed ffmpeg
+  echo "✓ ffmpeg installed"
+else
+  echo "✓ ffmpeg already installed"
+fi
+
+# 2. Check for AUR package first (optional but cleaner)
+echo
+echo "Step 2/3: Checking for AUR package..."
+AUR_INSTALLED=false
+
+if command -v yay &> /dev/null; then
+  echo "Found yay. Checking AUR for python-openai-whisper..."
+  if yay -Ss python-openai-whisper | grep 'python-openai-whisper' > /dev/null; then
+    INSTALL_AUR=false
+    if [[ "$ASSUME_YES" == true ]]; then
+      echo "Installing from AUR (--yes mode)"
+      INSTALL_AUR=true
+    else
+      read -p "Install from AUR via yay? [Y/n] " -n 1 -r
+      echo
+      if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+        INSTALL_AUR=true
+      fi
+    fi
+
+    if [[ "$INSTALL_AUR" == true ]]; then
+      yay -S --needed --noconfirm python-openai-whisper
+      echo "✓ Installed from AUR"
+      AUR_INSTALLED=true
+    fi
+  else
+    echo "Package python-openai-whisper not found in AUR"
+  fi
+else
+  echo "yay not found. Skipping AUR installation."
+  echo "(Install yay if you prefer AUR packages)"
+fi
+
+# 3. Install via pip if not from AUR (NOTE(review): PEP 668 may block this on Arch - confirm)
+if [[ "$AUR_INSTALLED" == false ]]; then
+  echo
+  echo "Step 3/3: Installing openai-whisper via pip..."
+  pip install --user -U openai-whisper
+  echo "✓ openai-whisper installed via pip"
+  echo
+  echo "Note: Ensure ~/.local/bin is in your PATH"
+  echo "Add to ~/.bashrc or ~/.zshrc: export PATH=\"\$HOME/.local/bin:\$PATH\""
+fi
+
+# Verify installation
+echo
+echo "=== Verifying Installation ==="
+if command -v whisper &> /dev/null; then
+  echo "✓ whisper command found at: $(command -v whisper)"
+  whisper --help | head -n 3 || true # head closing the pipe early must not trip pipefail
+  echo
+  echo "=== Installation Complete! ==="
+  echo
+  echo "Models available: tiny, base, small, medium, large"
+  echo "Recommended: small (good balance of speed/accuracy)"
+  echo "Model will download automatically on first use."
+  echo
+  echo "Test with: whisper your-audio.m4a --model small --language en"
+else
+  echo "✗ Installation failed - whisper command not found"
+  echo
+  echo "Troubleshooting:"
+  echo "1. Ensure ~/.local/bin is in your PATH"
+  echo "2. Run: source ~/.bashrc (or ~/.zshrc)"
+  echo "3. Try: python -m whisper --help"
+  exit 1
+fi
diff --git a/scripts/languagetool-flycheck b/scripts/languagetool-flycheck
new file mode 100755
index 00000000..ecbc900f
--- /dev/null
+++ b/scripts/languagetool-flycheck
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Wrapper for LanguageTool to produce flycheck-compatible output.
+Output format: filename:line:column: message
+"""
+
+import json
+import sys
+import subprocess
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: languagetool-flycheck FILE", file=sys.stderr)
+        sys.exit(1)
+
+    filename = sys.argv[1]
+
+    # Run languagetool with JSON output
+    try:
+        result = subprocess.run(
+            ['languagetool', '-l', 'en-US', '--json', filename],
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+    except subprocess.TimeoutExpired:
+        print(f"{filename}:1:1: LanguageTool timeout", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"{filename}:1:1: LanguageTool error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Parse JSON output
+    try:
+        # Find the JSON in the output (skip warning lines)
+        json_output = None
+        for line in result.stdout.split('\n'):
+            if line.startswith('{'):
+                json_output = line
+                break
+
+        if not json_output:
+            sys.exit(0)  # No errors found
+
+        data = json.loads(json_output)
+
+        # Read the file untranslated (newline='') so CRLF files keep the same offsets
+        with open(filename, 'r', encoding='utf-8', newline='') as f:
+            content = f.read()
+
+        # Convert matches to flycheck format
+        for match in data.get('matches', []):
+            # Only the start offset is needed; flycheck output has no end position
+            offset = match['offset']
+            message = match['message']
+            rule_id = match['rule']['id']
+
+            # Calculate 1-based line and column from the character offset
+            line = content[:offset].count('\n') + 1
+            line_start = content.rfind('\n', 0, offset) + 1
+            column = offset - line_start + 1
+
+            # Get first suggestion if available
+            suggestions = match.get('replacements', [])
+            if suggestions:
+                suggestion = suggestions[0]['value']
+                message = f"{rule_id}: {message} Suggestion: {suggestion}"
+            else:
+                message = f"{rule_id}: {message}"
+
+            # Output in flycheck format
+            print(f"{filename}:{line}:{column}: {message}")
+
+    except json.JSONDecodeError as e:
+        print(f"{filename}:1:1: Failed to parse LanguageTool JSON: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"{filename}:1:1: Error processing LanguageTool output: {e}", file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/local-whisper b/scripts/local-whisper
new file mode 100755
index 00000000..b08651c9
--- /dev/null
+++ b/scripts/local-whisper
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# local-whisper - Transcribe audio files using locally installed Whisper
+# Usage: local-whisper <audio-file> [model] [language]
+#
+# Models: tiny, base, small, medium, large (default: small)
+# Language: en, es, fr, etc. (default: en)
+
+set -euo pipefail
+
+# Parse arguments (not named LANG: that is the locale variable whisper would inherit)
+AUDIO="${1:-}"
+MODEL="${2:-small}"
+LANG_CODE="${3:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: local-whisper <audio-file> [model] [language]" >&2
+  echo "Example: local-whisper meeting.m4a small en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check whisper is installed
+if ! command -v whisper &> /dev/null; then
+  echo "Error: whisper command not found" >&2
+  echo "Install with: ~/.emacs.d/scripts/install-whisper.sh" >&2
+  exit 1
+fi
+
+# Get absolute path to audio file
+AUDIO_ABS="$(realpath -- "$AUDIO")"
+AUDIO_DIR="$(dirname "$AUDIO_ABS")"
+AUDIO_BASE="$(basename "$AUDIO_ABS")"
+AUDIO_NAME="${AUDIO_BASE%.*}"
+
+# Run whisper; its own output goes to stderr so stdout carries only the transcript
+# Note: whisper creates ${AUDIO_NAME}.txt automatically in the output directory
+whisper "$AUDIO_ABS" \
+  --model "$MODEL" \
+  --language "$LANG_CODE" \
+  --task transcribe \
+  --output_format txt \
+  --output_dir "$AUDIO_DIR" \
+  --verbose False 1>&2
+
+# Output file that whisper creates
+OUTPUT_FILE="$AUDIO_DIR/$AUDIO_NAME.txt"
+
+# Return transcript to stdout
+if [[ -f "$OUTPUT_FILE" ]]; then
+  cat "$OUTPUT_FILE"
+  exit 0
+else
+  echo "Error: Whisper did not create expected output file: $OUTPUT_FILE" >&2
+  exit 1
+fi
diff --git a/scripts/oai-transcribe b/scripts/oai-transcribe
new file mode 100755
index 00000000..f64a8122
--- /dev/null
+++ b/scripts/oai-transcribe
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# oai-transcribe - Transcribe audio files using OpenAI Whisper API
+# Usage: oai-transcribe <audio-file> [language]
+#
+# Requires: OPENAI_API_KEY environment variable
+# Language: en, es, fr, etc. (default: en)
+
+set -euo pipefail
+
+# Parse arguments (not named LANG: that is the locale variable curl would inherit)
+AUDIO="${1:-}"
+LANG_CODE="${2:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: oai-transcribe <audio-file> [language]" >&2
+  echo "Example: oai-transcribe meeting.m4a en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check API key is set
+if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+  echo "Error: OPENAI_API_KEY environment variable not set" >&2
+  echo "Set with: export OPENAI_API_KEY='sk-...'" >&2
+  exit 1
+fi
+
+# Check curl is available
+if ! command -v curl &> /dev/null; then
+  echo "Error: curl command not found" >&2
+  exit 1
+fi
+
+# Call OpenAI API; --fail makes an HTTP error a non-zero exit, not a fake transcript
+curl -sS --fail -X POST "https://api.openai.com/v1/audio/transcriptions" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -F "model=whisper-1" \
+  -F "response_format=text" \
+  -F "language=${LANG_CODE}" \
+  -F "file=@${AUDIO}"
diff --git a/scripts/profile-dotemacs.sh b/scripts/profile-dotemacs.sh
deleted file mode 100755
index b31d078b..00000000
--- a/scripts/profile-dotemacs.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-# profile-dotemacs.sh
-# Craig Jennings <c@cjennings.net>
-# a convenience script to load an emacs-lisp file which will
-# startup emacs (with or without an early-init) and provide
-# benchmark information on the Emacs config.
-
-EMACS_HOME="$HOME/.emacs.d/"
-EARLY_INIT_FILE="$EMACS_HOME/early-init.el"
-PROFILE_FILE="$EMACS_HOME/custom/profile-dotemacs.el"
-
-if [ -f "$EARLY_INIT_FILE" ]
-then
-    emacs -Q --load $PROFILE_FILE --eval "(progn (load-file \"~/.emacs.d/early-init.el\") (profile-dotemacs))"
-else
-    echo "No early init found. Proceeding to benchmark init.el."
-    emacs -Q --load $PROFILE_FILE --eval "(profile-dotemacs)"
-fi
-
diff --git a/scripts/reset-to-first-launch.sh b/scripts/reset-to-first-launch.sh
deleted file mode 100755
index 7bb15ba5..00000000
--- a/scripts/reset-to-first-launch.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/sh
-# script for Emacs config testing
-# - clears out all but necessary init/config files
-# - removes native ad bytecode files.
-
-rm -rf ~/.cache/org-persist/
-rm -rf ~/.emacs.d/.cache/
-rm -rf ~/.emacs.d/.elfeed-db/
-rm -rf ~/.emacs.d/auto-save-list/
-rm -rf ~/.emacs.d/backups/
-rm -rf ~/.emacs.d/crossword/
-rm -rf ~/.emacs.d/dirvish/
-rm -rf ~/.emacs.d/eln-cache/
-rm -rf ~/.emacs.d/elpa/
-rm -rf ~/.emacs.d/emms/
-rm -rf ~/.emacs.d/emojis/
-rm -rf ~/.emacs.d/erc/
-rm -rf ~/.emacs.d/eshell/
-rm -rf ~/.emacs.d/multisession
-rm -rf ~/.emacs.d/nov-places/
-rm -rf ~/.emacs.d/persist/
-rm -rf ~/.emacs.d/quelpa/
-rm -rf ~/.emacs.d/request/
-rm -rf ~/.emacs.d/tramp-auto-save/
-rm -rf ~/.emacs.d/transient/
-rm -rf ~/.emacs.d/tree-sitter/
-rm -rf ~/.emacs.d/url/
-rm -rf ~/.emacs.d/var
-rm ~/.emacs.d/.elfeed-db
-rm ~/.emacs.d/.emacs-history
-rm ~/.emacs.d/.lsp-session*
-rm ~/.emacs.d/.org-generic-id-locations
-rm ~/.emacs.d/.org-id-locations
-rm ~/.emacs.d/.pdf-view-restore
-rm ~/.emacs.d/.scratch
-rm ~/.emacs.d/forge-database.sqlite
-rm ~/.emacs.d/history
-rm ~/.emacs.d/nov-places
-rm ~/.emacs.d/org-roam.db
-rm ~/.emacs.d/pomm
-rm ~/.emacs.d/projectile-bookmarks.eld
-rm ~/.emacs.d/projects
-rm ~/.emacs.d/recentf
-rm ~/.emacs.d/tramp-connection-history
-rm ~/sync/org/emacs-theme.persist
-
-find ~/.emacs.d -name "*.eln" -type f -delete
-find ~/.emacs.d -name "*.elc" -type f -delete
diff --git a/scripts/uninstall-whisper.sh b/scripts/uninstall-whisper.sh
new file mode 100755
index 00000000..e46c6ebc
--- /dev/null
+++ b/scripts/uninstall-whisper.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# Uninstall OpenAI Whisper (interactive: every removal is confirmed first)
+
+set -euo pipefail
+
+echo "=== Whisper Uninstallation ==="
+echo
+
+REMOVED=false
+
+# Check if installed via AUR
+if command -v yay &> /dev/null; then
+  if yay -Qi python-openai-whisper &> /dev/null; then
+    echo "Detected AUR installation (python-openai-whisper)"
+    read -p "Remove via yay? [Y/n] " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+      yay -R python-openai-whisper
+      echo "✓ Removed via AUR"
+      REMOVED=true
+    fi
+  fi
+fi
+
+# Check if installed via pip (pip show matches the exact package and needs no pipe)
+if pip show openai-whisper &> /dev/null; then
+  echo "Detected pip installation (openai-whisper)"
+  read -p "Remove via pip? [Y/n] " -n 1 -r
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+    pip uninstall -y openai-whisper
+    echo "✓ Removed via pip"
+    REMOVED=true
+  fi
+fi
+
+if [[ "$REMOVED" == false ]]; then
+  echo "Nothing removed (no AUR/pip installation found, or removal declined)"
+fi
+
+# Ask about ffmpeg
+echo
+read -p "Remove ffmpeg? (may be used by other apps) [y/N] " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+  # pacman refuses while other packages depend on ffmpeg; do not abort on that
+  sudo pacman -R ffmpeg && echo "✓ Removed ffmpeg" || true
+fi
+
+# Ask about model cache
+CACHE_DIR="$HOME/.cache/whisper"
+if [[ -d "$CACHE_DIR" ]]; then
+  echo
+  echo "Whisper models are cached in: $CACHE_DIR"
+  du -sh "$CACHE_DIR" 2>/dev/null || echo "Size: unknown"
+  read -p "Delete cached models? [y/N] " -n 1 -r
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]]; then
+    rm -rf -- "${CACHE_DIR:?}"
+    echo "✓ Deleted model cache"
+  fi
+fi
+
+echo
+echo "=== Uninstallation Complete ==="
