6 files changed, 489 insertions, 0 deletions
diff --git a/scripts/assemblyai-transcribe b/scripts/assemblyai-transcribe
new file mode 100755
index 00000000..22cbf538
--- /dev/null
+++ b/scripts/assemblyai-transcribe
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# assemblyai-transcribe - Transcribe audio files using AssemblyAI API with speaker diarization
+# Usage: assemblyai-transcribe <audio-file> [language]
+# Output: transcript on stdout; progress and error messages on stderr
+# Requires: ASSEMBLYAI_API_KEY environment variable, curl, jq
+# Language: en, es, fr, etc. (default: en)
+# Features: Speaker diarization (up to 50 speakers)
+
+set -euo pipefail
+
+# Parse arguments (LANG_CODE, not LANG: LANG is the locale variable curl and jq would inherit)
+AUDIO="${1:-}"
+LANG_CODE="${2:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: assemblyai-transcribe <audio-file> [language]" >&2
+  echo "Example: assemblyai-transcribe meeting.m4a en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check API key is set
+if [[ -z "${ASSEMBLYAI_API_KEY:-}" ]]; then
+  echo "Error: ASSEMBLYAI_API_KEY environment variable not set" >&2
+  exit 1
+fi
+
+# Check curl is available
+if ! command -v curl &> /dev/null; then
+  echo "Error: curl command not found" >&2
+  exit 1
+fi
+
+# Check jq is available (for JSON parsing)
+if ! command -v jq &> /dev/null; then
+  echo "Error: jq command not found (required for JSON parsing)" >&2
+  echo "Install with: sudo pacman -S jq" >&2
+  exit 1
+fi
+
+readonly API_BASE="https://api.assemblyai.com/v2"
+
+# Step 1: Upload audio file (-S keeps curl's own error message despite -s)
+echo "Uploading audio file..." >&2
+UPLOAD_RESPONSE=$(curl -sS -X POST "${API_BASE}/upload" \
+  -H "Authorization: ${ASSEMBLYAI_API_KEY}" \
+  --data-binary "@${AUDIO}")
+# A non-JSON reply must reach the error branch below, so a jq failure is not fatal here
+UPLOAD_URL=$(jq -r '.upload_url // empty' <<< "$UPLOAD_RESPONSE" 2> /dev/null || true)
+
+if [[ -z "$UPLOAD_URL" ]] || [[ "$UPLOAD_URL" == "null" ]]; then
+  echo "Error: Failed to upload audio file" >&2
+  echo "$UPLOAD_RESPONSE" >&2
+  exit 1
+fi
+
+echo "Upload complete. Submitting transcription..." >&2
+
+# Step 2: Submit transcription request with speaker labels
+# (jq builds the body so the URL and language code are JSON-escaped, not pasted in)
+TRANSCRIPT_REQUEST=$(jq -n \
+  --arg audio_url "$UPLOAD_URL" \
+  --arg language_code "$LANG_CODE" \
+  '{
+    audio_url: $audio_url, language_code: $language_code,
+    speech_model: "universal", speaker_labels: true
+  }'
+)
+
+TRANSCRIPT_RESPONSE=$(curl -sS -X POST "${API_BASE}/transcript" \
+  -H "Authorization: ${ASSEMBLYAI_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d "$TRANSCRIPT_REQUEST")
+
+TRANSCRIPT_ID=$(jq -r '.id // empty' <<< "$TRANSCRIPT_RESPONSE" 2> /dev/null || true)
+
+if [[ -z "$TRANSCRIPT_ID" ]] || [[ "$TRANSCRIPT_ID" == "null" ]]; then
+  echo "Error: Failed to submit transcription" >&2
+  echo "$TRANSCRIPT_RESPONSE" >&2
+  exit 1
+fi
+
+echo "Transcription job submitted (ID: ${TRANSCRIPT_ID})" >&2
+echo "Waiting for completion..." >&2
+
+# Step 3: Poll for completion
+STATUS="queued"
+POLL_INTERVAL=3
+MAX_WAIT=1800  # 30 minutes
+ELAPSED=0      # counts sleep time only, so the real wall-clock wait is somewhat longer
+
+while [[ "$STATUS" == "queued" ]] || [[ "$STATUS" == "processing" ]]; do
+  if (( ELAPSED >= MAX_WAIT )); then
+    echo "Error: Transcription timed out after ${MAX_WAIT} seconds" >&2
+    exit 1
+  fi
+
+  sleep "$POLL_INTERVAL"
+  ELAPSED=$((ELAPSED + POLL_INTERVAL))
+
+  RESULT=$(curl -sS -X GET "${API_BASE}/transcript/${TRANSCRIPT_ID}" \
+    -H "Authorization: ${ASSEMBLYAI_API_KEY}")
+
+  STATUS=$(jq -r '.status' <<< "$RESULT")
+
+  if [[ "$STATUS" == "processing" ]]; then
+    echo "Processing... (${ELAPSED}s elapsed)" >&2
+  fi
+done
+
+# Check if transcription failed
+if [[ "$STATUS" != "completed" ]]; then
+  ERROR_MSG=$(jq -r '.error // "Unknown error"' <<< "$RESULT")
+  echo "Error: Transcription failed with status: ${STATUS}" >&2
+  echo "Error message: ${ERROR_MSG}" >&2
+  exit 1
+fi
+
+echo "Transcription complete! (${ELAPSED}s total)" >&2
+
+# Step 4: Format output with speaker labels
+# Extract utterances and format as "Speaker A: text"
+jq -r '
+  if .utterances then
+    .utterances[] | "Speaker \(.speaker): \(.text)"
+  else
+    .text
+  end
+' <<< "$RESULT"
diff --git a/scripts/install-whisper.sh b/scripts/install-whisper.sh
new file mode 100755
index 00000000..e2ea4ac9
--- /dev/null
+++ b/scripts/install-whisper.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+# Install OpenAI Whisper for transcription on Arch Linux
+# Usage: install-whisper.sh [--yes]   # --yes for non-interactive mode
+
+set -euo pipefail
+
+# Non-interactive mode (NOCONFIRM is non-empty only then; it adds --noconfirm to pacman)
+ASSUME_YES=false NOCONFIRM=""
+if [[ "${1:-}" == "--yes" ]] || [[ "${1:-}" == "-y" ]]; then
+  ASSUME_YES=true NOCONFIRM=1
+fi
+
+echo "=== Whisper Installation for Arch Linux ==="
+echo
+
+# Check if running on Arch
+if [[ ! -f /etc/arch-release ]]; then
+  echo "Warning: This script is designed for Arch Linux"
+  if [[ "$ASSUME_YES" == false ]]; then
+    read -p "Continue anyway? [y/N] " -n 1 -r
+    echo
+    [[ $REPLY =~ ^[Yy]$ ]] || exit 1
+  else
+    echo "Continuing anyway (--yes mode)"
+  fi
+fi
+
+# 1. Install system dependencies
+echo "Step 1/3: Installing system dependencies (ffmpeg)..."
+if ! command -v ffmpeg &> /dev/null; then
+  sudo pacman -S --needed ${NOCONFIRM:+--noconfirm} ffmpeg
+  echo "✓ ffmpeg installed"
+else
+  echo "✓ ffmpeg already installed"
+fi
+
+# 2. Check for AUR package first (optional but cleaner)
+echo
+echo "Step 2/3: Checking for AUR package..."
+AUR_INSTALLED=false
+
+if command -v yay &> /dev/null; then
+  echo "Found yay. Checking AUR for python-openai-whisper..."
+  # grep reads all of yay's output (no -q): an early grep exit could SIGPIPE yay,
+  # and pipefail would then report the package as missing.
+  if yay -Ss python-openai-whisper | grep 'python-openai-whisper' > /dev/null; then
+    INSTALL_AUR=true
+    if [[ "$ASSUME_YES" == true ]]; then
+      echo "Installing from AUR (--yes mode)"
+    else
+      read -p "Install from AUR via yay? [Y/n] " -n 1 -r
+      echo
+      [[ -z $REPLY || $REPLY =~ ^[Yy]$ ]] || INSTALL_AUR=false
+    fi
+
+    if [[ "$INSTALL_AUR" == true ]]; then
+      yay -S --needed --noconfirm python-openai-whisper
+      echo "✓ Installed from AUR"
+      AUR_INSTALLED=true
+    fi
+  else
+    echo "Package python-openai-whisper not found in AUR"
+  fi
+else
+  echo "yay not found. Skipping AUR installation."
+  echo "(Install yay if you prefer AUR packages)"
+fi
+
+# 3. Install via pip if not from AUR (NOTE(review): where the system Python is marked externally managed per PEP 668, pip may refuse this - confirm on current Arch; pipx is the usual alternative)
+if [[ "$AUR_INSTALLED" == false ]]; then
+  echo
+  echo "Step 3/3: Installing openai-whisper via pip..."
+  pip install --user -U openai-whisper
+  echo "✓ openai-whisper installed via pip"
+  echo
+  echo "Note: Ensure ~/.local/bin is in your PATH"
+  echo "Add to ~/.bashrc or ~/.zshrc: export PATH=\"\$HOME/.local/bin:\$PATH\""
+fi
+
+# Verify installation
+echo
+echo "=== Verifying Installation ==="
+if command -v whisper &> /dev/null; then
+  echo "✓ whisper command found at: $(command -v whisper)"
+  WHISPER_HELP=$(whisper --help) # captured, not piped: head closing early could SIGPIPE whisper under pipefail
+  head -n 3 <<< "$WHISPER_HELP"
+  echo
+  echo "=== Installation Complete! ==="
+  echo
+  echo "Models available: tiny, base, small, medium, large"
+  echo "Recommended: small (good balance of speed/accuracy)"
+  echo "Model will download automatically on first use."
+  echo
+  echo "Test with: whisper your-audio.m4a --model small --language en"
+else
+  echo "✗ Installation failed - whisper command not found"
+  echo
+  echo "Troubleshooting:"
+  echo "1. Ensure ~/.local/bin is in your PATH"
+  echo "2. Run: source ~/.bashrc (or ~/.zshrc)"
+  echo "3. Try: python -m whisper --help"
+  exit 1
+fi
diff --git a/scripts/languagetool-flycheck b/scripts/languagetool-flycheck
new file mode 100755
index 00000000..ecbc900f
--- /dev/null
+++ b/scripts/languagetool-flycheck
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Wrapper for LanguageTool to produce flycheck-compatible output.
+Output format: filename:line:column: message
+"""
+
+import json
+import sys
+import subprocess
+
+def main():
+    """Run LanguageTool on the file named by argv[1]; print one `file:line:column: message` line per match."""
+    if len(sys.argv) < 2:
+        print("Usage: languagetool-flycheck FILE", file=sys.stderr)
+        sys.exit(1)
+
+    filename = sys.argv[1]
+
+    # Run languagetool with JSON output
+    try:
+        result = subprocess.run(
+            ['languagetool', '-l', 'en-US', '--json', filename],
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+    except subprocess.TimeoutExpired:
+        print(f"{filename}:1:1: LanguageTool timeout", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"{filename}:1:1: LanguageTool error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Parse JSON output
+    try:
+        # Find the JSON in the output (skip warning lines)
+        json_output = next((ln for ln in result.stdout.split('\n') if ln.startswith('{')), None)
+
+        if not json_output:
+            # A failed run that produced no JSON is an error, not a clean file
+            if result.returncode != 0:
+                print(f"{filename}:1:1: LanguageTool failed: {result.stderr.strip()}", file=sys.stderr)
+                sys.exit(1)
+            sys.exit(0)  # No errors found
+
+        data = json.loads(json_output)
+
+        # Read file to calculate line numbers from character offsets
+        with open(filename, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Convert matches to flycheck format
+        for match in data.get('matches', []):
+            offset = match['offset']
+            message = match['message']
+            rule_id = match['rule']['id']
+
+            # Calculate 1-based line and column (assumes offsets index `content` as read here - TODO confirm)
+            line = content[:offset].count('\n') + 1
+            line_start = content.rfind('\n', 0, offset) + 1
+            column = offset - line_start + 1
+
+            # Get first suggestion if available
+            suggestions = match.get('replacements', [])
+            if suggestions:
+                suggestion = suggestions[0]['value']
+                message = f"{rule_id}: {message} Suggestion: {suggestion}"
+            else:
+                message = f"{rule_id}: {message}"
+
+            # Output in flycheck format
+            print(f"{filename}:{line}:{column}: {message}")
+
+    except json.JSONDecodeError as e:
+        print(f"{filename}:1:1: Failed to parse LanguageTool JSON: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"{filename}:1:1: Error processing LanguageTool output: {e}", file=sys.stderr)
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/local-whisper b/scripts/local-whisper
new file mode 100755
index 00000000..b08651c9
--- /dev/null
+++ b/scripts/local-whisper
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# local-whisper - Transcribe audio files using locally installed Whisper
+# Usage: local-whisper <audio-file> [model] [language]
+# Output: transcript on stdout (whisper also leaves it in <audio-name>.txt next to the audio)
+# Models: tiny, base, small, medium, large (default: small)
+# Language: en, es, fr, etc. (default: en)
+
+set -euo pipefail
+
+# Parse arguments (LANG_CODE, not LANG: LANG is the locale variable whisper would inherit)
+AUDIO="${1:-}"
+MODEL="${2:-small}"
+LANG_CODE="${3:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: local-whisper <audio-file> [model] [language]" >&2
+  echo "Example: local-whisper meeting.m4a small en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check whisper is installed
+if ! command -v whisper &> /dev/null; then
+  echo "Error: whisper command not found" >&2
+  echo "Install with: ~/.emacs.d/scripts/install-whisper.sh" >&2
+  exit 1
+fi
+
+# Get absolute path to audio file
+AUDIO_ABS="$(realpath "$AUDIO")"
+AUDIO_DIR="$(dirname "$AUDIO_ABS")"
+AUDIO_BASE="$(basename "$AUDIO_ABS")"
+AUDIO_NAME="${AUDIO_BASE%.*}"
+
+# Run whisper; its console output is sent to stderr so stdout carries only the transcript
+# Note: whisper creates ${AUDIO_NAME}.txt automatically in the output directory
+whisper "$AUDIO_ABS" \
+  --model "$MODEL" \
+  --language "$LANG_CODE" \
+  --task transcribe \
+  --output_format txt \
+  --output_dir "$AUDIO_DIR" \
+  --verbose False >&2
+
+# Output file that whisper creates
+OUTPUT_FILE="$AUDIO_DIR/$AUDIO_NAME.txt"
+
+# Return transcript to stdout
+if [[ -f "$OUTPUT_FILE" ]]; then
+  cat "$OUTPUT_FILE"
+  exit 0
+else
+  echo "Error: Whisper did not create expected output file: $OUTPUT_FILE" >&2
+  exit 1
+fi
diff --git a/scripts/oai-transcribe b/scripts/oai-transcribe
new file mode 100755
index 00000000..f64a8122
--- /dev/null
+++ b/scripts/oai-transcribe
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# oai-transcribe - Transcribe audio files using OpenAI Whisper API
+# Usage: oai-transcribe <audio-file> [language]
+# Exit status: non-zero if the request fails or the API answers with an HTTP error
+# Requires: OPENAI_API_KEY environment variable; curl >= 7.76 (--fail-with-body)
+# Language: en, es, fr, etc. (default: en)
+
+set -euo pipefail
+
+# Parse arguments (LANG_CODE, not LANG: LANG is the locale variable curl would inherit)
+AUDIO="${1:-}"
+LANG_CODE="${2:-en}"
+
+# Validate arguments
+if [[ -z "$AUDIO" ]]; then
+  echo "Usage: oai-transcribe <audio-file> [language]" >&2
+  echo "Example: oai-transcribe meeting.m4a en" >&2
+  exit 1
+fi
+
+if [[ ! -f "$AUDIO" ]]; then
+  echo "Error: Audio file not found: $AUDIO" >&2
+  exit 1
+fi
+
+# Check API key is set
+if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+  echo "Error: OPENAI_API_KEY environment variable not set" >&2
+  echo "Set with: export OPENAI_API_KEY='sk-...'" >&2
+  exit 1
+fi
+
+# Check curl is available
+if ! command -v curl &> /dev/null; then
+  echo "Error: curl command not found" >&2
+  exit 1
+fi
+
+# Call OpenAI API. The path is double-quoted for -F (a bare ',' or ';' is form syntax), so \ and " in it are escaped.
+AUDIO_FORM=${AUDIO//\\/\\\\}
+AUDIO_FORM=${AUDIO_FORM//\"/\\\"}
+curl -sS --fail-with-body -X POST "https://api.openai.com/v1/audio/transcriptions" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -F "model=whisper-1" -F "response_format=text" -F "language=${LANG_CODE}" \
+  -F "file=@\"${AUDIO_FORM}\""
diff --git a/scripts/uninstall-whisper.sh b/scripts/uninstall-whisper.sh
new file mode 100755
index 00000000..e46c6ebc
--- /dev/null
+++ b/scripts/uninstall-whisper.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# Uninstall OpenAI Whisper
+
+set -euo pipefail
+
+printf '%s\n\n' "=== Whisper Uninstallation ==="
+
+REMOVED=false
+
+# Check if installed via AUR
+if command -v yay &> /dev/null && yay -Qi python-openai-whisper &> /dev/null; then
+  echo "Detected AUR installation (python-openai-whisper)"
+  read -p "Remove via yay? [Y/n] " -n 1 -r
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+    yay -R python-openai-whisper
+    echo "✓ Removed via AUR"
+    REMOVED=true
+  fi
+fi
+
+# Check if installed via pip (pip show exits non-zero when the package is absent)
+if pip show openai-whisper &> /dev/null; then
+  echo "Detected pip installation (openai-whisper)"
+  read -p "Remove via pip? [Y/n] " -n 1 -r
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+    pip uninstall -y openai-whisper
+    echo "✓ Removed via pip"
+    REMOVED=true
+  fi
+fi
+
+if [[ "$REMOVED" == false ]]; then
+  echo "No whisper installation found (checked AUR and pip)"
+fi
+
+# Ask about ffmpeg
+echo
+read -p "Remove ffmpeg? (may be used by other apps) [y/N] " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+  # A refusal (e.g. other packages still need ffmpeg) must not skip the steps below
+  if sudo pacman -R ffmpeg; then
+    echo "✓ Removed ffmpeg"
+  else
+    echo "✗ ffmpeg was not removed" >&2
+  fi
+fi
+
+# Ask about model cache
+CACHE_DIR="$HOME/.cache/whisper"
+if [[ -d "$CACHE_DIR" ]]; then
+  echo
+  echo "Whisper models are cached in: $CACHE_DIR"
+  du -sh "$CACHE_DIR" 2>/dev/null || echo "Size: unknown"
+  read -p "Delete cached models? [y/N] " -n 1 -r
+  echo
+  if [[ $REPLY =~ ^[Yy]$ ]]; then
+    rm -rf -- "${CACHE_DIR:?}"
+    echo "✓ Deleted model cache"
+  fi
+fi
+
+printf '\n%s\n' "=== Uninstallation Complete ==="