| | """ |
| | Audio Transcription Tool - Whisper speech-to-text |
| | Author: @mangobee |
| | Date: 2026-01-13 |
| | |
| | Provides audio transcription using OpenAI Whisper: |
| | - Supports MP3, WAV, M4A, and other audio formats |
| | - ZeroGPU acceleration via @spaces.GPU decorator |
| | - Model caching for efficient repeated use |
| | - Unified tool for Phase 1 (YouTube fallback) and Phase 2 (MP3 files) |
| | |
| | Requirements: |
| | - openai-whisper: pip install openai-whisper |
| | - ZeroGPU: @spaces.GPU decorator required for HF Spaces |
| | """ |
| |
|
| | import logging |
| | import os |
| | import tempfile |
| | from typing import Dict, Any |
| | from pathlib import Path |
| |
|
| | |
| | |
| | |
# Whisper model size to load ("tiny"/"base"/"small"/...); "small" balances
# accuracy against load time and memory on shared hardware.
WHISPER_MODEL = "small"
# Language hint passed to Whisper's decoder; forces English transcription.
WHISPER_LANGUAGE = "en"
# Accepted audio file extensions (lower-case, dot-prefixed); checked against
# Path.suffix.lower() before transcription.
AUDIO_FORMATS = [".mp3", ".wav", ".m4a", ".ogg", ".flac", ".aac"]

# Module-level logger; handler/level configuration is left to the host app.
logger = logging.getLogger(__name__)

# Lazily-populated Whisper model cache so repeated calls reuse one model.
# Reset via cleanup().
_MODEL: Any = None
| |
|
| |
|
| | |
| | |
| | |
# Prefer the real ZeroGPU decorator when running on HF Spaces; otherwise
# substitute an identity decorator so @GPU-decorated functions still work
# unchanged on plain CPU hosts.
try:
    from spaces import GPU
except ImportError:
    ZERO_GPU_AVAILABLE = False

    def GPU(func):
        """Identity stand-in for spaces.GPU when the package is absent."""
        return func

    logger.info("ZeroGPU not available, running in CPU mode")
else:
    ZERO_GPU_AVAILABLE = True
| |
|
| |
|
| | |
| | |
| | |
| |
|
@GPU
def transcribe_audio(file_path: str) -> Dict[str, Any]:
    """
    Transcribe an audio file using Whisper (ZeroGPU accelerated when available).

    Args:
        file_path: Path to audio file (MP3, WAV, M4A, etc.)

    Returns:
        Dict with structure: {
            "text": str,          # Transcribed text ("" on failure)
            "file_path": str,     # Original file path
            "success": bool,      # True if transcription succeeded
            "error": str or None  # Error message if failed
        }

    Note:
        This function never raises: missing files, unsupported formats, and
        transcription errors are all reported through the returned dict.

    Examples:
        >>> transcribe_audio("audio.mp3")
        {"text": "Hello world", "file_path": "audio.mp3", "success": True, "error": None}
    """
    global _MODEL

    def _failure(path_str: str, message: str) -> Dict[str, Any]:
        # Uniform error envelope so callers only need to branch on "success".
        logger.error(message)
        return {"text": "", "file_path": path_str, "success": False, "error": message}

    if not file_path:
        return _failure("", "Empty file path provided")

    path = Path(file_path)

    if not path.exists():
        return _failure(str(path), f"File not found: {path}")

    if path.suffix.lower() not in AUDIO_FORMATS:
        return _failure(
            str(path),
            f"Unsupported audio format: {path.suffix}. Supported: {AUDIO_FORMATS}",
        )

    logger.info("Transcribing audio: %s", path)

    try:
        # Deferred imports: whisper (and its torch dependency) are heavy and
        # only needed once a transcription is actually requested.
        import torch
        import whisper

        if _MODEL is None:
            # Probe for an actual CUDA device instead of trusting
            # ZERO_GPU_AVAILABLE: the `spaces` package can be importable on
            # CPU-only hosts, where device="cuda" would crash load_model.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            logger.info("Loading Whisper model: %s", WHISPER_MODEL)
            _MODEL = whisper.load_model(WHISPER_MODEL, device=device)
            logger.info("Whisper model loaded on %s", device)

        # fp16=False suppresses the "FP16 is not supported on CPU" warning and
        # keeps output consistent between CPU and GPU runs.
        result = _MODEL.transcribe(
            str(path),
            language=WHISPER_LANGUAGE,
            fp16=False,
        )

        text = result["text"].strip()
        logger.info("Transcription successful: %d characters", len(text))

        return {
            "text": text,
            "file_path": str(path),
            "success": True,
            "error": None,
        }

    except FileNotFoundError:
        # The file existed at the check above but vanished before/while
        # whisper (or ffmpeg) opened it.
        return _failure(str(path), f"Audio file not found: {path}")
    except Exception as e:
        # Whisper/ffmpeg decode failures, OOM, corrupt audio, etc.
        return _failure(str(path), f"Transcription failed: {str(e)}")
| |
|
| |
|
| | |
| | |
| | |
| |
|
def cleanup() -> None:
    """Drop the cached Whisper model so the next transcription reloads it.

    Primarily a testing aid; also frees the model's memory.
    """
    global _MODEL
    _MODEL = None
    logger.info("Whisper model cache cleared")
| |
|