import logging
import os
import tempfile
from datetime import datetime

from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Configuration
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB max file size
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}

# Initialize model variable
model = None
MODEL_SIZE = "medium"  # "tiny" loads fastest on Hugging Face Spaces; "base", "small", "medium", or "large" trade speed for accuracy
# Set cache directory for Hugging Face Spaces
def setup_cache_directory():
    """Setup cache directory for Hugging Face Spaces"""
    # Create a writable cache directory in the current working directory
    cache_dir = os.path.join(os.getcwd(), ".whisper_cache")
    os.makedirs(cache_dir, exist_ok=True)

    # Set environment variables for the Whisper cache
    os.environ['XDG_CACHE_HOME'] = cache_dir
    os.environ['WHISPER_CACHE'] = cache_dir

    logger.info(f"Cache directory set to: {cache_dir}")
    return cache_dir
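
# Note: openai-whisper derives its default download location from
# XDG_CACHE_HOME (falling back to ~/.cache/whisper). WHISPER_CACHE is set
# above as a belt-and-suspenders measure, though as far as we know the
# library itself does not read that variable; the download_root argument
# (used below) is the documented way to redirect model downloads.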
def load_whisper_model():
    """Load Whisper model with proper error handling"""
    global model
    try:
        # Setup cache directory first
        cache_dir = setup_cache_directory()

        # Try multiple import strategies for openai-whisper
        whisper_module = None

        # Strategy 1: Direct import (most common)
        try:
            import whisper as whisper_module
        except ImportError:
            pass

        # Strategy 2: Try importing as openai_whisper
        if whisper_module is None:
            try:
                import openai_whisper as whisper_module
            except ImportError:
                pass
        if whisper_module is None:
            logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper")
            return False

        # Check if the module has the load_model function
        if not hasattr(whisper_module, 'load_model'):
            logger.error("Whisper module found but missing 'load_model' function")
            logger.error("This suggests you have the wrong 'whisper' package installed")
            logger.error("Solution:")
            logger.error("1. pip uninstall whisper")
            logger.error("2. pip uninstall openai-whisper (if it exists)")
            logger.error("3. pip install openai-whisper")
            logger.error("4. pip install torch torchaudio")
            return False

        logger.info(f"Loading Whisper model: {MODEL_SIZE}")
        logger.info(f"Using cache directory: {cache_dir}")

        # Load the model with an explicit download root
        try:
            model = whisper_module.load_model(MODEL_SIZE, download_root=cache_dir)
        except TypeError:
            # Fallback if the download_root parameter is not supported
            model = whisper_module.load_model(MODEL_SIZE)

        logger.info("Whisper model loaded successfully")
        return True

    except ImportError as e:
        logger.error(f"Import error: {e}")
        logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper torch torchaudio")
        return False
    except AttributeError as e:
        logger.error(f"Whisper import error: {e}")
        logger.error("Make sure you have the correct whisper package installed:")
        logger.error("Solution:")
        logger.error("1. pip uninstall whisper")
        logger.error("2. pip install openai-whisper torch torchaudio")
        return False
    except PermissionError as e:
        logger.error(f"Permission error: {e}")
        logger.error("Cannot write to cache directory. This might be a Hugging Face Spaces limitation.")
        logger.error("Trying alternative cache locations...")

        # Try alternative cache locations
        alternative_dirs = [
            "/tmp/.whisper_cache",
            os.path.expanduser("~/.whisper_cache"),
            "./whisper_models"
        ]
        for alt_dir in alternative_dirs:
            try:
                os.makedirs(alt_dir, exist_ok=True)
                os.environ['XDG_CACHE_HOME'] = alt_dir
                os.environ['WHISPER_CACHE'] = alt_dir
                logger.info(f"Trying alternative cache: {alt_dir}")

                import whisper
                model = whisper.load_model(MODEL_SIZE, download_root=alt_dir)
                logger.info(f"Successfully loaded model with cache: {alt_dir}")
                return True
            except Exception as alt_e:
                logger.warning(f"Alternative cache {alt_dir} failed: {alt_e}")
                continue

        logger.error("All cache directory attempts failed")
        return False
    except Exception as e:
        logger.error(f"Error loading Whisper model: {e}")
        logger.error("This could be due to:")
        logger.error("- Insufficient memory")
        logger.error("- Missing PyTorch/CUDA dependencies")
        logger.error("- Network issues downloading the model")
        logger.error("- Hugging Face Spaces limitations")
        return False


# Try to load the model at startup
model_loaded = load_whisper_model()
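
# Loading happens at import time, so the first boot of a Space may be slow:
# per the size table exposed by the /models endpoint below, the "medium"
# checkpoint is roughly 769 MB and must be downloaded before the API can
# serve transcription requests.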
def allowed_file(filename):
    """Check if the file extension is allowed"""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def format_timestamp(seconds):
    """Convert seconds to HH:MM:SS.mmm format"""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = seconds % 60
    return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
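
# Worked example (values illustrative): format_timestamp(3661.5) returns
# "01:01:01.500" (1 hour, 1 minute, 1.5 seconds), and format_timestamp(0)
# returns "00:00:00.000".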
@app.route('/', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        "status": "healthy" if model_loaded else "unhealthy",
        "message": "Whisper Transcription API is running" if model_loaded else "Whisper model failed to load",
        "model": MODEL_SIZE if model_loaded else "none",
        "model_loaded": model_loaded,
        "timestamp": datetime.now().isoformat()
    })
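
# Example (assuming the default port configured at the bottom of this file):
#   curl http://localhost:7860/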
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Transcribe an audio file and return word-level timestamps.

    Expected form data:
    - audio_file: The audio file to transcribe
    - language (optional): Language code (e.g., 'en', 'es', 'fr')
    - task (optional): 'transcribe' or 'translate' (default: transcribe)
    """
    try:
        # Check if the model is loaded
        if not model_loaded or model is None:
            return jsonify({
                'error': 'Whisper model not loaded. Please check server logs and ensure openai-whisper is installed correctly.'
            }), 503

        # Check if an audio file is present
        if 'audio_file' not in request.files:
            return jsonify({'error': 'No audio file provided'}), 400

        file = request.files['audio_file']
        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({
                'error': f'File type not allowed. Supported formats: {", ".join(ALLOWED_EXTENSIONS)}'
            }), 400

        # Get optional parameters
        language = request.form.get('language', None)
        task = request.form.get('task', 'transcribe')
        if task not in ['transcribe', 'translate']:
            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400

        # Save the uploaded file temporarily
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file.filename.rsplit('.', 1)[1].lower()}") as tmp_file:
            file.save(tmp_file.name)
            temp_path = tmp_file.name

        logger.info(f"Processing file: {file.filename}")

        try:
            # Transcribe with word-level timestamps
            result = model.transcribe(
                temp_path,
                language=language,
                task=task,
                word_timestamps=True,
                verbose=False
            )

            # Extract word-level data
            word_segments = []
            for segment in result.get("segments", []):
                if "words" in segment:
                    for word_data in segment["words"]:
                        word_segments.append({
                            "word": word_data.get("word", "").strip(),
                            "start": word_data.get("start", 0),
                            "end": word_data.get("end", 0),
                            "start_formatted": format_timestamp(word_data.get("start", 0)),
                            "end_formatted": format_timestamp(word_data.get("end", 0)),
                            "confidence": word_data.get("probability", 0)
                        })

            # Prepare the response
            response_data = {
                "success": True,
                "filename": secure_filename(file.filename),
                "language": result.get("language", "unknown"),
                "task": task,
                "duration": result.get("segments", [{}])[-1].get("end", 0) if result.get("segments") else 0,
                "text": result.get("text", ""),
                "word_count": len(word_segments),
                "segments": result.get("segments", []),
                "words": word_segments,
                "model_used": MODEL_SIZE,
                "processing_time": None  # Timing can be added here if needed
            }

            logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
            return jsonify(response_data)

        except Exception as e:
            logger.error(f"Transcription error: {str(e)}")
            return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
        finally:
            # Clean up the temporary file
            if os.path.exists(temp_path):
                os.unlink(temp_path)

    except Exception as e:
        logger.error(f"API error: {str(e)}")
        return jsonify({'error': f'Server error: {str(e)}'}), 500
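
# Example request (illustrative host, port, and file name; the form field
# must be named "audio_file", and "language"/"task" are optional):
#   curl -X POST http://localhost:7860/transcribe \
#        -F "audio_file=@sample.wav" \
#        -F "language=en" \
#        -F "task=transcribe"
# A successful response includes the full "text", per-segment data in
# "segments", and per-word entries in "words" with start/end times and
# confidence values.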
@app.route('/models', methods=['GET'])
def available_models():
    """Get information about available Whisper models"""
    models_info = {
        "current_model": MODEL_SIZE if model_loaded else "none",
        "model_loaded": model_loaded,
        "available_models": {
            "tiny": {"size": "~39 MB", "speed": "~32x", "accuracy": "lowest"},
            "base": {"size": "~74 MB", "speed": "~16x", "accuracy": "low"},
            "small": {"size": "~244 MB", "speed": "~6x", "accuracy": "medium"},
            "medium": {"size": "~769 MB", "speed": "~2x", "accuracy": "high"},
            "large": {"size": "~1550 MB", "speed": "~1x", "accuracy": "highest"}
        },
        "supported_languages": [
            "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl",
            "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro",
            "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy",
            "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu",
            "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km",
            "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo",
            "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg",
            "as", "tt", "haw", "ln", "ha", "ba", "jw", "su"
        ],
        "installation_help": {
            "error": "Whisper model not loaded" if not model_loaded else None,
            "install_command": "pip install openai-whisper torch torchaudio",
            "uninstall_conflicts": "pip uninstall whisper (if you have a conflicting whisper package)"
        }
    }
    return jsonify(models_info)
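
# Example (same illustrative host/port as above):
#   curl http://localhost:7860/models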
@app.errorhandler(413)
def too_large(e):
    return jsonify({'error': 'File too large. Maximum size is 100MB'}), 413


@app.errorhandler(404)
def not_found(e):
    return jsonify({'error': 'Endpoint not found'}), 404


@app.errorhandler(500)
def internal_error(e):
    return jsonify({'error': 'Internal server error'}), 500
if __name__ == '__main__':
    if not model_loaded:
        print("""
⚠️  WHISPER MODEL LOADING FAILED ⚠️
===================================
The Whisper model could not be loaded. Please check:

1. Install the correct package:
   pip install openai-whisper torch torchaudio

2. If you have conflicts, uninstall the wrong whisper package:
   pip uninstall whisper
   pip install openai-whisper

3. Make sure you have sufficient disk space for the model.

The server will start but transcription will not work until the model is loaded.
""")
    else:
        print(f"""
Whisper Transcription API Server
================================
Model: {MODEL_SIZE} ✅
Status: Ready

Endpoints:
- GET  /           : Health check
- POST /transcribe : Transcribe audio file
- GET  /models     : Available models info

Supported formats: {', '.join(ALLOWED_EXTENSIONS)}
Max file size: 100MB
""")
    app.run(debug=True, host='0.0.0.0', port=7860)
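
# Minimal client sketch (illustrative: assumes the server is reachable at
# localhost:7860, that "sample.wav" exists, and that the third-party
# "requests" package is installed):
#
#   import requests
#
#   with open("sample.wav", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/transcribe",
#           files={"audio_file": f},
#           data={"language": "en", "task": "transcribe"},
#       )
#   data = resp.json()
#   print(data["text"])
#   for w in data["words"][:5]:
#       print(w["word"], w["start_formatted"], w["end_formatted"])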