Spaces:

NitinBot001
/

Whisper-api

Running

App Files Files Community

NitinBot001 committed on Jun 27, 2025

Commit

6c3cd1d

verified ·

1 Parent(s): 489f2f3

Update main.py

Browse files

Files changed (1) hide show

main.py +183 -23

main.py CHANGED Viewed

@@ -1,32 +1,192 @@
-from fastapi import FastAPI, UploadFile, File
-from fastapi.responses import JSONResponse
-import whisperx
-import torch
 import tempfile
-import shutil
 import os
-app = FastAPI()
-# Load model globally to avoid reloading for every request
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = whisperx.load_model("medium", device)
-@app.post("/transcribe")
-async def transcribe_audio(file: UploadFile = File(...)):
-    try:
-        # Save uploaded audio to temp file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            shutil.copyfileobj(file.file, tmp)
-            temp_audio_path = tmp.name
-        # Load and process audio
-        audio = whisperx.load_audio(temp_audio_path)
-        result = model.transcribe(audio, batch_size=16, return_word_timestamps=True)
-        # Clean up temp file
-        os.remove(temp_audio_path)
-        return JSONResponse(content=result)
     except Exception as e:
-        return JSONResponse(status_code=500, content={"error": str(e)})

+from flask import Flask, request, jsonify
+import whisper
 import tempfile
 import os
+from werkzeug.utils import secure_filename
+import logging
+from datetime import datetime
+# Logging: INFO level on the root logger, plus a logger named after this module
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Flask application and upload limits
+app = Flask(__name__)
+app.config.update(MAX_CONTENT_LENGTH=100 * 1024 * 1024)  # 100MB max file size
+# File extensions accepted by allowed_file()
+ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}
+# Whisper model, loaded once at import time so requests can share it
+# (size can be changed to: tiny, base, small, medium, large)
+MODEL_SIZE = "base"
+logger.info(f"Loading Whisper model: {MODEL_SIZE}")
+model = whisper.load_model(MODEL_SIZE)
+logger.info("Whisper model loaded successfully")
+def allowed_file(filename):
+    """Return True when the text after the last dot is an allowed extension."""
+    _stem, dot, extension = filename.rpartition('.')
+    # An empty separator means the name contained no dot at all.
+    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
+def format_timestamp(seconds):
+    """Convert seconds to HH:MM:SS.mmm format.
+
+    The value is rounded to whole milliseconds before it is split into
+    fields, so a rounding carry propagates into minutes and hours
+    (59.9996 becomes "00:01:00.000" instead of "00:00:60.000").
+
+    Args:
+        seconds: Offset in seconds (int or float); expected to be non-negative.
+
+    Returns:
+        A zero-padded "HH:MM:SS.mmm" string. The hours field grows past two
+        digits when the offset is 100 hours or more.
+    """
+    total_ms = int(round(seconds * 1000))
+    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
+    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
+    secs, millis = divmod(remainder_ms, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
+@app.route('/', methods=['GET'])
+def health_check():
+    """Report that the service is up, which model is loaded, and the current time."""
+    status_payload = dict(
+        status="healthy",
+        message="Whisper Transcription API is running",
+        model=MODEL_SIZE,
+        timestamp=datetime.now().isoformat(),
+    )
+    return jsonify(status_payload)
+def _extract_word_segments(result):
+    """Flatten the per-segment word lists of a Whisper result into one list."""
+    word_segments = []
+    for segment in result.get("segments", []):
+        # Segments without a "words" entry contribute nothing.
+        for word_data in segment.get("words", []):
+            start = word_data.get("start", 0)
+            end = word_data.get("end", 0)
+            word_segments.append({
+                "word": word_data.get("word", "").strip(),
+                "start": start,
+                "end": end,
+                "start_formatted": format_timestamp(start),
+                "end_formatted": format_timestamp(end),
+                "confidence": word_data.get("probability", 0)
+            })
+    return word_segments
+@app.route('/transcribe', methods=['POST'])
+def transcribe_audio():
+    """
+    Transcribe audio file and return word-level timestamps
+    Expected form data:
+    - audio_file: The audio file to transcribe
+    - language (optional): Language code (e.g., 'en', 'es', 'fr')
+    - task (optional): 'transcribe' or 'translate' (default: transcribe)
+    Returns:
+    - 200 with the transcription JSON (text, segments, flattened words)
+    - 400 when the upload or the 'task' value is invalid
+    - 500 when saving or transcribing the file fails
+    """
+    try:
+        # Check if audio file is present
+        if 'audio_file' not in request.files:
+            return jsonify({'error': 'No audio file provided'}), 400
+        file = request.files['audio_file']
+        if file.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+        if not allowed_file(file.filename):
+            # Sorted so the message is stable; set iteration order is not.
+            supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
+            return jsonify({
+                'error': f'File type not allowed. Supported formats: {supported}'
+            }), 400
+        # Get optional parameters; an empty 'language' field is treated as
+        # "not provided" so the model receives None rather than ''.
+        language = request.form.get('language') or None
+        task = request.form.get('task', 'transcribe')
+        if task not in ('transcribe', 'translate'):
+            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400
+        extension = file.filename.rsplit('.', 1)[1].lower()
+        # Only reserve a unique path here and close the handle right away:
+        # writing to the path while the handle is still open fails on Windows.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp_file:
+            temp_path = tmp_file.name
+        try:
+            # Saving inside this try guarantees the temp file is removed even
+            # when the write itself fails.
+            file.save(temp_path)
+            logger.info(f"Processing file: {file.filename}")
+            try:
+                # Transcribe with word-level timestamps
+                result = model.transcribe(
+                    temp_path,
+                    language=language,
+                    task=task,
+                    word_timestamps=True,
+                    verbose=False
+                )
+                word_segments = _extract_word_segments(result)
+                segments = result.get("segments", [])
+                # Prepare response
+                response_data = {
+                    "success": True,
+                    "filename": secure_filename(file.filename),
+                    "language": result.get("language", "unknown"),
+                    "task": task,
+                    # Duration is approximated by the end of the last segment.
+                    "duration": segments[-1].get("end", 0) if segments else 0,
+                    "text": result.get("text", ""),
+                    "word_count": len(word_segments),
+                    "segments": segments,
+                    "words": word_segments,
+                    "model_used": MODEL_SIZE,
+                    "processing_time": None  # You can add timing if needed
+                }
+                logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
+                return jsonify(response_data)
+            except Exception as e:
+                # logger.exception also records the traceback for debugging.
+                logger.exception(f"Transcription error: {str(e)}")
+                return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
+        finally:
+            # Clean up temporary file
+            if os.path.exists(temp_path):
+                os.unlink(temp_path)
     except Exception as e:
+        logger.error(f"API error: {str(e)}")
+        return jsonify({'error': f'Server error: {str(e)}'}), 500
+@app.route('/models', methods=['GET'])
+def available_models():
+    """Describe the loaded model, the known model sizes, and the language codes."""
+    # Static catalogue of model sizes with rough size/speed/accuracy notes
+    model_catalog = {
+        "tiny": dict(size="~39 MB", speed="~32x", accuracy="lowest"),
+        "base": dict(size="~74 MB", speed="~16x", accuracy="low"),
+        "small": dict(size="~244 MB", speed="~6x", accuracy="medium"),
+        "medium": dict(size="~769 MB", speed="~2x", accuracy="high"),
+        "large": dict(size="~1550 MB", speed="~1x", accuracy="highest"),
+    }
+    # Language codes advertised to clients
+    language_codes = [
+        "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl",
+        "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro",
+        "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy",
+        "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu",
+        "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km",
+        "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo",
+        "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg",
+        "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
+    ]
+    return jsonify({
+        "current_model": MODEL_SIZE,
+        "available_models": model_catalog,
+        "supported_languages": language_codes,
+    })
+@app.errorhandler(413)
+def too_large(e):
+    """Answer HTTP 413 (request body too large) with a JSON error body."""
+    body = {'error': 'File too large. Maximum size is 100MB'}
+    return jsonify(body), 413
+@app.errorhandler(404)
+def not_found(e):
+    """Answer HTTP 404 (unknown route) with a JSON error body."""
+    body = {'error': 'Endpoint not found'}
+    return jsonify(body), 404
+@app.errorhandler(500)
+def internal_error(e):
+    """Answer HTTP 500 (unhandled server error) with a JSON error body."""
+    body = {'error': 'Internal server error'}
+    return jsonify(body), 500
+if __name__ == '__main__':
+    # Startup banner. The extension list is sorted so the output is stable
+    # across runs (set iteration order is not).
+    print(f"""
+    Whisper Transcription API Server
+    ================================
+    Model: {MODEL_SIZE}
+    Endpoints:
+    - GET  /           : Health check
+    - POST /transcribe : Transcribe audio file
+    - GET  /models     : Available models info
+    Supported formats: {', '.join(sorted(ALLOWED_EXTENSIONS))}
+    Max file size: 100MB
+    """)
+    # The interactive debugger lets anyone who can reach the server run
+    # arbitrary code, so it must not be on by default while listening on all
+    # interfaces. Opt in explicitly with FLASK_DEBUG=1 for local development.
+    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
+    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)