Spaces:

NitinBot001
/

Whisper-api

Running

App Files Files Community

NitinBot001 committed on Jun 27, 2025

Commit

e1ed6ea

verified ·

1 Parent(s): ced86e4

Update main.py

Browse files

Files changed (1) hide show

main.py +42 -175

main.py CHANGED Viewed

@@ -2,193 +2,60 @@ from flask import Flask, request, jsonify
 import whisper
 import tempfile
 import os
 from flask_cors import CORS
-from werkzeug.utils import secure_filename
-import logging
-from datetime import datetime
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 app = Flask(__name__)
 CORS(app)
-# Configuration
-app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB max file size
-ALLOWED_EXTENSIONS = {'wav', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'webm', 'flac'}
-# Load Whisper model (you can change the model size: tiny, base, small, medium, large)
-MODEL_SIZE = "base"  # Change this to your preferred model size
-logger.info(f"Loading Whisper model: {MODEL_SIZE}")
-model = whisper.load_model(MODEL_SIZE)
-logger.info("Whisper model loaded successfully")
-def allowed_file(filename):
-    """Check if the file extension is allowed"""
-    return '.' in filename and \
-           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-def format_timestamp(seconds):
-    """Convert seconds to HH:MM:SS.mmm format"""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    secs = seconds % 60
-    return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"
-@app.route('/', methods=['GET'])
-def health_check():
-    """Health check endpoint"""
-    return jsonify({
-        "status": "healthy",
-        "message": "Whisper Transcription API is running",
-        "model": MODEL_SIZE,
-        "timestamp": datetime.now().isoformat()
-    })
 @app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
-    """
-    Transcribe audio file and return word-level timestamps
-    Expected form data:
-    - audio_file: The audio file to transcribe
-    - language (optional): Language code (e.g., 'en', 'es', 'fr')
-    - task (optional): 'transcribe' or 'translate' (default: transcribe)
-    """
     try:
-        # Check if audio file is present
-        if 'audio_file' not in request.files:
             return jsonify({'error': 'No audio file provided'}), 400
-        file = request.files['audio_file']
-        if file.filename == '':
-            return jsonify({'error': 'No file selected'}), 400
-        if not allowed_file(file.filename):
-            return jsonify({
-                'error': f'File type not allowed. Supported formats: {", ".join(ALLOWED_EXTENSIONS)}'
-            }), 400
-        # Get optional parameters
-        language = request.form.get('language', None)
-        task = request.form.get('task', 'transcribe')
-        if task not in ['transcribe', 'translate']:
-            return jsonify({'error': 'Task must be either "transcribe" or "translate"'}), 400
-        # Save uploaded file temporarily
-        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file.filename.rsplit('.', 1)[1].lower()}") as tmp_file:
-            file.save(tmp_file.name)
-            temp_path = tmp_file.name
-        logger.info(f"Processing file: {file.filename}")
-        try:
-            # Transcribe with word-level timestamps
-            result = model.transcribe(
-                temp_path,
-                language=language,
-                task=task,
-                word_timestamps=True,
-                verbose=False
-            )
-            # Extract word-level data
-            word_segments = []
-            for segment in result.get("segments", []):
-                if "words" in segment:
-                    for word_data in segment["words"]:
-                        word_segments.append({
-                            "word": word_data.get("word", "").strip(),
-                            "start": word_data.get("start", 0),
-                            "end": word_data.get("end", 0),
-                            "start_formatted": format_timestamp(word_data.get("start", 0)),
-                            "end_formatted": format_timestamp(word_data.get("end", 0)),
-                            "confidence": word_data.get("probability", 0)
-                        })
-            # Prepare response
-            response_data = {
-                "success": True,
-                "filename": secure_filename(file.filename),
-                "language": result.get("language", "unknown"),
-                "task": task,
-                "duration": result.get("segments", [{}])[-1].get("end", 0) if result.get("segments") else 0,
-                "text": result.get("text", ""),
-                "word_count": len(word_segments),
-                "segments": result.get("segments", []),
-                "words": word_segments,
-                "model_used": MODEL_SIZE,
-                "processing_time": None  # You can add timing if needed
-            }
-            logger.info(f"Successfully transcribed {len(word_segments)} words from {file.filename}")
-            return jsonify(response_data)
-        except Exception as e:
-            logger.error(f"Transcription error: {str(e)}")
-            return jsonify({'error': f'Transcription failed: {str(e)}'}), 500
-        finally:
-            # Clean up temporary file
-            if os.path.exists(temp_path):
-                os.unlink(temp_path)
-    except Exception as e:
-        logger.error(f"API error: {str(e)}")
-        return jsonify({'error': f'Server error: {str(e)}'}), 500
-@app.route('/models', methods=['GET'])
-def available_models():
-    """Get information about available Whisper models"""
-    models_info = {
-        "current_model": MODEL_SIZE,
-        "available_models": {
-            "tiny": {"size": "~39 MB", "speed": "~32x", "accuracy": "lowest"},
-            "base": {"size": "~74 MB", "speed": "~16x", "accuracy": "low"},
-            "small": {"size": "~244 MB", "speed": "~6x", "accuracy": "medium"},
-            "medium": {"size": "~769 MB", "speed": "~2x", "accuracy": "high"},
-            "large": {"size": "~1550 MB", "speed": "~1x", "accuracy": "highest"}
-        },
-        "supported_languages": [
-            "en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl", "ca", "nl",
-            "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk", "el", "ms", "cs", "ro",
-            "da", "hu", "ta", "no", "th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy",
-            "sk", "te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk", "br", "eu",
-            "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km",
-            "sn", "yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi", "lo",
-            "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl", "mg",
-            "as", "tt", "haw", "ln", "ha", "ba", "jw", "su"
-        ]
-    }
-    return jsonify(models_info)
-@app.errorhandler(413)
-def too_large(e):
-    return jsonify({'error': 'File too large. Maximum size is 100MB'}), 413
-@app.errorhandler(404)
-def not_found(e):
-    return jsonify({'error': 'Endpoint not found'}), 404
-@app.errorhandler(500)
-def internal_error(e):
-    return jsonify({'error': 'Internal server error'}), 500
 if __name__ == '__main__':
-    print(f"""
-    Whisper Transcription API Server
-    ================================
-    Model: {MODEL_SIZE}
-    Endpoints:
-    - GET  /           : Health check
-    - POST /transcribe : Transcribe audio file
-    - GET  /models     : Available models info
-    Supported formats: {', '.join(ALLOWED_EXTENSIONS)}
-    Max file size: 100MB
-    """)
     app.run(debug=True, host='0.0.0.0', port=7860)

 import whisper
 import tempfile
 import os
+from pathlib import Path
+import torch
 from flask_cors import CORS
 app = Flask(__name__)
 # Allow cross-origin requests to the routes registered on this app.
 CORS(app)
 # Cap request bodies at 100 MB. Without a limit, an arbitrarily large upload
 # is accepted and written to disk; with it, Werkzeug raises
 # RequestEntityTooLarge as soon as the request body is accessed.
 app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
 # Load the Whisper "base" model once at import time; requests reuse it.
 model = whisper.load_model("base")
 def _upload_suffix(filename):
     """Return a safe file extension (e.g. ``.mp3``) derived from *filename*.
     Only the extension of the client-supplied name is kept, and only when it
     is purely alphanumeric; otherwise an empty string is returned.
     """
     ext = os.path.splitext(os.path.basename(filename or ''))[1]
     return ext if ext[1:].isalnum() else ''
 @app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
     """Transcribe the uploaded ``audio`` file with word-level timestamps.
     Expects a multipart form upload under the ``audio`` field. The audio is
     transcribed with ``language="en"`` and ``word_timestamps=True``.
     Returns:
         200 with ``{'transcription': [...], 'full_text': str}``, where each
         transcription entry has ``word``, ``start``, ``end`` and
         ``confidence`` keys;
         400 when no file was uploaded or selected;
         500 with ``{'error': str}`` on any other failure.
     """
     try:
         # Check if audio file is in the request
         if 'audio' not in request.files:
             return jsonify({'error': 'No audio file provided'}), 400
         audio_file = request.files['audio']
         # An empty filename means the form field was submitted without a file.
         if not audio_file.filename:
             return jsonify({'error': 'No file selected'}), 400
         # The client-supplied filename is never used as a path component: an
         # absolute name or "../" segments would escape the temp directory.
         # Only a sanitised extension is kept. The context manager removes the
         # directory and its contents even when saving or transcribing fails.
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = os.path.join(
                 temp_dir, 'upload' + _upload_suffix(audio_file.filename)
             )
             audio_file.save(temp_path)
             # Transcribe audio with word-level timestamps
             result = model.transcribe(
                 temp_path,
                 word_timestamps=True,
                 language="en"  # Adjust based on your needs
             )
         # Flatten every segment's words into one list; a segment without a
         # 'words' entry contributes nothing instead of raising KeyError.
         word_level_transcription = [
             {
                 'word': word['word'],
                 'start': word['start'],
                 'end': word['end'],
                 'confidence': word['probability'],
             }
             for segment in result['segments']
             for word in segment.get('words', [])
         ]
         # Return transcription results
         return jsonify({
             'transcription': word_level_transcription,
             'full_text': result['text']
         }), 200
     except Exception as e:
         # Record the traceback server-side; the response body is unchanged.
         app.logger.exception('Transcription request failed')
         return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
     # The Werkzeug interactive debugger lets anyone who can reach the server
     # execute arbitrary Python, and this server binds to all interfaces.
     # Debug mode is therefore off unless explicitly enabled via FLASK_DEBUG.
     debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
     app.run(debug=debug_enabled, host='0.0.0.0', port=7860)