# NOTE(review): the three lines below were HuggingFace Spaces page metadata
# ("Spaces: Sleeping") captured by the scrape, not part of the program.
# Converted to a comment so the module parses; safe to delete.
from flask import Flask, request, jsonify, Response  # Response kept; currently unused (likely for future streaming)
from faster_whisper import WhisperModel
import torch
import io
import time
import datetime
from threading import Semaphore
import os
from werkzeug.utils import secure_filename
import tempfile

app = Flask(__name__)

# --- Configuration ---
MAX_CONCURRENT_REQUESTS = 2   # Adjust based on your server capacity
MAX_AUDIO_DURATION = 60 * 30  # 30 minutes maximum audio duration (adjust as needed)
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac'}

# --- Device selection for faster-whisper ---
# float16 requires a GPU; int8 keeps CPU inference memory-friendly.
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# --- Faster-Whisper model, tuned for long-form audio ---
beamsize = 2  # Slightly larger beam size can help with long-form accuracy
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache",  # Cache model to avoid re-downloading
)

# --- Concurrency control ---
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
# Informational counter only; mutated without a lock (best-effort value).
active_requests = 0
def allowed_file(filename, allowed_extensions=None):
    """Return True if *filename* has a permitted audio extension.

    Args:
        filename: User-supplied file name from the upload.
        allowed_extensions: Optional set of lowercase extensions (no dot).
            Defaults to the module-level ALLOWED_EXTENSIONS.

    Returns:
        bool: True when the name contains a dot and its final extension,
        lowercased, is in the allowed set.
    """
    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS
    if '.' not in filename:
        return False
    return filename.rsplit('.', 1)[1].lower() in allowed_extensions
def cleanup_temp_files(file_path):
    """Delete a temporary file, tolerating one that is already gone.

    Uses EAFP instead of an exists-then-remove check (which races with
    concurrent cleanup). Failures are logged, never raised, so cleanup
    cannot mask the real request outcome.
    """
    try:
        os.remove(file_path)
    except FileNotFoundError:
        pass  # already removed — nothing to do
    except OSError as e:
        print(f"Error cleaning up temp file {file_path}: {str(e)}")
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness endpoint: report status, device config, and current load.

    NOTE(review): no @app.route decorator was present, so this endpoint was
    never registered with Flask; restored here — confirm the intended path.
    """
    return jsonify({
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
        'active_requests': active_requests,
        'max_duration_supported': MAX_AUDIO_DURATION,
    })
@app.route('/busy', methods=['GET'])
def server_busy():
    """Report whether the server is at its concurrency limit.

    NOTE(review): no @app.route decorator was present, so this endpoint was
    never registered with Flask; restored here — confirm the intended path.
    """
    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
    return jsonify({
        'is_busy': is_busy,
        'active_requests': active_requests,
        'max_capacity': MAX_CONCURRENT_REQUESTS,
    })
@app.route('/transcribe', methods=['POST'])
def whisper_transcribe():
    """Transcribe an uploaded audio file (form field 'audio') to plain text.

    Responses:
        503 when the server is at its concurrency limit,
        400 on a missing or disallowed file,
        500 on transcription failure,
        200 with {'transcription': <text>} on success.

    NOTE(review): no @app.route decorator was present, so this endpoint was
    never registered with Flask; restored here — confirm the intended path.
    """
    global active_requests
    # Reject immediately rather than queueing when at capacity.
    if not request_semaphore.acquire(blocking=False):
        return jsonify({'error': 'Server busy'}), 503

    active_requests += 1  # best-effort counter (no lock; informational only)
    start_time = time.time()
    temp_file_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No file provided'}), 400
        audio_file = request.files['audio']
        if not (audio_file and allowed_file(audio_file.filename)):
            return jsonify({'error': 'Invalid file format'}), 400

        # Save under a unique temp name. Reusing secure_filename(...) directly
        # lets two concurrent uploads with the same name clobber each other.
        suffix = os.path.splitext(secure_filename(audio_file.filename))[1]
        fd, temp_file_path = tempfile.mkstemp(suffix=suffix, dir=TEMPORARY_FOLDER)
        os.close(fd)  # Werkzeug re-opens the path when saving
        audio_file.save(temp_file_path)

        segments, _ = wmodel.transcribe(
            temp_file_path,
            beam_size=beamsize,
            vad_filter=True,                  # skip long stretches of silence
            without_timestamps=True,          # text only, no segment times
            compression_ratio_threshold=2.4,
            word_timestamps=False,
        )
        full_text = " ".join(segment.text for segment in segments)
        return jsonify({'transcription': full_text}), 200
    except Exception as e:
        # Top-level request boundary: surface the failure to the client.
        return jsonify({'error': str(e)}), 500
    finally:
        if temp_file_path:
            cleanup_temp_files(temp_file_path)
        active_requests -= 1
        request_semaphore.release()
        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
if __name__ == "__main__":
    # tempfile.gettempdir() normally exists already; exist_ok avoids the
    # check-then-create race of the exists/makedirs pair.
    os.makedirs(TEMPORARY_FOLDER, exist_ok=True)
    # threaded=True lets the Semaphore-based concurrency limit do its job.
    app.run(host="0.0.0.0", port=7860, threaded=True)