Spaces:
Running
Running
| from fastapi import APIRouter, UploadFile, File, HTTPException, Request | |
| from faster_whisper import WhisperModel | |
| import io | |
| import asyncio | |
# Router the main application mounts; the ASR endpoint below is attached to it.
router = APIRouter()
# Global lock to protect RAM usage during concurrent requests
# (only one transcription runs at a time — the tiny model is cheap but not free).
asr_lock = asyncio.Lock()
| # Function to load the ASR model | |
def load_model_asr():
    """Build and return the faster-whisper model used for transcription.

    Uses the 'tiny' checkpoint on CPU with int8 quantization to keep the
    memory footprint as small as possible.
    """
    return WhisperModel("tiny", device="cpu", compute_type="int8")
def _transcribe_sync(model, buffer):
    """Run the blocking faster-whisper transcription and join the segments.

    Intended to be executed in a worker thread — `model.transcribe` and the
    iteration of its segment generator are synchronous, CPU-bound work.
    """
    # beam_size=1 keeps decoding cheap; vad_filter trims non-speech audio.
    segments, _info = model.transcribe(buffer, beam_size=1, vad_filter=True)
    # Iterating `segments` is what actually performs the decoding.
    return " ".join(segment.text for segment in segments)


async def asr(request: Request, audio: UploadFile = File(...)):
    """
    Transcribe audio to text using faster-whisper (locally, no HTTP calls)
    """
    # Serialize requests: one transcription at a time to cap RAM usage.
    async with asr_lock:
        try:
            # Model is loaded once at startup and stored on app.state.
            model = request.app.state.model_asr
            # Read the uploaded audio fully into memory.
            audio_bytes = await audio.read()
            buffer = io.BytesIO(audio_bytes)
            # BUG FIX: transcription is CPU-bound and synchronous; calling it
            # directly inside this `async def` would block the event loop for
            # every other request. Off-load it to a worker thread instead.
            transcription = await asyncio.to_thread(_transcribe_sync, model, buffer)
            return {"transcription": transcription}
        except HTTPException:
            # Don't wrap deliberate HTTP errors into a generic 500.
            raise
        except Exception as e:
            # Surface any unexpected failure as a 500 with its message,
            # keeping the original traceback chained for server logs.
            raise HTTPException(status_code=500, detail=str(e)) from e