"""FastAPI service exposing a streaming-ish Whisper transcription endpoint.

Each POST to /whisper appends the uploaded audio chunk to a per-session
temp file and re-transcribes the accumulated audio with faster-whisper.
Inference is serialized behind a lock because the CTranslate2 model is
not safe for concurrent calls (and a single CPU would thrash anyway).
"""

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
import anyio
import os
import tempfile
import shutil
import asyncio

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Use the small model for maximum speed on Free Tier CPU.
MODEL_ID = "Systran/faster-whisper-small"  # alternative: Systran/faster-whisper-base

# Serializes access to the (non-thread-safe) model.
inference_lock = asyncio.Lock()

# session_id -> path of the temp file accumulating that session's audio.
user_sessions: dict[str, str] = {}

# Stays None if loading fails; the endpoint then answers 503 instead of
# crashing with NameError on first request (bug in the original).
model = None
try:
    print(f"Loading {MODEL_ID}...")
    # int8 quantization keeps it tiny and fast
    model = WhisperModel(
        MODEL_ID,
        device="cpu",
        compute_type="int8",
        download_root="./model_cache",
    )
    print("Base Model Loaded!")
except Exception as e:
    print(f"Error: {e}")


@app.post("/whisper")
async def transcribe_audio(
    audio: UploadFile = File(...),
    session_id: str = "default",
    lang: str | None = None,
):
    """Append *audio* to the session's buffer file and transcribe the whole buffer.

    Args:
        audio: Uploaded audio chunk (webm assumed — TODO confirm clients only send webm).
        session_id: Key grouping chunks of one recording session.
        lang: Optional language hint forwarded to the model; None = auto-detect.

    Returns:
        ``{"text": ..., "language": ...}`` on success, or a best-effort
        ``{"text": "", "error": ...}`` on failure (kept for client compatibility).
    """
    if model is None:
        # Model failed to load at startup; fail loudly rather than NameError.
        raise HTTPException(status_code=503, detail="Transcription model not loaded")

    async with inference_lock:
        if session_id not in user_sessions:
            # delete=False: the path must outlive this request so later
            # chunks can append. Close the handle immediately so we do not
            # leak a file descriptor per session; we reopen in append mode.
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".webm")
            temp_file.close()
            user_sessions[session_id] = temp_file.name

        temp_path = user_sessions[session_id]
        try:
            with open(temp_path, "ab") as buffer:
                shutil.copyfileobj(audio.file, buffer)

            # Run the blocking inference off the event loop thread.
            segments, info = await anyio.to_thread.run_sync(
                lambda: model.transcribe(
                    temp_path,
                    beam_size=1,       # greedy decoding for speed
                    vad_filter=True,
                    language=lang,
                    # Helping the small model with language context
                    initial_prompt="English and Arabic conversation. مرحبا بكم",
                )
            )
            full_text = " ".join(s.text for s in segments).strip()
            return {"text": full_text, "language": info.language}
        except Exception as e:
            # Best-effort contract: clients expect HTTP 200 with an "error" field.
            return {"text": "", "error": str(e)}


@app.delete("/whisper")
async def end_session(session_id: str = "default"):
    """Release a session's temp file (they otherwise accumulate forever)."""
    async with inference_lock:
        path = user_sessions.pop(session_id, None)
    if path is not None:
        try:
            os.remove(path)
        except OSError:
            # Already gone or not removable; nothing more to do.
            pass
    return {"status": "cleared"}


@app.get("/")
def health():
    """Liveness probe."""
    return {"status": "base-model-active"}