# STT-MODEL / main.py
# (scraped from GitHub — author: MaenGit, commit 10b630e "update to small")
import asyncio
import os
import shutil
import tempfile
from typing import Optional

import anyio
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
# Use the 'Base' model for maximum speed on Free Tier CPU
MODEL_ID = "Systran/faster-whisper-small"#Systran/faster-whisper-base
inference_lock = asyncio.Lock()
user_sessions = {}
try:
print(f"Loading {MODEL_ID}...")
# int8 quantization keeps it tiny and fast
model = WhisperModel(MODEL_ID, device="cpu", compute_type="int8", download_root="./model_cache")
print("Base Model Loaded!")
except Exception as e:
print(f"Error: {e}")
@app.post("/whisper")
async def transcribe_audio(audio: UploadFile = File(...), session_id: str = "default",lang: str = None):
async with inference_lock:
if session_id not in user_sessions:
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".webm", dir="/tmp")
user_sessions[session_id] = temp_file.name
temp_path = user_sessions[session_id]
try:
with open(temp_path, "ab") as buffer:
shutil.copyfileobj(audio.file, buffer)
# Performance settings for the Base model
segments, info = await anyio.to_thread.run_sync(
lambda: model.transcribe(
temp_path,
beam_size=1,
vad_filter=True,
language=lang,
# Helping the small model with language context
initial_prompt="English and Arabic conversation. مرحبا بكم"
)
)
full_text = " ".join([s.text for s in segments]).strip()
return {"text": full_text, "language": info.language}
except Exception as e:
return {"text": "", "error": str(e)}
@app.get("/")
def health(): return {"status": "base-model-active"}