# STT / app.py
# Provenance: Percy3822's Hugging Face Space, commit cd2fd2f (verified) — "Update app.py"
import asyncio
import os
import shutil
import tempfile
import uuid

from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel

from faster_whisper import WhisperModel
# Whisper model identifier; overridable via env (default "tiny.en", the smallest English-only model).
MODEL_NAME = os.getenv("FASTER_WHISPER_MODEL", "tiny.en")
# Worker count passed to WhisperModel; overridable via env.
NUM_THREADS = int(os.getenv("NUM_THREADS", "2"))
# Load model once at import time (CPU, int8 quantization keeps memory low).
# NOTE(review): WhisperModel's num_workers controls parallel transcription workers,
# not intra-op CPU threads (that is the separate cpu_threads kwarg) — confirm intent.
model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8", num_workers=NUM_THREADS)
app = FastAPI(title="STT (faster-whisper CPU)")
class TranscribeOut(BaseModel):
    """Response schema for the /transcribe endpoint."""

    # Full transcript: per-segment text, stripped and joined with single spaces.
    text: str
    # Language code reported by the model (e.g. "en"); None if unavailable.
    language: str | None = None
    # Audio duration in seconds as reported by the model; None if unavailable.
    duration: float | None = None
@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(
    file: UploadFile = File(...),
    beam_size: int = 1,
    vad_filter: bool = True,
):
    """Transcribe an uploaded audio file with faster-whisper.

    Parameters:
        file: Audio upload in any container/codec ffmpeg can decode.
        beam_size: Decoder beam width (1 = greedy decoding, fastest).
        vad_filter: Apply voice-activity detection to skip silence.

    Returns:
        TranscribeOut with the joined transcript text plus the detected
        language and audio duration when the model reports them.

    Raises:
        HTTPException: 400 for an empty upload, 500 for any transcription failure.
    """
    # Read the upload fully into memory.
    payload = await file.read()
    await file.close()
    if not payload:
        raise HTTPException(status_code=400, detail="Empty file")

    # Persist to a temp file so ffmpeg can probe the container robustly.
    suffix = os.path.splitext(file.filename or "")[1] or ".wav"
    tmp_path = os.path.join(tempfile.gettempdir(), f"stt_{uuid.uuid4().hex}{suffix}")
    try:
        with open(tmp_path, "wb") as f:
            f.write(payload)
        # model.transcribe returns a lazy segment generator: the heavy decoding
        # happens while iterating it. Run the whole job in a worker thread so
        # this CPU-bound work does not block the event loop of the async server.
        loop = asyncio.get_running_loop()
        text, language, duration = await loop.run_in_executor(
            None, _transcribe_file, tmp_path, beam_size, vad_filter
        )
        return TranscribeOut(text=text, language=language, duration=duration)
    except Exception as e:
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=f"Transcription failed: {e}") from e
    finally:
        # Best-effort cleanup; the file may already be gone or locked.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


def _transcribe_file(path: str, beam_size: int, vad_filter: bool):
    """Blocking helper: transcribe *path* and return (text, language, duration)."""
    segments, info = model.transcribe(
        path,
        beam_size=beam_size,
        vad_filter=vad_filter,
    )
    # Drain the lazy generator here, inside the worker thread.
    parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
    return (
        " ".join(parts).strip(),
        getattr(info, "language", None),
        getattr(info, "duration", None),
    )