"""HTTP speech-to-text service backed by faster-whisper running on CPU.

Exposes a single ``POST /transcribe`` endpoint that accepts an uploaded audio
file, stores it in a temporary file, transcribes it with a process-wide
``WhisperModel`` and returns the recognised text.
"""

import asyncio
import contextlib
import logging
import os
import shutil
import tempfile
import uuid

from fastapi import FastAPI, File, HTTPException, UploadFile
from faster_whisper import WhisperModel
from pydantic import BaseModel

logger = logging.getLogger(__name__)

MODEL_NAME = os.getenv("FASTER_WHISPER_MODEL", "tiny.en")
NUM_THREADS = int(os.getenv("NUM_THREADS", "2"))

# Uploads are copied to disk in pieces of this size instead of being held in
# memory as one bytes object.
_UPLOAD_CHUNK_SIZE = 1024 * 1024
# Longest client-supplied extension (including the dot) reused for the temp file.
_MAX_SUFFIX_LEN = 16

# Load model once (CPU, int8)
# NOTE(review): NUM_THREADS is passed as ``num_workers``; faster-whisper also
# has a separate ``cpu_threads`` option -- confirm which one was intended.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type="int8",
    num_workers=NUM_THREADS,
)

app = FastAPI(title="STT (faster-whisper CPU)")


class TranscribeOut(BaseModel):
    """Response body of ``POST /transcribe``."""

    # Non-empty segment texts, stripped and joined with single spaces.
    text: str
    # ``info.language`` as reported by the model, or None when absent.
    language: str | None = None
    # ``info.duration`` as reported by the model (presumably seconds -- confirm),
    # or None when absent.
    duration: float | None = None


def _safe_suffix(filename: str | None) -> str:
    """Return a temp-file suffix derived from the client-supplied *filename*.

    The extension comes from untrusted input, so it is only reused when it is
    a short, purely ASCII-alphanumeric extension; anything else (missing,
    empty, odd characters, over-long) falls back to ``".wav"``.
    """
    ext = os.path.splitext(filename or "")[1]
    stem = ext[1:]
    if stem and len(ext) <= _MAX_SUFFIX_LEN and stem.isascii() and stem.isalnum():
        return ext
    return ".wav"


def _run_transcription(path: str, beam_size: int, vad_filter: bool) -> TranscribeOut:
    """Transcribe the audio file at *path* and build the response model.

    This is blocking, CPU-bound work and is meant to be called from a worker
    thread. The segment iterable is fully consumed here so that none of the
    decoding is left to happen later on the event loop.
    """
    # Transcribe via file path (lets faster-whisper/ffmpeg do decoding)
    segments, info = model.transcribe(
        path,
        beam_size=beam_size,
        vad_filter=vad_filter,
    )
    parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
    return TranscribeOut(
        text=" ".join(parts).strip(),
        language=getattr(info, "language", None),
        duration=getattr(info, "duration", None),
    )


@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(
    file: UploadFile = File(...),
    beam_size: int = 1,
    vad_filter: bool = True,
):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio file.
        beam_size: Forwarded unchanged to ``model.transcribe``.
        vad_filter: Forwarded unchanged to ``model.transcribe``.

    Returns:
        A ``TranscribeOut`` with the joined text and the language/duration
        reported by the model.

    Raises:
        HTTPException: 400 when the upload is empty; 500 when storing or
            transcribing the upload fails.
    """
    # mkstemp creates the file atomically under a unique name, so the path
    # never has to be assembled by hand from client-controlled pieces.
    fd, tmp_path = tempfile.mkstemp(prefix="stt_", suffix=_safe_suffix(file.filename))
    try:
        # Persist to a temp file so ffmpeg can probe it robustly. The upload is
        # streamed chunk by chunk rather than read into memory in one go.
        received = 0
        with os.fdopen(fd, "wb") as tmp:
            while chunk := await file.read(_UPLOAD_CHUNK_SIZE):
                tmp.write(chunk)
                received += len(chunk)
        await file.close()

        if not received:
            raise HTTPException(status_code=400, detail="Empty file")

        # Run the blocking transcription off the event loop so other requests
        # keep being served while this one is processed.
        return await asyncio.to_thread(
            _run_transcription, tmp_path, beam_size, vad_filter
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must not become 500s.
        raise
    except Exception as exc:
        logger.exception("Transcription failed")
        raise HTTPException(
            status_code=500, detail=f"Transcription failed: {exc}"
        ) from exc
    finally:
        # Best-effort cleanup: a temp file that cannot be removed must not
        # mask the real outcome of the request.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)