File size: 1,910 Bytes
cd2fd2f
d5083c7
081d1f9
d5083c7
081d1f9
d5083c7
 
081d1f9
cd2fd2f
d5083c7
081d1f9
d5083c7
 
 
 
 
 
 
 
cd2fd2f
 
 
 
 
 
 
 
d5083c7
 
 
cd2fd2f
 
 
d5083c7
cd2fd2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5083c7
 
cd2fd2f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import asyncio
import os
import shutil
import tempfile
import uuid

from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
from faster_whisper import WhisperModel

# Model identifier handed to WhisperModel; override with FASTER_WHISPER_MODEL.
MODEL_NAME = os.getenv("FASTER_WHISPER_MODEL", "tiny.en")
# CPU thread budget for inference; override with NUM_THREADS.
# A non-integer value fails fast at import time with ValueError.
NUM_THREADS = int(os.getenv("NUM_THREADS", "2"))

# Load the model once at import time (CPU, int8) so every request reuses it.
# NUM_THREADS is passed as `cpu_threads` (threads used for a single inference).
# It was previously passed as `num_workers`, which controls how many
# transcriptions may run in parallel and increases memory use, leaving the
# actual thread count at the library default.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type="int8",
    cpu_threads=NUM_THREADS,
)

app = FastAPI(title="STT (faster-whisper CPU)")

# Response body of POST /transcribe.
class TranscribeOut(BaseModel):
    # Transcript: the non-empty segment texts joined with single spaces.
    text: str
    # `language` attribute of the transcription info, or None when absent.
    language: str | None = None
    # `duration` attribute of the transcription info, or None when absent
    # (presumably seconds of audio -- confirm against faster-whisper).
    duration: float | None = None

def _safe_suffix(filename: str | None) -> str:
    """Return a conservative file extension for *filename*, defaulting to ``.wav``.

    The filename is client-supplied, so only short ASCII-alphanumeric
    extensions are passed through to the temp-file name.
    """
    ext = os.path.splitext(filename or "")[1]
    stem = ext[1:]
    if stem and len(stem) <= 16 and stem.isascii() and stem.isalnum():
        return ext
    return ".wav"


def _transcribe_bytes(payload: bytes, suffix: str, beam_size: int, vad_filter: bool):
    """Write *payload* to a temp file, transcribe it, and return ``(text, info)``.

    This is a blocking helper meant to run in a worker thread. The segments
    returned by ``model.transcribe`` are consumed here, so all of the decoding
    work happens inside this call. The temp file is always removed.
    """
    # mkstemp creates the file exclusively with owner-only permissions.
    fd, tmp_path = tempfile.mkstemp(prefix="stt_", suffix=suffix)
    try:
        with os.fdopen(fd, "wb") as tmp:
            tmp.write(payload)

        # Transcribe via file path (lets faster-whisper do the decoding).
        segments, info = model.transcribe(
            tmp_path,
            beam_size=beam_size,
            vad_filter=vad_filter,
        )
        stripped = (seg.text.strip() for seg in segments if seg.text)
        text = " ".join(part for part in stripped if part)
        return text, info
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not fail the request.
            pass


@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(
    file: UploadFile = File(...),
    beam_size: int = 1,
    vad_filter: bool = True,
):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio file.
        beam_size: Beam size forwarded to the model; must be at least 1.
        vad_filter: Forwarded to the model's ``vad_filter`` option.

    Returns:
        TranscribeOut with the joined transcript plus the language and
        duration reported by the model (``None`` when not reported).

    Raises:
        HTTPException: 400 if the upload is empty or ``beam_size`` is below 1;
            500 if transcription fails.
    """
    # Read the upload, closing it even if the read itself fails.
    try:
        payload = await file.read()
    finally:
        await file.close()
    if not payload:
        raise HTTPException(status_code=400, detail="Empty file")
    if beam_size < 1:
        # Reject bad input up front instead of surfacing it as a 500 from the model.
        raise HTTPException(status_code=400, detail="beam_size must be >= 1")

    suffix = _safe_suffix(file.filename)
    try:
        # Transcription is CPU-bound and blocking; run it in a worker thread
        # so the event loop can keep serving other requests.
        text, info = await asyncio.to_thread(
            _transcribe_bytes, payload, suffix, beam_size, vad_filter
        )
        return TranscribeOut(
            text=text,
            language=getattr(info, "language", None),
            duration=getattr(info, "duration", None),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription failed: {e}") from e