Spaces:

Percy3822
/

STT

Runtime error

App Files Files Community

Percy3822 committed on Sep 2, 2025

Commit

d5083c7

verified ·

1 Parent(s): 2bf0546

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -8

app.py CHANGED Viewed

@@ -1,12 +1,59 @@
-from fastapi import FastAPI
 from pydantic import BaseModel
-app = FastAPI(title="STT Space (stub)")
-class TranscribeIn(BaseModel):
-    hint: str = "hello"
-@app.post("/transcribe")
-def transcribe(inp: TranscribeIn):
-    # Stub: echo a short transcript. Swap with Whisper later.
-    return {"text": inp.hint or "hello", "language": "en"}

+import os, io, time
+from fastapi import FastAPI, UploadFile, File, HTTPException
 from pydantic import BaseModel
+from faster_whisper import WhisperModel
+# Runtime configuration, overridable through environment variables.
+MODEL_NAME = os.getenv("FASTER_WHISPER_MODEL", "tiny.en")
+NUM_THREADS = int(os.getenv("NUM_THREADS", "2"))
+# Load the model once at import time (CPU, int8) so every request reuses it.
+# NUM_THREADS is a thread count, so it is passed as cpu_threads (threads used
+# for inference). num_workers is a different knob: it only controls how many
+# transcribe() calls may run in parallel, and is left at its default.
+model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8", cpu_threads=NUM_THREADS)
+app = FastAPI(title="STT (faster-whisper CPU)")
+# Response schema of POST /transcribe (used as that route's response_model).
+class TranscribeOut(BaseModel):
+    # Transcript: the non-empty segment texts joined with single spaces.
+    text: str
+    # Language taken from the transcription info object; None when absent.
+    language: str | None = None
+    # Duration taken from the transcription info object; None when absent.
+    # NOTE(review): presumably seconds of audio — confirm against faster-whisper.
+    duration: float | None = None
+@app.get("/health")
+def health():
+    # Liveness probe: always reports ok together with the configured model name.
+    status = {"ok": True, "model": MODEL_NAME}
+    return status
+@app.post("/transcribe", response_model=TranscribeOut)
+async def transcribe(file: UploadFile = File(...)):
+    """Transcribe an uploaded audio file with the preloaded Whisper model.
+
+    The whole upload is read into memory, so this is intended for small files.
+    Responds with 400 when the upload has no filename or is empty, and with
+    500 when decoding or transcription fails.
+    """
+    import asyncio  # local import: only needed to offload the blocking inference
+
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No filename")
+    # The declared content type is deliberately not enforced: clients often
+    # send a missing or wrong one, and the decoder rejects non-audio anyway.
+    try:
+        payload = await file.read()
+    finally:
+        await file.close()
+    if not payload:
+        raise HTTPException(status_code=400, detail="Empty file")
+
+    def _run_inference():
+        # model.transcribe() returns a lazy generator of segments; the actual
+        # inference happens while it is consumed, so consume it right here.
+        segments, info = model.transcribe(io.BytesIO(payload), vad_filter=True)
+        pieces = (seg.text.strip() for seg in segments)
+        return " ".join(piece for piece in pieces if piece), info
+
+    try:
+        # Inference is CPU-bound and blocking; run it in a worker thread so the
+        # event loop (and with it /health) stays responsive meanwhile.
+        text, info = await asyncio.to_thread(_run_inference)
+    except Exception as e:
+        # Most common: ffmpeg missing (fixed by Dockerfile), or invalid audio
+        raise HTTPException(status_code=500, detail=f"Transcription failed: {e}") from e
+    return TranscribeOut(
+        text=text,
+        language=getattr(info, "language", None),
+        duration=getattr(info, "duration", None),
+    )