# Quartz4065's picture
# Update app.py
# 5b717de verified
import os
import tempfile
import subprocess
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse
from pydantic import BaseModel
from faster_whisper import WhisperModel
# Route every Hugging Face cache location to a single directory: an already
# exported HF_HOME wins, otherwise /data/hf is used.
CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
os.environ.update(
    {
        "HF_HOME": CACHE_ROOT,
        "HUGGINGFACE_HUB_CACHE": CACHE_ROOT,
        "TRANSFORMERS_CACHE": CACHE_ROOT,
    }
)
# Create the directory up front so later downloads have somewhere to land.
os.makedirs(CACHE_ROOT, exist_ok=True)

app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")
# Root route so "/" is never a 404
@app.get("/", response_class=PlainTextResponse)
def root() -> str:
    """Return a plain-text banner that points callers at the real endpoints."""
    banner = "Nuvia Free Transcriber · try POST /transcribe or GET /health"
    return banner
class HealthOut(BaseModel):
    """Response schema for the GET /health endpoint."""

    # Health flag; the /health handler always sets it to True.
    ok: bool
@app.get("/health", response_model=HealthOut)
def health():
    """Liveness probe: unconditionally report the service as healthy."""
    payload = dict(ok=True)
    return payload
# Model repository and compute type can be overridden through the environment.
MODEL_NAME = os.getenv("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE", "int8")

# Single shared CPU model, constructed once when the module is imported and
# reused by every request; its files are stored under CACHE_ROOT.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type=COMPUTE_TYPE,
    download_root=CACHE_ROOT,
)
def ffprobe_duration(path: str):
try:
out = subprocess.check_output(
["ffprobe","-v","error","-show_entries","format=duration",
"-of","default=noprint_wrappers=1:nokey=1", path],
stderr=subprocess.STDOUT,
)
return float(out.decode().strip())
except Exception:
return None
def estimate_wpm(text: str, duration_sec: float | None):
if not text or not duration_sec or duration_sec <= 0:
return None
words = len(text.strip().split())
mins = duration_sec / 60.0
if mins <= 0:
return None
return words / mins
class TranscribeOut(BaseModel):
    """Response schema for the POST /transcribe endpoint."""

    # Transcript text.
    text: str
    # Recording length in seconds; optional, defaults to None.
    duration_sec: float | None = None
    # Words-per-minute estimate; optional, defaults to None.
    wpm: float | None = None
@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English text.

    The upload is written to a temporary file, probed with ffprobe for its
    duration, transcribed with the module-level Whisper model, and the
    temporary file is removed again whether or not any of those steps fail.

    Args:
        file: The uploaded audio. The extension of its filename is reused
            as the temporary file's suffix (".mp3" when it has none).

    Returns:
        A TranscribeOut with the joined segment text, the probed duration
        (None if unknown) and the words-per-minute estimate (None if it
        cannot be computed).

    Raises:
        HTTPException: 400 when the upload has no filename.
    """
    if not file.filename:
        raise HTTPException(400, "Missing file name")

    suffix = os.path.splitext(file.filename)[1].lower() or ".mp3"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so a failed write is still
            # cleaned up by the finally clause below.
            tmp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): ffprobe and model.transcribe are synchronous calls
        # made from an async handler; consider offloading them to a worker
        # thread if concurrent requests matter.
        duration = ffprobe_duration(tmp_path)
        segments, _info = model.transcribe(
            tmp_path,
            language="en",
            beam_size=1,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=600),
        )
        # Segments are consumed inside the try block, i.e. before the
        # temporary file is deleted, because faster-whisper yields them
        # lazily.
        parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be removed must not mask the real result or error.
                pass

    text = " ".join(parts)
    wpm = estimate_wpm(text, duration)
    return TranscribeOut(text=text, duration_sec=duration, wpm=wpm)