"""Nuvia Free Transcriber: a small FastAPI service around faster-whisper.

Endpoints:
    GET  /            plain-text banner, so the root is never a 404
    GET  /health      liveness probe
    POST /transcribe  transcribe an uploaded audio file to English text
"""

import asyncio
import logging
import os
import re
import subprocess
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse
from pydantic import BaseModel

# Force writable cache path.
# This must run BEFORE faster_whisper is imported: it pulls in
# huggingface_hub, which resolves its cache locations from these variables
# when it is first imported, so setting them afterwards is too late.
CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
os.environ["HF_HOME"] = CACHE_ROOT
os.environ["HUGGINGFACE_HUB_CACHE"] = CACHE_ROOT
os.environ["TRANSFORMERS_CACHE"] = CACHE_ROOT
os.makedirs(CACHE_ROOT, exist_ok=True)

from faster_whisper import WhisperModel  # noqa: E402  (needs the env above)

logger = logging.getLogger(__name__)

# Suffix given to the temp file when the upload has no usable extension.
DEFAULT_SUFFIX = ".mp3"
# Uploads are copied to disk in chunks of this size instead of being read
# into memory in one piece.
UPLOAD_CHUNK_BYTES = 1024 * 1024
# Upper bound on how long ffprobe may run before we give up on the duration.
FFPROBE_TIMEOUT_SEC = 30
# Only plain extensions such as ".mp3" / ".m4a" are accepted as a suffix.
_SUFFIX_RE = re.compile(r"\.[a-z0-9]{1,16}")

app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")


# Root route so "/" is never a 404
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a short plain-text banner pointing at the useful endpoints."""
    # "\u00b7" is the middle dot; written as an escape so the separator cannot
    # be damaged again by a wrong file encoding.
    return "Nuvia Free Transcriber \u00b7 try POST /transcribe or GET /health"


class HealthOut(BaseModel):
    """Response body of GET /health."""

    ok: bool


@app.get("/health", response_model=HealthOut)
def health():
    """Liveness probe: always reports ``ok: true`` once the app is serving."""
    return {"ok": True}


MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")

# Loaded once at import time and shared by every request.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type=COMPUTE_TYPE,
    download_root=CACHE_ROOT,
)


def ffprobe_duration(path: str):
    """Return the media duration of *path* in seconds, or ``None``.

    ``None`` is returned whenever the duration cannot be determined: ffprobe
    is missing, exits non-zero, times out, or prints something that is not a
    number (for example ``N/A``).
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    try:
        out = subprocess.check_output(
            cmd,
            # Discard stderr: merging it into stdout would mix diagnostics
            # into the text that is parsed as a float below.
            stderr=subprocess.DEVNULL,
            timeout=FFPROBE_TIMEOUT_SEC,
        )
        return float(out.decode().strip())
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Duration is optional metadata, so degrade to None instead of failing.
        logger.warning("ffprobe could not determine duration: %s", exc)
        return None


def estimate_wpm(text: str, duration_sec: float | None):
    """Return words per minute for *text* spoken over *duration_sec* seconds.

    Returns ``None`` when *text* is empty or the duration is missing or not
    positive.
    """
    if not text or not duration_sec or duration_sec <= 0:
        return None
    word_count = len(text.split())
    return word_count / (duration_sec / 60.0)


class TranscribeOut(BaseModel):
    """Response body of POST /transcribe."""

    text: str
    duration_sec: float | None = None
    wpm: float | None = None


def _safe_suffix(filename: str) -> str:
    """Return the lower-cased extension of *filename*, or the default suffix.

    The name comes from the client, so anything that is not a short
    alphanumeric extension is replaced by ``DEFAULT_SUFFIX``.
    """
    ext = os.path.splitext(filename)[1].lower()
    return ext if _SUFFIX_RE.fullmatch(ext) else DEFAULT_SUFFIX


def _transcribe_path(path: str) -> tuple[str, float | None]:
    """Probe and transcribe the audio file at *path* (blocking).

    Returns ``(text, duration_sec)``. The segments returned by
    ``model.transcribe`` are consumed here so that all of the decoding work
    happens inside this call.
    """
    duration = ffprobe_duration(path)
    segments, _info = model.transcribe(
        path,
        language="en",
        beam_size=1,
        vad_filter=True,
        vad_parameters={"min_silence_duration_ms": 600},
    )
    stripped = (seg.text.strip() for seg in segments if seg.text)
    text = " ".join(piece for piece in stripped if piece)
    return text, duration


@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is streamed to a temporary file, transcribed in a worker
    thread, and the temporary file is removed afterwards whether or not
    transcription succeeded.

    Raises:
        HTTPException: 400 when the upload has no file name or no content.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="Missing file name")

    tmp = tempfile.NamedTemporaryFile(
        delete=False, suffix=_safe_suffix(file.filename)
    )
    tmp_path = tmp.name
    try:
        received = 0
        with tmp:
            while chunk := await file.read(UPLOAD_CHUNK_BYTES):
                tmp.write(chunk)
                received += len(chunk)
        if received == 0:
            raise HTTPException(status_code=400, detail="Empty file")

        # ffprobe and Whisper decoding are blocking; run them off the event
        # loop so other requests (e.g. /health) stay responsive.
        text, duration = await asyncio.to_thread(_transcribe_path, tmp_path)
    finally:
        try:
            os.unlink(tmp_path)
        except OSError:
            # Best-effort cleanup: never mask the real outcome of the request.
            logger.warning(
                "Could not remove temp file %s", tmp_path, exc_info=True
            )

    return TranscribeOut(
        text=text,
        duration_sec=duration,
        wpm=estimate_wpm(text, duration),
    )