Spaces:

Quartz4065
/

AudioTranscriber

Sleeping

File size: 2,755 Bytes

ae77e3b
9afa571
 
033e455
 
 
 
ae77e3b
5b717de
033e455
 
 
 
 
54358d8
033e455
829baf1
9afa571
033e455
 
 
ae77e3b
033e455
 
ae77e3b
033e455
 
 
 
 
 
829baf1
 
 
033e455
9afa571
829baf1
54358d8
9afa571
54358d8
 
9afa571
 
033e455
ae77e3b
033e455
54358d8
 
ae77e3b
9afa571
033e455
 
 
9afa571
 
033e455
9afa571
ae77e3b
033e455
 
 
 
ae77e3b
 
54358d8
033e455
 
9afa571
54358d8
033e455
9afa571
033e455
 
 
 
 
 
 
 
 
9afa571
033e455
 
9afa571
 
 
54358d8
033e455
 
 
 
54358d8
9afa571

import asyncio
import math
import os
import subprocess
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse
from faster_whisper import WhisperModel
from pydantic import BaseModel

# Route every Hugging Face cache location to one directory: the value of
# HF_HOME when it is already set, otherwise /data/hf.
CACHE_ROOT = os.getenv("HF_HOME", "/data/hf")
os.environ.update(
    HF_HOME=CACHE_ROOT,
    HUGGINGFACE_HUB_CACHE=CACHE_ROOT,
    TRANSFORMERS_CACHE=CACHE_ROOT,
)
# NOTE(review): these variables are exported after the module's imports have
# run; confirm no imported library reads them at import time.
os.makedirs(CACHE_ROOT, exist_ok=True)

# Application object that the route decorators in this module register on.
app = FastAPI(
    title="Nuvia Free Transcriber",
    version="1.4.0",
)

# Landing route: answers "/" with a plain-text usage hint instead of a 404.
@app.get("/", response_class=PlainTextResponse)
def root():
    # Sent verbatim as the response body.
    usage_hint = "Nuvia Free Transcriber · try POST /transcribe or GET /health"
    return usage_hint

# Response schema for GET /health.
class HealthOut(BaseModel):
    ok: bool  # the health() handler always reports True

# Health probe: does no work and always reports success.
@app.get("/health", response_model=HealthOut)
def health():
    payload = dict(ok=True)
    return payload

# Model repository and compute type can be overridden through the
# WHISPER_REPO and WHISPER_COMPUTE environment variables.
MODEL_NAME = os.getenv("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE", "int8")

# Built once at import time and shared by every request; configured for CPU
# and told to keep its downloads under CACHE_ROOT.
model = WhisperModel(MODEL_NAME, device="cpu", compute_type=COMPUTE_TYPE, download_root=CACHE_ROOT)

def ffprobe_duration(path: str):
    try:
        out = subprocess.check_output(
            ["ffprobe","-v","error","-show_entries","format=duration",
             "-of","default=noprint_wrappers=1:nokey=1", path],
            stderr=subprocess.STDOUT,
        )
        return float(out.decode().strip())
    except Exception:
        return None

def estimate_wpm(text: str, duration_sec: float | None):
    if not text or not duration_sec or duration_sec <= 0:
        return None
    words = len(text.strip().split())
    mins = duration_sec / 60.0
    if mins <= 0:
        return None
    return words / mins

# Response schema for POST /transcribe.
class TranscribeOut(BaseModel):
    text: str  # transcript: non-empty segment texts joined with single spaces
    duration_sec: float | None = None  # result of ffprobe_duration(); None when probing failed
    wpm: float | None = None  # result of estimate_wpm(); None when it cannot be computed

# Uploads are spooled to disk in pieces of this size instead of being read
# into memory in one go.
_UPLOAD_CHUNK_BYTES = 1024 * 1024


def _transcribe_path(path: str) -> tuple[str, float | None]:
    """Probe and transcribe the audio file at *path*; blocking, run off-loop.

    Returns:
        ``(text, duration_sec)`` where ``text`` is the non-empty segment
        texts joined by single spaces and ``duration_sec`` is whatever
        ``ffprobe_duration`` reported (possibly ``None``).
    """
    duration = ffprobe_duration(path)
    segments, _info = model.transcribe(
        path,
        language="en",
        beam_size=1,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=600),
    )
    # faster-whisper documents `segments` as a lazy generator, so the decoding
    # work happens during this iteration and must stay inside the worker
    # thread as well.
    parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
    return " ".join(parts), duration


@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded English audio file.

    The upload is written to a temporary file, probed for its duration and
    transcribed. The temporary file is removed afterwards, including when
    transcription fails.

    Returns the transcript text, the audio duration in seconds (when it
    could be determined) and an estimated words-per-minute rate.

    Responds with HTTP 400 when the upload has no file name or is empty.
    """
    if not file.filename:
        raise HTTPException(400, "Missing file name")

    # The extension comes from the client; only reuse it in the temp file
    # name when it is a short alphanumeric suffix, otherwise fall back to
    # the default.
    suffix = os.path.splitext(file.filename)[1].lower()
    if not (2 <= len(suffix) <= 10 and suffix[1:].isalnum()):
        suffix = ".mp3"

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            received = 0
            while chunk := await file.read(_UPLOAD_CHUNK_BYTES):
                tmp.write(chunk)
                received += len(chunk)

        if received == 0:
            raise HTTPException(400, "Empty file")

        # Probing and decoding are blocking, CPU-heavy calls; running them
        # in a worker thread keeps the event loop free for other requests.
        text, duration = await asyncio.to_thread(_transcribe_path, tmp_path)
    finally:
        # Always remove the temp file, even when transcription raised.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the
                # real outcome of the request.
                pass

    return TranscribeOut(
        text=text,
        duration_sec=duration,
        wpm=estimate_wpm(text, duration),
    )