import asyncio
import os
import subprocess
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse, PlainTextResponse
from faster_whisper import WhisperModel
from pydantic import BaseModel

# Force one (intended-writable) cache directory for everything Hugging Face:
# HF_HOME when the environment already provides it, otherwise /data/hf.
# NOTE(review): these variables are exported after faster_whisper has been
# imported above -- confirm the libraries read them lazily.
CACHE_ROOT = os.getenv("HF_HOME", "/data/hf")
os.environ.update(
    {
        "HF_HOME": CACHE_ROOT,
        "HUGGINGFACE_HUB_CACHE": CACHE_ROOT,
        "TRANSFORMERS_CACHE": CACHE_ROOT,
    }
)
# Create the directory up front; an already-existing one is not an error.
os.makedirs(CACHE_ROOT, exist_ok=True)

# Application object; the route decorators in this module register on it.
app = FastAPI(
    title="Nuvia Free Transcriber",
    version="1.4.0",
)

# Answer "/" with a short usage hint so the bare URL never yields a 404.
@app.get("/", response_class=PlainTextResponse)
def root():
    # Sent back as plain text (PlainTextResponse), not JSON.
    usage_hint = "Nuvia Free Transcriber · try POST /transcribe or GET /health"
    return usage_hint

# Response schema for GET /health.
class HealthOut(BaseModel):
    # Liveness flag reported by the health endpoint.
    ok: bool

# Liveness probe: does no work and unconditionally reports ok=True.
@app.get("/health", response_model=HealthOut)
def health():
    status = dict(ok=True)
    return status

# Model repository and compute type, each overridable through the environment.
MODEL_NAME = os.getenv("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE", "int8")

# Built once at import time and reused by every /transcribe request.
# CPU-only; CACHE_ROOT is handed over as the model download directory.
model = WhisperModel(
    MODEL_NAME, device="cpu", compute_type=COMPUTE_TYPE, download_root=CACHE_ROOT
)

def ffprobe_duration(path: str):
    try:
        out = subprocess.check_output(
            ["ffprobe","-v","error","-show_entries","format=duration",
             "-of","default=noprint_wrappers=1:nokey=1", path],
            stderr=subprocess.STDOUT,
        )
        return float(out.decode().strip())
    except Exception:
        return None

def estimate_wpm(text: str, duration_sec: float | None):
    if not text or not duration_sec or duration_sec <= 0:
        return None
    words = len(text.strip().split())
    mins = duration_sec / 60.0
    if mins <= 0:
        return None
    return words / mins

# Response schema for POST /transcribe.
class TranscribeOut(BaseModel):
    # Transcript text.
    text: str
    # Audio length in seconds; None when it could not be determined.
    duration_sec: float | None = None
    # Words per minute derived from text and duration_sec; None when either
    # is unavailable.
    wpm: float | None = None

def _transcribe_path(path: str):
    """Blocking half of a request: probe *path* and run Whisper over it.

    Returns:
        A ``(text, duration_sec)`` tuple; ``duration_sec`` is ``None`` when
        neither ffprobe nor the transcription info provides a duration.
    """
    duration = ffprobe_duration(path)

    segments, info = model.transcribe(
        path,
        language="en",
        beam_size=1,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=600),
    )

    # Per the faster-whisper docs, segments are produced lazily, so the
    # decoding work happens while this comprehension iterates.
    parts = [
        cleaned
        for seg in segments
        if seg.text and (cleaned := seg.text.strip())
    ]
    text = " ".join(parts)

    if duration is None:
        # ffprobe missing or failed: fall back to the duration reported by
        # faster-whisper's transcription info, when present.
        duration = getattr(info, "duration", None)
    return text, duration


@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded English audio file.

    Returns the transcript, the audio duration in seconds and the speaking
    rate in words per minute (the latter two are null when unavailable).
    Responds with 400 when the upload has no file name or no content.
    """
    if not file.filename:
        raise HTTPException(400, "Missing file name")
    # Keep the client's extension (default .mp3) on the temp file name.
    suffix = os.path.splitext(file.filename)[1].lower() or ".mp3"

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            received = 0
            # Copy in 1 MiB chunks instead of loading the whole upload into
            # memory at once.
            while chunk := await file.read(1024 * 1024):
                tmp.write(chunk)
                received += len(chunk)
        if received == 0:
            raise HTTPException(400, "Empty file")

        # ffprobe and Whisper are blocking calls; run them in a worker
        # thread so the event loop keeps serving other requests.
        text, duration = await asyncio.to_thread(_transcribe_path, tmp_path)
    finally:
        # Reached on success, on HTTPException and on any transcription
        # failure, so the temp file never outlives the request.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone

    wpm = estimate_wpm(text, duration)
    return TranscribeOut(text=text, duration_sec=duration, wpm=wpm)