Spaces:

Quartz4065
/

AudioTranscriber

Sleeping

App Files Files Community

AudioTranscriber / app.py

Quartz4065

Update app.py

5b717de verified 3 months ago

raw

history blame contribute delete

2.76 kB

	import asyncio
	import os
	import subprocess
	import tempfile

	from fastapi import FastAPI, UploadFile, File, HTTPException
	from fastapi.responses import JSONResponse, PlainTextResponse
	from faster_whisper import WhisperModel
	from pydantic import BaseModel

	# Route every Hugging Face cache location to one directory (HF_HOME when it
	# is already set, otherwise /data/hf) and make sure that directory exists.
	CACHE_ROOT = os.getenv("HF_HOME", "/data/hf")
	os.environ.update(
	    {
	        "HF_HOME": CACHE_ROOT,
	        "HUGGINGFACE_HUB_CACHE": CACHE_ROOT,
	        "TRANSFORMERS_CACHE": CACHE_ROOT,
	    }
	)
	os.makedirs(CACHE_ROOT, exist_ok=True)

	# Application object; the route decorators in this module register on it.
	app = FastAPI(
	    title="Nuvia Free Transcriber",
	    version="1.4.0",
	)

	# Root route so "/" is never a 404
	@app.get("/", response_class=PlainTextResponse)
	def root():
	    """Serve a plain-text landing message that names the useful endpoints."""
	    landing_message = "Nuvia Free Transcriber · try POST /transcribe or GET /health"
	    return landing_message

	class HealthOut(BaseModel):
	    """Response schema for the ``/health`` endpoint."""

	    # Status flag; the health handler in this file always reports True.
	    ok: bool

	@app.get("/health", response_model=HealthOut)
	def health():
	    """Report that the service is up; the payload is always ``{"ok": True}``."""
	    status = dict(ok=True)
	    return status

	# Model repository and compute type can be overridden through the
	# WHISPER_REPO and WHISPER_COMPUTE environment variables.
	MODEL_NAME = os.getenv("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
	COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE", "int8")

	# Constructed once at import time and reused by every request; runs on CPU
	# with CACHE_ROOT passed as the download location.
	model = WhisperModel(
	    MODEL_NAME, device="cpu", compute_type=COMPUTE_TYPE, download_root=CACHE_ROOT
	)

	def ffprobe_duration(path: str):
	try:
	out = subprocess.check_output(
	["ffprobe","-v","error","-show_entries","format=duration",
	"-of","default=noprint_wrappers=1:nokey=1", path],
	stderr=subprocess.STDOUT,
	)
	return float(out.decode().strip())
	except Exception:
	return None

	def estimate_wpm(text: str, duration_sec: float \| None):
	if not text or not duration_sec or duration_sec <= 0:
	return None
	words = len(text.strip().split())
	mins = duration_sec / 60.0
	if mins <= 0:
	return None
	return words / mins

	class TranscribeOut(BaseModel):
	    """Response schema for ``POST /transcribe``."""

	    # Transcript text returned to the caller.
	    text: str
	    # Media duration in seconds from ffprobe; None when it is unavailable.
	    duration_sec: float | None = None
	    # Estimated words per minute; None when it cannot be computed.
	    wpm: float | None = None

	def _transcribe_path(path: str):
	    """Probe and transcribe the audio file at *path* (blocking helper).

	    Returns:
	        ``(text, duration_sec)``: the non-empty segment texts joined with
	        single spaces, and the ffprobe duration (``None`` if unavailable).
	    """
	    duration = ffprobe_duration(path)
	    segments, _info = model.transcribe(
	        path,
	        language="en",
	        beam_size=1,
	        vad_filter=True,
	        vad_parameters=dict(min_silence_duration_ms=600),
	    )
	    # faster-whisper yields segments lazily, so iterate them here to keep
	    # the decoding work inside this blocking helper.
	    pieces = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
	    return " ".join(pieces), duration


	@app.post("/transcribe", response_model=TranscribeOut)
	async def transcribe(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file to English text.

	    The upload is spooled to a temporary file (keeping the original
	    extension, defaulting to ``.mp3``), probed for its duration, and
	    transcribed. The temporary file is removed whether or not this succeeds.

	    Args:
	        file: The uploaded audio file; it must carry a file name.

	    Returns:
	        A ``TranscribeOut`` with the transcript, the duration in seconds
	        (or ``None``), and the estimated words per minute (or ``None``).

	    Raises:
	        HTTPException: 400 when the upload has no file name.
	    """
	    if not file.filename:
	        raise HTTPException(400, "Missing file name")
	    suffix = os.path.splitext(file.filename)[1].lower() or ".mp3"

	    chunk_size = 1024 * 1024  # copy in 1 MiB pieces instead of one big read
	    tmp_path = None
	    try:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
	            tmp_path = tmp.name
	            while chunk := await file.read(chunk_size):
	                tmp.write(chunk)

	        # Transcription is CPU-bound; run it on a worker thread so the event
	        # loop stays free to answer other requests (e.g. /health).
	        text, duration = await asyncio.to_thread(_transcribe_path, tmp_path)
	    finally:
	        # Always remove the temp file, including when transcription raised.
	        if tmp_path is not None:
	            try:
	                os.unlink(tmp_path)
	            except OSError:
	                # Best-effort cleanup: a failed delete must not mask the
	                # real outcome of the request.
	                pass

	    return TranscribeOut(
	        text=text,
	        duration_sec=duration,
	        wpm=estimate_wpm(text, duration),
	    )