Quartz4065 commited on
Commit
9afa571
·
verified ·
1 Parent(s): 9d1fee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -51
app.py CHANGED
@@ -1,28 +1,26 @@
1
  import os
 
 
 
2
  from fastapi import FastAPI, UploadFile, File, HTTPException
3
  from fastapi.responses import JSONResponse, PlainTextResponse
4
  from pydantic import BaseModel
5
  from faster_whisper import WhisperModel
6
- import tempfile
7
- import subprocess
8
- import math
9
 
10
- # ---------- Writable caches (prevents PermissionError on /.cache) ----------
11
  CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
12
  os.environ["HF_HOME"] = CACHE_ROOT
13
  os.environ["HUGGINGFACE_HUB_CACHE"] = CACHE_ROOT
14
  os.environ["TRANSFORMERS_CACHE"] = CACHE_ROOT
15
  os.makedirs(CACHE_ROOT, exist_ok=True)
16
 
17
- # ---------- App ----------
18
  app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")
19
 
20
- # Root route (avoid 404 at "/")
21
  @app.get("/", response_class=PlainTextResponse)
22
  def root():
23
  return "Nuvia Free Transcriber · try POST /transcribe or GET /health"
24
 
25
- # Health route used by your GPT Action
26
  class HealthOut(BaseModel):
27
  ok: bool
28
 
@@ -30,96 +28,67 @@ class HealthOut(BaseModel):
30
  def health():
31
  return {"ok": True}
32
 
33
- # ---------- Load model (tiny.en = fastest on CPU) ----------
34
- # You can switch to "base.en" if you want a bit more accuracy (slower).
35
  MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
36
  COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")
37
-
38
- # Ensure the cache dir exists and is writable before model download
39
- os.makedirs(CACHE_ROOT, exist_ok=True)
40
-
41
  model = WhisperModel(
42
  MODEL_NAME,
43
  device="cpu",
44
  compute_type=COMPUTE_TYPE,
45
- download_root=CACHE_ROOT, # <— keeps models inside /data/hf
46
  )
47
 
48
- # ---------- Helpers ----------
49
- def ffprobe_duration(path: str) -> float | None:
50
- """Return duration in seconds using ffprobe, or None on failure."""
51
  try:
52
  out = subprocess.check_output(
53
- [
54
- "ffprobe",
55
- "-v", "error",
56
- "-show_entries", "format=duration",
57
- "-of", "default=noprint_wrappers=1:nokey=1",
58
- path,
59
- ],
60
  stderr=subprocess.STDOUT,
61
  )
62
  return float(out.decode().strip())
63
  except Exception:
64
  return None
65
 
66
- def estimate_wpm(text: str, duration_sec: float | None) -> float | None:
67
  if not text or not duration_sec or duration_sec <= 0:
68
  return None
69
  words = len(text.strip().split())
70
- minutes = duration_sec / 60.0
71
- if minutes <= 0:
72
  return None
73
- return words / minutes
74
 
75
- # ---------- Schemas ----------
76
  class TranscribeOut(BaseModel):
77
  text: str
78
  duration_sec: float | None = None
79
  wpm: float | None = None
80
 
81
- # ---------- API ----------
82
  @app.post("/transcribe", response_model=TranscribeOut)
83
  async def transcribe(file: UploadFile = File(...)):
84
  if not file.filename:
85
  raise HTTPException(400, "Missing file name")
86
- suffix = os.path.splitext(file.filename)[1].lower()
87
- if suffix not in [".mp3", ".m4a", ".wav", ".aac", ".flac"]:
88
- # allow anyway; faster-whisper handles most formats via ffmpeg
89
- pass
90
 
91
- # Save upload to a temp file
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
93
- content = await file.read()
94
- tmp.write(content)
95
  tmp_path = tmp.name
96
 
97
- # Duration via ffprobe (best effort)
98
  duration = ffprobe_duration(tmp_path)
99
 
100
- # Transcribe
101
- # NOTE: beam_size=1 and vad_filter=True for speed/legibility on CPU Spaces
102
  segments, info = model.transcribe(
103
  tmp_path,
104
  language="en",
105
  beam_size=1,
106
  vad_filter=True,
107
- vad_parameters=dict(min_silence_duration_ms=600)
108
  )
109
 
110
- # Concatenate text
111
- parts = []
112
- for seg in segments:
113
- # You can keep timestamps if you want: f"[{seg.start:.2f}-{seg.end:.2f}] {seg.text}"
114
- parts.append(seg.text.strip())
115
- full_text = " ".join([p for p in parts if p])
116
-
117
- # Compute WPM if possible
118
- wpm = estimate_wpm(full_text, duration)
119
 
120
  try:
121
  os.unlink(tmp_path)
122
  except Exception:
123
  pass
124
 
125
- return TranscribeOut(text=full_text, duration_sec=duration, wpm=wpm)
 
1
  import os
2
+ import math
3
+ import tempfile
4
+ import subprocess
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from fastapi.responses import JSONResponse, PlainTextResponse
7
  from pydantic import BaseModel
8
  from faster_whisper import WhisperModel
 
 
 
9
 
10
# Redirect every HF cache location to one writable root so model downloads
# never hit a read-only ~/.cache inside the container.
CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
for _cache_var in ("HF_HOME", "HUGGINGFACE_HUB_CACHE", "TRANSFORMERS_CACHE"):
    os.environ[_cache_var] = CACHE_ROOT
os.makedirs(CACHE_ROOT, exist_ok=True)
16
 
 
17
# Single FastAPI application instance; title/version surface in the OpenAPI docs.
app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")
18
 
19
# Friendly landing text so hitting "/" never yields a 404.
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a plain-text pointer to the useful endpoints."""
    return "Nuvia Free Transcriber · try POST /transcribe or GET /health"
23
 
 
24
class HealthOut(BaseModel):
    """Response schema for the health probe."""

    # True when the service is up; the health route always reports True.
    ok: bool
26
 
 
28
def health():
    """Liveness probe; always reports the service as healthy."""
    return dict(ok=True)
30
 
 
 
31
# Model repo and quantization are env-overridable; tiny.en + int8 favors CPU speed.
MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")
 
 
 
 
33
# Loaded once at import time; download_root keeps the weights inside the
# writable CACHE_ROOT so the Space never writes outside /data.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type=COMPUTE_TYPE,
    download_root=CACHE_ROOT,
)
39
 
40
def ffprobe_duration(path: str):
    """Best-effort media duration in seconds via ffprobe; None on any failure."""
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    try:
        raw = subprocess.check_output(probe_cmd, stderr=subprocess.STDOUT)
    except Exception:
        # ffprobe missing, file unreadable, or non-zero exit — treat all as unknown.
        return None
    try:
        return float(raw.decode().strip())
    except Exception:
        return None
50
 
51
+ def estimate_wpm(text: str, duration_sec: float | None):
52
  if not text or not duration_sec or duration_sec <= 0:
53
  return None
54
  words = len(text.strip().split())
55
+ mins = duration_sec / 60.0
56
+ if mins <= 0:
57
  return None
58
+ return words / mins
59
 
 
60
class TranscribeOut(BaseModel):
    """Response schema for POST /transcribe."""

    # Full transcript; segment texts joined with single spaces.
    text: str
    # Media duration from ffprobe, when it could be determined.
    duration_sec: float | None = None
    # Estimated speaking rate, when both text and duration are available.
    wpm: float | None = None
64
 
 
65
@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English text.

    Saves the upload to a temp file, measures its duration with ffprobe
    (best effort), runs faster-whisper on CPU, and returns the transcript
    plus an estimated words-per-minute figure.

    Raises:
        HTTPException(400): when the upload carries no filename.
    """
    if not file.filename:
        raise HTTPException(400, "Missing file name")
    # Keep the original extension so ffmpeg can sniff the container format;
    # default to .mp3 when the name has no extension.
    suffix = os.path.splitext(file.filename)[1].lower() or ".mp3"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        duration = ffprobe_duration(tmp_path)

        # beam_size=1 + VAD keeps CPU latency low at a small accuracy cost.
        segments, _info = model.transcribe(
            tmp_path,
            language="en",
            beam_size=1,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=600),
        )

        parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
        text = " ".join(parts)
        wpm = estimate_wpm(text, duration)
    finally:
        # Always remove the temp file, even when probing/transcription raises
        # (the original leaked it on any exception above this point).
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    return TranscribeOut(text=text, duration_sec=duration, wpm=wpm)