Spaces:

Quartz4065
/

AudioTranscriber

Sleeping

App Files Community

Quartz4065 committed on Sep 18, 2025

Commit

de0a97e

verified ·

1 Parent(s): bcedbf0

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -34

app.py CHANGED Viewed

@@ -1,35 +1,42 @@
-import io
 import os
-import math
 import subprocess
 from typing import Optional
 from fastapi import FastAPI, File, UploadFile
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-# Optional CORS (safe default in Spaces)
-from fastapi.middleware.cors import CORSMiddleware
-# Transcription (CPU)
 from faster_whisper import WhisperModel
-import soundfile as sf
-# ---------- App ----------
-app = FastAPI(title="Nuvia Free Transcriber", version="1.0.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"], allow_credentials=True,
     allow_methods=["*"], allow_headers=["*"],
 )
-# ---------- Model load (CPU, small for free tier) ----------
-# You can switch to "base.en" if needed; "tiny.en" is faster.
-MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en")
 model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8")
-# ---------- Helpers ----------
 def ffprobe_duration(path: str) -> Optional[float]:
     try:
         out = subprocess.check_output(
@@ -43,19 +50,8 @@ def ffprobe_duration(path: str) -> Optional[float]:
 def word_count(text: str) -> int:
     return len([w for w in text.split() if w.strip()])
-# ---------- Schemas ----------
-class HealthOut(BaseModel):
-    ok: bool
-class TranscribeOut(BaseModel):
-    text: str
-    duration_sec: Optional[float] = None
-    wpm: Optional[float] = None
-# ---------- Routes ----------
 @app.get("/", response_model=HealthOut)
 def root():
-    """Root route so probes and GPT 'test connection' don’t 404."""
     return HealthOut(ok=True)
 @app.get("/health", response_model=HealthOut)
@@ -64,32 +60,29 @@ def health():
 @app.post("/transcribe", response_model=TranscribeOut)
 async def transcribe(file: UploadFile = File(...)):
-    # Read uploaded bytes
     raw = await file.read()
-    # Save to temp wav (Spaces use ephemeral FS; this is fine)
-    tmp_in = "/tmp/infile"
-    # Keep original extension if present
     ext = os.path.splitext(file.filename or "")[1].lower() or ".bin"
-    tmp_in = tmp_in + ext
     with open(tmp_in, "wb") as f:
         f.write(raw)
-    # Ensure we have a WAV for robust decode
     tmp_wav = "/tmp/in.wav"
     try:
-        subprocess.check_call(["ffmpeg", "-y", "-i", tmp_in, "-ar", "16000", "-ac", "1", tmp_wav], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         return JSONResponse(status_code=400, content={"error": "ffmpeg failed to decode the audio"})
-    # Duration via ffprobe (more accurate than guessing)
     duration = ffprobe_duration(tmp_wav)
     # Transcribe
-    segments, info = model.transcribe(tmp_wav, language="en")
     text = "".join([seg.text for seg in segments]).strip()
-    # WPM (best-effort)
     wpm = None
     if duration and duration > 0:
         wc = word_count(text)

 import os
 import subprocess
 from typing import Optional
 from fastapi import FastAPI, File, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+# ---- Writable caches for Spaces (fixes PermissionError: '/.cache') ----
+os.environ.setdefault("HF_HOME", "/tmp/hf")
+os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf")
+os.environ.setdefault("XDG_CACHE_HOME", "/tmp/.cache")
+os.makedirs(os.environ["HF_HOME"], exist_ok=True)
+os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
+# ---- Transcription (CPU) ----
 from faster_whisper import WhisperModel
+MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en")  # fast & CPU-friendly
+app = FastAPI(title="Nuvia Free Transcriber", version="1.1.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"], allow_credentials=True,
     allow_methods=["*"], allow_headers=["*"],
 )
+# Load model once at startup
 model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8")
+class HealthOut(BaseModel):
+    ok: bool
+class TranscribeOut(BaseModel):
+    text: str
+    duration_sec: Optional[float] = None
+    wpm: Optional[float] = None
 def ffprobe_duration(path: str) -> Optional[float]:
     try:
         out = subprocess.check_output(
 def word_count(text: str) -> int:
     return len([w for w in text.split() if w.strip()])
 @app.get("/", response_model=HealthOut)
 def root():
     return HealthOut(ok=True)
 @app.get("/health", response_model=HealthOut)
 @app.post("/transcribe", response_model=TranscribeOut)
 async def transcribe(file: UploadFile = File(...)):
+    # Save upload
     raw = await file.read()
     ext = os.path.splitext(file.filename or "")[1].lower() or ".bin"
+    tmp_in = f"/tmp/in{ext}"
     with open(tmp_in, "wb") as f:
         f.write(raw)
+    # Convert to mono 16k wav for robust decode
     tmp_wav = "/tmp/in.wav"
     try:
+        subprocess.check_call(
+            ["ffmpeg", "-y", "-i", tmp_in, "-ar", "16000", "-ac", "1", tmp_wav],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+        )
     except subprocess.CalledProcessError:
         return JSONResponse(status_code=400, content={"error": "ffmpeg failed to decode the audio"})
     duration = ffprobe_duration(tmp_wav)
     # Transcribe
+    segments, _ = model.transcribe(tmp_wav, language="en")
     text = "".join([seg.text for seg in segments]).strip()
     wpm = None
     if duration and duration > 0:
         wc = word_count(text)