Quartz4065 committed on
Commit
de0a97e
·
verified ·
1 Parent(s): bcedbf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -34
app.py CHANGED
@@ -1,35 +1,42 @@
1
- import io
2
  import os
3
- import math
4
  import subprocess
5
  from typing import Optional
6
 
7
  from fastapi import FastAPI, File, UploadFile
 
8
  from fastapi.responses import JSONResponse
9
  from pydantic import BaseModel
10
 
11
- # Optional CORS (safe default in Spaces)
12
- from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
13
 
14
- # Transcription (CPU)
15
  from faster_whisper import WhisperModel
16
- import soundfile as sf
17
 
18
- # ---------- App ----------
19
- app = FastAPI(title="Nuvia Free Transcriber", version="1.0.0")
20
 
 
21
  app.add_middleware(
22
  CORSMiddleware,
23
  allow_origins=["*"], allow_credentials=True,
24
  allow_methods=["*"], allow_headers=["*"],
25
  )
26
 
27
- # ---------- Model load (CPU, small for free tier) ----------
28
- # You can switch to "base.en" if needed; "tiny.en" is faster.
29
- MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en")
30
  model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8")
31
 
32
- # ---------- Helpers ----------
 
 
 
 
 
 
 
33
  def ffprobe_duration(path: str) -> Optional[float]:
34
  try:
35
  out = subprocess.check_output(
@@ -43,19 +50,8 @@ def ffprobe_duration(path: str) -> Optional[float]:
43
  def word_count(text: str) -> int:
44
  return len([w for w in text.split() if w.strip()])
45
 
46
- # ---------- Schemas ----------
47
- class HealthOut(BaseModel):
48
- ok: bool
49
-
50
- class TranscribeOut(BaseModel):
51
- text: str
52
- duration_sec: Optional[float] = None
53
- wpm: Optional[float] = None
54
-
55
- # ---------- Routes ----------
56
  @app.get("/", response_model=HealthOut)
57
  def root():
58
- """Root route so probes and GPT 'test connection' don’t 404."""
59
  return HealthOut(ok=True)
60
 
61
  @app.get("/health", response_model=HealthOut)
@@ -64,32 +60,29 @@ def health():
64
 
65
  @app.post("/transcribe", response_model=TranscribeOut)
66
  async def transcribe(file: UploadFile = File(...)):
67
- # Read uploaded bytes
68
  raw = await file.read()
69
-
70
- # Save to temp wav (Spaces use ephemeral FS; this is fine)
71
- tmp_in = "/tmp/infile"
72
- # Keep original extension if present
73
  ext = os.path.splitext(file.filename or "")[1].lower() or ".bin"
74
- tmp_in = tmp_in + ext
75
  with open(tmp_in, "wb") as f:
76
  f.write(raw)
77
 
78
- # Ensure we have a WAV for robust decode
79
  tmp_wav = "/tmp/in.wav"
80
  try:
81
- subprocess.check_call(["ffmpeg", "-y", "-i", tmp_in, "-ar", "16000", "-ac", "1", tmp_wav], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
 
 
82
  except subprocess.CalledProcessError:
83
  return JSONResponse(status_code=400, content={"error": "ffmpeg failed to decode the audio"})
84
 
85
- # Duration via ffprobe (more accurate than guessing)
86
  duration = ffprobe_duration(tmp_wav)
87
 
88
  # Transcribe
89
- segments, info = model.transcribe(tmp_wav, language="en")
90
  text = "".join([seg.text for seg in segments]).strip()
91
 
92
- # WPM (best-effort)
93
  wpm = None
94
  if duration and duration > 0:
95
  wc = word_count(text)
 
 
1
  import os
 
2
  import subprocess
3
  from typing import Optional
4
 
5
  from fastapi import FastAPI, File, UploadFile
6
+ from fastapi.middleware.cors import CORSMiddleware
7
  from fastapi.responses import JSONResponse
8
  from pydantic import BaseModel
9
 
10
+ # ---- Writable caches for Spaces (fixes PermissionError: '/.cache') ----
11
+ os.environ.setdefault("HF_HOME", "/tmp/hf")
12
+ os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/hf")
13
+ os.environ.setdefault("XDG_CACHE_HOME", "/tmp/.cache")
14
+ os.makedirs(os.environ["HF_HOME"], exist_ok=True)
15
+ os.makedirs(os.environ["XDG_CACHE_HOME"], exist_ok=True)
16
 
17
+ # ---- Transcription (CPU) ----
18
  from faster_whisper import WhisperModel
 
19
 
20
+ MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en") # fast & CPU-friendly
 
21
 
22
+ app = FastAPI(title="Nuvia Free Transcriber", version="1.1.0")
23
  app.add_middleware(
24
  CORSMiddleware,
25
  allow_origins=["*"], allow_credentials=True,
26
  allow_methods=["*"], allow_headers=["*"],
27
  )
28
 
29
+ # Load model once at startup
 
 
30
  model = WhisperModel(MODEL_NAME, device="cpu", compute_type="int8")
31
 
32
+ class HealthOut(BaseModel):
33
+ ok: bool
34
+
35
+ class TranscribeOut(BaseModel):
36
+ text: str
37
+ duration_sec: Optional[float] = None
38
+ wpm: Optional[float] = None
39
+
40
  def ffprobe_duration(path: str) -> Optional[float]:
41
  try:
42
  out = subprocess.check_output(
 
50
  def word_count(text: str) -> int:
51
  return len([w for w in text.split() if w.strip()])
52
 
 
 
 
 
 
 
 
 
 
 
53
  @app.get("/", response_model=HealthOut)
54
  def root():
 
55
  return HealthOut(ok=True)
56
 
57
  @app.get("/health", response_model=HealthOut)
 
60
 
61
  @app.post("/transcribe", response_model=TranscribeOut)
62
  async def transcribe(file: UploadFile = File(...)):
63
+ # Save upload
64
  raw = await file.read()
 
 
 
 
65
  ext = os.path.splitext(file.filename or "")[1].lower() or ".bin"
66
+ tmp_in = f"/tmp/in{ext}"
67
  with open(tmp_in, "wb") as f:
68
  f.write(raw)
69
 
70
+ # Convert to mono 16k wav for robust decode
71
  tmp_wav = "/tmp/in.wav"
72
  try:
73
+ subprocess.check_call(
74
+ ["ffmpeg", "-y", "-i", tmp_in, "-ar", "16000", "-ac", "1", tmp_wav],
75
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
76
+ )
77
  except subprocess.CalledProcessError:
78
  return JSONResponse(status_code=400, content={"error": "ffmpeg failed to decode the audio"})
79
 
 
80
  duration = ffprobe_duration(tmp_wav)
81
 
82
  # Transcribe
83
+ segments, _ = model.transcribe(tmp_wav, language="en")
84
  text = "".join([seg.text for seg in segments]).strip()
85
 
 
86
  wpm = None
87
  if duration and duration > 0:
88
  wc = word_count(text)