Quartz4065 commited on
Commit
9afa571
·
verified ·
1 Parent(s): 9d1fee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -51
app.py CHANGED
@@ -1,28 +1,26 @@
1
  import os
 
 
 
2
  from fastapi import FastAPI, UploadFile, File, HTTPException
3
  from fastapi.responses import JSONResponse, PlainTextResponse
4
  from pydantic import BaseModel
5
  from faster_whisper import WhisperModel
6
- import tempfile
7
- import subprocess
8
- import math
9
 
10
- # ---------- Writable caches (prevents PermissionError on /.cache) ----------
11
  CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
12
  os.environ["HF_HOME"] = CACHE_ROOT
13
  os.environ["HUGGINGFACE_HUB_CACHE"] = CACHE_ROOT
14
  os.environ["TRANSFORMERS_CACHE"] = CACHE_ROOT
15
  os.makedirs(CACHE_ROOT, exist_ok=True)
16
 
17
- # ---------- App ----------
18
  app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")
19
 
20
- # Root route (avoid 404 at "/")
21
  @app.get("/", response_class=PlainTextResponse)
22
  def root():
23
  return "Nuvia Free Transcriber · try POST /transcribe or GET /health"
24
 
25
- # Health route used by your GPT Action
26
  class HealthOut(BaseModel):
27
  ok: bool
28
 
@@ -30,96 +28,67 @@ class HealthOut(BaseModel):
30
  def health():
31
  return {"ok": True}
32
 
33
- # ---------- Load model (tiny.en = fastest on CPU) ----------
34
- # You can switch to "base.en" if you want a bit more accuracy (slower).
35
  MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
36
  COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")
37
-
38
- # Ensure the cache dir exists and is writable before model download
39
- os.makedirs(CACHE_ROOT, exist_ok=True)
40
-
41
  model = WhisperModel(
42
  MODEL_NAME,
43
  device="cpu",
44
  compute_type=COMPUTE_TYPE,
45
- download_root=CACHE_ROOT, # <— keeps models inside /data/hf
46
  )
47
 
48
- # ---------- Helpers ----------
49
- def ffprobe_duration(path: str) -> float | None:
50
- """Return duration in seconds using ffprobe, or None on failure."""
51
  try:
52
  out = subprocess.check_output(
53
- [
54
- "ffprobe",
55
- "-v", "error",
56
- "-show_entries", "format=duration",
57
- "-of", "default=noprint_wrappers=1:nokey=1",
58
- path,
59
- ],
60
  stderr=subprocess.STDOUT,
61
  )
62
  return float(out.decode().strip())
63
  except Exception:
64
  return None
65
 
66
- def estimate_wpm(text: str, duration_sec: float | None) -> float | None:
67
  if not text or not duration_sec or duration_sec <= 0:
68
  return None
69
  words = len(text.strip().split())
70
- minutes = duration_sec / 60.0
71
- if minutes <= 0:
72
  return None
73
- return words / minutes
74
 
75
- # ---------- Schemas ----------
76
  class TranscribeOut(BaseModel):
77
  text: str
78
  duration_sec: float | None = None
79
  wpm: float | None = None
80
 
81
- # ---------- API ----------
82
  @app.post("/transcribe", response_model=TranscribeOut)
83
  async def transcribe(file: UploadFile = File(...)):
84
  if not file.filename:
85
  raise HTTPException(400, "Missing file name")
86
- suffix = os.path.splitext(file.filename)[1].lower()
87
- if suffix not in [".mp3", ".m4a", ".wav", ".aac", ".flac"]:
88
- # allow anyway; faster-whisper handles most formats via ffmpeg
89
- pass
90
 
91
- # Save upload to a temp file
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
93
- content = await file.read()
94
- tmp.write(content)
95
  tmp_path = tmp.name
96
 
97
- # Duration via ffprobe (best effort)
98
  duration = ffprobe_duration(tmp_path)
99
 
100
- # Transcribe
101
- # NOTE: beam_size=1 and vad_filter=True for speed/legibility on CPU Spaces
102
  segments, info = model.transcribe(
103
  tmp_path,
104
  language="en",
105
  beam_size=1,
106
  vad_filter=True,
107
- vad_parameters=dict(min_silence_duration_ms=600)
108
  )
109
 
110
- # Concatenate text
111
- parts = []
112
- for seg in segments:
113
- # You can keep timestamps if you want: f"[{seg.start:.2f}-{seg.end:.2f}] {seg.text}"
114
- parts.append(seg.text.strip())
115
- full_text = " ".join([p for p in parts if p])
116
-
117
- # Compute WPM if possible
118
- wpm = estimate_wpm(full_text, duration)
119
 
120
  try:
121
  os.unlink(tmp_path)
122
  except Exception:
123
  pass
124
 
125
- return TranscribeOut(text=full_text, duration_sec=duration, wpm=wpm)
 
1
  import os
2
+ import math
3
+ import tempfile
4
+ import subprocess
5
  from fastapi import FastAPI, UploadFile, File, HTTPException
6
  from fastapi.responses import JSONResponse, PlainTextResponse
7
  from pydantic import BaseModel
8
  from faster_whisper import WhisperModel
 
 
 
9
 
10
# Redirect every HF cache location to one writable root so model downloads
# never hit a read-only ~/.cache inside the container.
CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
for _cache_var in ("HF_HOME", "HUGGINGFACE_HUB_CACHE", "TRANSFORMERS_CACHE"):
    os.environ[_cache_var] = CACHE_ROOT
os.makedirs(CACHE_ROOT, exist_ok=True)
16
 
 
17
# Single FastAPI application instance; title/version surface in the OpenAPI docs.
app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")
18
 
19
# Friendly landing text so hitting "/" never yields a 404.
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a plain-text pointer to the useful endpoints."""
    return "Nuvia Free Transcriber · try POST /transcribe or GET /health"
23
 
 
24
class HealthOut(BaseModel):
    """Response schema for the health probe."""

    # True when the service is up; the health route always reports True.
    ok: bool
26
 
 
28
def health():
    """Liveness probe; always reports the service as healthy."""
    return dict(ok=True)
30
 
 
 
31
# Model repo and quantization are env-overridable; tiny.en + int8 favors CPU speed.
MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")
 
 
 
 
33
# Loaded once at import time; download_root keeps the weights inside the
# writable CACHE_ROOT so the Space never writes outside /data.
model = WhisperModel(
    MODEL_NAME,
    device="cpu",
    compute_type=COMPUTE_TYPE,
    download_root=CACHE_ROOT,
)
39
 
40
def ffprobe_duration(path: str):
    """Best-effort media duration in seconds via ffprobe; None on any failure."""
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    try:
        raw = subprocess.check_output(probe_cmd, stderr=subprocess.STDOUT)
    except Exception:
        # ffprobe missing, file unreadable, or non-zero exit — treat all as unknown.
        return None
    try:
        return float(raw.decode().strip())
    except Exception:
        return None
50
 
51
+ def estimate_wpm(text: str, duration_sec: float | None):
52
  if not text or not duration_sec or duration_sec <= 0:
53
  return None
54
  words = len(text.strip().split())
55
+ mins = duration_sec / 60.0
56
+ if mins <= 0:
57
  return None
58
+ return words / mins
59
 
 
60
class TranscribeOut(BaseModel):
    """Response schema for POST /transcribe."""

    # Full transcript; segment texts joined with single spaces.
    text: str
    # Media duration from ffprobe, when it could be determined.
    duration_sec: float | None = None
    # Estimated speaking rate, when both text and duration are available.
    wpm: float | None = None
64
 
 
65
@app.post("/transcribe", response_model=TranscribeOut)
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English text.

    Saves the upload to a temp file, measures its duration with ffprobe
    (best effort), runs faster-whisper on CPU, and returns the transcript
    plus an estimated words-per-minute figure.

    Raises:
        HTTPException(400): when the upload carries no filename.
    """
    if not file.filename:
        raise HTTPException(400, "Missing file name")
    # Keep the original extension so ffmpeg can sniff the container format;
    # default to .mp3 when the name has no extension.
    suffix = os.path.splitext(file.filename)[1].lower() or ".mp3"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        duration = ffprobe_duration(tmp_path)

        # beam_size=1 + VAD keeps CPU latency low at a small accuracy cost.
        segments, _info = model.transcribe(
            tmp_path,
            language="en",
            beam_size=1,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=600),
        )

        parts = [seg.text.strip() for seg in segments if seg.text and seg.text.strip()]
        text = " ".join(parts)
        wpm = estimate_wpm(text, duration)
    finally:
        # Always remove the temp file, even when probing/transcription raises
        # (the original leaked it on any exception above this point).
        try:
            os.unlink(tmp_path)
        except OSError:
            pass

    return TranscribeOut(text=text, duration_sec=duration, wpm=wpm)