Quartz4065 committed
Commit 033e455 · verified · 1 Parent(s): a847e8e

Update app.py

Files changed (1)
  1. app.py +99 -78
app.py CHANGED
@@ -1,104 +1,125 @@
  import os
  import subprocess
- from typing import Optional

- from fastapi import FastAPI, File, UploadFile
- from fastapi.middleware.cors import CORSMiddleware
- from fastapi.responses import JSONResponse
- from pydantic import BaseModel

- # ---- CACHES: point to writable dirs, ensure perms ----
- os.environ.setdefault("HF_HOME", "/data/hf")
- os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/data/hf")
- os.environ.setdefault("XDG_CACHE_HOME", "/data/.cache")
- os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
- os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")

- for p in ("/data", "/data/hf", "/data/.cache", "/tmp"):
-     try:
-         os.makedirs(p, exist_ok=True)
-         os.chmod(p, 0o777)
-     except Exception:
-         pass

- # ---- Transcriber (CPU) ----
- from faster_whisper import WhisperModel
- MODEL_NAME = os.environ.get("WHISPER_MODEL", "tiny.en")

- app = FastAPI(title="Nuvia Free Transcriber", version="1.3.0")
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"], allow_credentials=True,
-     allow_methods=["*"], allow_headers=["*"],
- )

- # IMPORTANT:
- # local_files_only=True prevents any runtime writes/downloads
  model = WhisperModel(
      MODEL_NAME,
      device="cpu",
-     compute_type="int8",
-     download_root="/data/hf",
-     local_files_only=True,
  )

- class HealthOut(BaseModel):
-     ok: bool
-
- class TranscribeOut(BaseModel):
-     text: str
-     duration_sec: Optional[float] = None
-     wpm: Optional[float] = None
-
- def ffprobe_duration(path: str) -> Optional[float]:
      try:
          out = subprocess.check_output(
-             ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-              "-of", "default=noprint_wrappers=1:nokey=1", path]
          )
-         return float(out.decode("utf-8").strip())
      except Exception:
          return None

- def word_count(text: str) -> int:
-     return len([w for w in text.split() if w.strip()])
-
- @app.get("/", response_model=HealthOut)
- def root():
-     return HealthOut(ok=True)

- @app.get("/health", response_model=HealthOut)
- def health():
-     return HealthOut(ok=True)

  @app.post("/transcribe", response_model=TranscribeOut)
  async def transcribe(file: UploadFile = File(...)):
-     # Save upload
-     ext = os.path.splitext(file.filename or "")[1].lower() or ".bin"
-     tmp_in = f"/tmp/in{ext}"
-     raw = await file.read()
-     with open(tmp_in, "wb") as f:
-         f.write(raw)
-
-     # Convert to mono 16k WAV
-     tmp_wav = "/tmp/in.wav"
-     try:
-         subprocess.check_call(
-             ["ffmpeg", "-y", "-i", tmp_in, "-ar", "16000", "-ac", "1", tmp_wav],
-             stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
-         )
-     except subprocess.CalledProcessError:
-         return JSONResponse(status_code=400, content={"error": "ffmpeg failed to decode audio"})
-
-     duration = ffprobe_duration(tmp_wav)

-     # Transcribe (no downloads, model already cached)
-     segments, _ = model.transcribe(tmp_wav, language="en")
-     text = "".join(seg.text for seg in segments).strip()

-     wpm = None
-     if duration and duration > 0:
-         wc = word_count(text)
-         wpm = round((wc / (duration / 60.0)), 1)

-     return TranscribeOut(text=text, duration_sec=duration, wpm=wpm)
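The removed version above relied on local_files_only=True, which only works if the Whisper weights are already present under /data/hf before the app starts. A minimal pre-fetch sketch, hedged as an illustration only (it assumes the huggingface_hub client is available and that "tiny.en" maps to the Systran/faster-whisper-tiny.en repo, as the new code below spells out):

    from huggingface_hub import snapshot_download

    # Illustration only: pre-download the CTranslate2 Whisper weights into the
    # cache that WhisperModel(download_root="/data/hf", local_files_only=True)
    # would have read from. Not part of app.py in either version of this commit.
    snapshot_download(repo_id="Systran/faster-whisper-tiny.en", cache_dir="/data/hf")

The new version below drops local_files_only and instead lets WhisperModel download into CACHE_ROOT on first start.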
 
  import os
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from fastapi.responses import JSONResponse, PlainTextResponse
+ from pydantic import BaseModel
+ from faster_whisper import WhisperModel
+ import tempfile
  import subprocess
+ import math

+ # ---------- Writable caches (prevents PermissionError on /.cache) ----------
+ CACHE_ROOT = os.environ.get("HF_HOME", "/data/hf")
+ os.environ["HF_HOME"] = CACHE_ROOT
+ os.environ["HUGGINGFACE_HUB_CACHE"] = CACHE_ROOT
+ os.environ["TRANSFORMERS_CACHE"] = CACHE_ROOT
+ os.makedirs(CACHE_ROOT, exist_ok=True)

+ # ---------- App ----------
+ app = FastAPI(title="Nuvia Free Transcriber", version="1.4.0")

+ # Root route (avoid 404 at "/")
+ @app.get("/", response_class=PlainTextResponse)
+ def root():
+     return "Nuvia Free Transcriber · try POST /transcribe or GET /health"

+ # Health route used by your GPT Action
+ class HealthOut(BaseModel):
+     ok: bool

+ @app.get("/health", response_model=HealthOut)
+ def health():
+     return {"ok": True}
+
+ # ---------- Load model (tiny.en = fastest on CPU) ----------
+ # You can switch to "base.en" if you want a bit more accuracy (slower).
+ MODEL_NAME = os.environ.get("WHISPER_REPO", "Systran/faster-whisper-tiny.en")
+ COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE", "int8")
+
+ # Ensure the cache dir exists and is writable before model download
+ os.makedirs(CACHE_ROOT, exist_ok=True)

  model = WhisperModel(
      MODEL_NAME,
      device="cpu",
+     compute_type=COMPUTE_TYPE,
+     download_root=CACHE_ROOT,  # <— keeps models inside /data/hf
  )

+ # ---------- Helpers ----------
+ def ffprobe_duration(path: str) -> float | None:
+     """Return duration in seconds using ffprobe, or None on failure."""
      try:
          out = subprocess.check_output(
+             [
+                 "ffprobe",
+                 "-v", "error",
+                 "-show_entries", "format=duration",
+                 "-of", "default=noprint_wrappers=1:nokey=1",
+                 path,
+             ],
+             stderr=subprocess.STDOUT,
          )
+         return float(out.decode().strip())
      except Exception:
          return None

+ def estimate_wpm(text: str, duration_sec: float | None) -> float | None:
+     if not text or not duration_sec or duration_sec <= 0:
+         return None
+     words = len(text.strip().split())
+     minutes = duration_sec / 60.0
+     if minutes <= 0:
+         return None
+     return words / minutes

+ # ---------- Schemas ----------
+ class TranscribeOut(BaseModel):
+     text: str
+     duration_sec: float | None = None
+     wpm: float | None = None

+ # ---------- API ----------
  @app.post("/transcribe", response_model=TranscribeOut)
  async def transcribe(file: UploadFile = File(...)):
+     if not file.filename:
+         raise HTTPException(400, "Missing file name")
+     suffix = os.path.splitext(file.filename)[1].lower()
+     if suffix not in [".mp3", ".m4a", ".wav", ".aac", ".flac"]:
+         # allow anyway; faster-whisper handles most formats via ffmpeg
+         pass

+     # Save upload to a temp file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+         content = await file.read()
+         tmp.write(content)
+         tmp_path = tmp.name
+
+     # Duration via ffprobe (best effort)
+     duration = ffprobe_duration(tmp_path)
+
+     # Transcribe
+     # NOTE: beam_size=1 and vad_filter=True for speed/legibility on CPU Spaces
+     segments, info = model.transcribe(
+         tmp_path,
+         language="en",
+         beam_size=1,
+         vad_filter=True,
+         vad_parameters=dict(min_silence_duration_ms=600)
+     )
+
+     # Concatenate text
+     parts = []
+     for seg in segments:
+         # You can keep timestamps if you want: f"[{seg.start:.2f}-{seg.end:.2f}] {seg.text}"
+         parts.append(seg.text.strip())
+     full_text = " ".join([p for p in parts if p])
+
+     # Compute WPM if possible
+     wpm = estimate_wpm(full_text, duration)

+     try:
+         os.unlink(tmp_path)
+     except Exception:
+         pass

+     return TranscribeOut(text=full_text, duration_sec=duration, wpm=wpm)
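Once the Space is running, the new endpoints can be exercised directly. A small client sketch under stated assumptions: the base URL below is a placeholder for your own Space URL, and the requests library is an external assumption, not part of app.py.

    import requests

    BASE = "https://YOUR-SPACE.hf.space"  # placeholder: replace with your Space URL

    # Health check used by the GPT Action
    print(requests.get(f"{BASE}/health", timeout=30).json())  # expected: {"ok": true}

    # Upload an audio file for transcription
    with open("clip.mp3", "rb") as f:
        r = requests.post(
            f"{BASE}/transcribe",
            files={"file": ("clip.mp3", f, "audio/mpeg")},
            timeout=300,
        )
    print(r.json())  # {"text": "...", "duration_sec": ..., "wpm": ...}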