Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
| 1 |
-
import os, time, re, shutil, zipfile, subprocess
|
| 2 |
import gradio as gr
|
| 3 |
from faster_whisper import WhisperModel
|
| 4 |
|
| 5 |
-
#
|
| 6 |
DEVICE = "cuda" if os.path.exists("/dev/nvidia0") else "cpu"
|
| 7 |
COMPUTE = "float16" if DEVICE == "cuda" else "int8"
|
| 8 |
MODEL_ID = os.getenv(
|
| 9 |
"VOXO_MODEL",
|
| 10 |
"Systran/faster-whisper-large-v3" if DEVICE == "cuda" else "Systran/faster-whisper-small"
|
| 11 |
)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def _ts(t: float) -> str:
|
| 16 |
m, s = divmod(int(t), 60); h, m = divmod(m, 60)
|
| 17 |
return f"{h:02d}:{m:02d}:{s:02d}"
|
|
@@ -30,8 +35,7 @@ def _fmt_bytes(n: int) -> str:
|
|
| 30 |
n /= 1024
|
| 31 |
|
| 32 |
def _safe(name: str) -> str:
|
| 33 |
-
|
| 34 |
-
return re.sub(r"[^A-Za-z0-9._-]+", "_", base)
|
| 35 |
|
| 36 |
def _duration_secs(path: str) -> float:
|
| 37 |
try:
|
|
@@ -44,17 +48,17 @@ def _duration_secs(path: str) -> float:
|
|
| 44 |
except Exception:
|
| 45 |
return 0.0
|
| 46 |
|
| 47 |
-
#
|
| 48 |
def transcribe(audio_path, language="auto", timestamps=True):
|
| 49 |
if not audio_path:
|
| 50 |
return ""
|
| 51 |
lang = None if language == "auto" else language
|
| 52 |
-
segments, _info =
|
| 53 |
audio_path,
|
| 54 |
language=lang,
|
| 55 |
vad_filter=True,
|
| 56 |
vad_parameters=dict(min_silence_duration_ms=500),
|
| 57 |
-
beam_size=1,
|
| 58 |
best_of=1,
|
| 59 |
condition_on_previous_text=False,
|
| 60 |
no_speech_threshold=0.3,
|
|
@@ -63,7 +67,23 @@ def transcribe(audio_path, language="auto", timestamps=True):
|
|
| 63 |
if timestamps else [s.text.strip() for s in segments])
|
| 64 |
return "\n".join(lines)
|
| 65 |
|
| 66 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progress=gr.Progress(track_tqdm=True)):
|
| 68 |
if not file_paths:
|
| 69 |
yield "No files selected.", None
|
|
@@ -78,11 +98,11 @@ def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progre
|
|
| 78 |
|
| 79 |
summary_parts, processed_audio, completed = [], 0.0, 0
|
| 80 |
|
| 81 |
-
def status_md(
|
| 82 |
elapsed = time.time() - start
|
| 83 |
rtf = elapsed / processed_audio if processed_audio > 0 else 0.0
|
| 84 |
-
|
| 85 |
-
eta =
|
| 86 |
header = [
|
| 87 |
"### Batch Progress",
|
| 88 |
f"- Files: **{completed}/{n}**",
|
|
@@ -91,7 +111,7 @@ def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progre
|
|
| 91 |
f"- Est. RTF: **{rtf:.2f}**" if processed_audio else "- Est. RTF: **…**",
|
| 92 |
f"- ETA: **{_fmt_hms(eta)}**" if processed_audio else "- ETA: **…**",
|
| 93 |
]
|
| 94 |
-
if
|
| 95 |
tail = "\n".join(summary_parts[-2:]) if summary_parts else ""
|
| 96 |
return "\n".join(header) + ("\n\n" + tail if tail else "")
|
| 97 |
|
|
@@ -110,11 +130,9 @@ def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progre
|
|
| 110 |
f.write(text)
|
| 111 |
|
| 112 |
wall = time.time() - t0
|
| 113 |
-
|
| 114 |
-
f"#### ✅ {name}\n- Audio: {_fmt_hms(file_dur)} | "
|
| 115 |
-
f"Wall: {_fmt_hms(wall)} | RTF: {(wall/max(1e-6,file_dur)):.2f}\n\n{text}\n"
|
| 116 |
)
|
| 117 |
-
summary_parts.append(per_file)
|
| 118 |
|
| 119 |
processed_audio += file_dur
|
| 120 |
completed += 1
|
|
@@ -122,6 +140,7 @@ def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progre
|
|
| 122 |
|
| 123 |
yield status_md(), None
|
| 124 |
|
|
|
|
| 125 |
combined_path = os.path.join(workdir, "_ALL_TRANSCRIPTS.txt")
|
| 126 |
with open(combined_path, "w", encoding="utf-8") as f:
|
| 127 |
f.write("\n\n".join(summary_parts))
|
|
@@ -134,208 +153,44 @@ def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progre
|
|
| 134 |
|
| 135 |
yield status_md("All done. Download the ZIP for every transcript."), zip_path
|
| 136 |
|
| 137 |
-
#
|
| 138 |
-
UPLOAD_ROOT = "/tmp/voxo_progress_uploads"
|
| 139 |
-
|
| 140 |
-
def adopt_uploaded(json_paths: str, session_id: str):
    """Validate the paths reported by the custom uploader and stage them for a batch.

    Args:
        json_paths: JSON text expected to decode to a list of server-side file
            paths. Empty, malformed, or non-list input is treated as "no files".
        session_id: Upload session identifier. It is sanitized with ``_safe``
            and joined to ``UPLOAD_ROOT`` to form the only directory from
            which files are accepted.

    Returns:
        A 3-tuple ``(status, safe_paths, update)``: a Markdown summary, the
        list of accepted (symlink-resolved) paths, and a ``gr.update`` whose
        ``interactive`` flag is true only when at least one file was accepted.
    """
    import json  # function-scope import: does not rely on the file header

    try:
        paths = json.loads(json_paths) if json_paths else []
    except (TypeError, ValueError, RecursionError):
        paths = []
    if not isinstance(paths, list):
        # A JSON string or object would otherwise be iterated char-by-char / key-by-key.
        paths = []

    root = os.path.realpath(UPLOAD_ROOT)
    base = os.path.realpath(os.path.join(UPLOAD_ROOT, _safe(session_id or "")))
    # The session directory must be a direct child of the upload root. An empty
    # or ".." session id would otherwise widen the sandbox to every session
    # (or to the parent of the upload root).
    sandbox_ok = os.path.dirname(base) == root

    staged = []  # (path, size_in_bytes, duration_in_seconds), probed once per file
    if sandbox_ok:
        for p in paths:
            if not p or not isinstance(p, str):
                continue
            resolved = os.path.realpath(p)
            # Compare whole path components; a bare str.startswith() would also
            # accept sibling directories such as "<base>_other/".
            try:
                inside = os.path.commonpath([base, resolved]) == base
            except ValueError:
                inside = False
            if not inside or not os.path.isfile(resolved):
                continue
            staged.append((resolved, os.path.getsize(resolved), _duration_secs(resolved)))

    safe_paths = [path for path, _size, _dur in staged]
    total_size = sum(size for _path, size, _dur in staged)
    total_audio = sum((dur for _path, _size, dur in staged), 0.0)

    listing = "\n".join(
        f"- ✅ **{_safe(path)}** — {_fmt_hms(dur)} | {_fmt_bytes(size)}"
        for path, size, dur in staged
    )
    if safe_paths:
        footer = f"\n\n**Total:** {len(safe_paths)} files — {_fmt_hms(total_audio)} — {_fmt_bytes(total_size)}"
    else:
        footer = "\n\nNo valid files."
    status = "### Files staged\n" + listing + footer
    return status, safe_paths, gr.update(interactive=bool(safe_paths))
|
| 163 |
-
|
| 164 |
-
# ===== UI =====
|
| 165 |
with gr.Blocks(title="Voxo – Audio to Text") as demo:
|
| 166 |
gr.Markdown("# 🎧 Voxo\nDrop audio, get text. GPU = fast, CPU = free.")
|
| 167 |
|
| 168 |
with gr.Tabs():
|
| 169 |
-
#
|
| 170 |
with gr.Tab("Single file"):
|
| 171 |
with gr.Row():
|
| 172 |
audio = gr.Audio(sources=["upload","microphone"], type="filepath", label="Audio (mp3/wav)")
|
| 173 |
-
lang
|
| 174 |
-
["auto","en","es","fr","de","it","pt","ja","ko","zh"],
|
| 175 |
-
value="auto", label="Language"
|
| 176 |
-
)
|
| 177 |
ts = gr.Checkbox(value=True, label="Show timestamps")
|
| 178 |
btn = gr.Button("Transcribe", variant="primary")
|
| 179 |
out = gr.Textbox(lines=20, label="Transcript", show_copy_button=True)
|
| 180 |
btn.click(transcribe, inputs=[audio, lang, ts], outputs=out, concurrency_limit=1)
|
| 181 |
|
| 182 |
-
#
|
| 183 |
with gr.Tab("Batch"):
|
| 184 |
-
gr.
|
| 185 |
-
|
| 186 |
-
# Hidden wiring to carry results from JS → Python
|
| 187 |
-
uploaded_json = gr.Textbox(visible=False)
|
| 188 |
-
session_box = gr.Textbox(visible=False)
|
| 189 |
-
staged_files = gr.State([]) # python list[str] of server paths
|
| 190 |
-
|
| 191 |
-
# Custom HTML + JS uploader with true progress bars
|
| 192 |
-
uploader = gr.HTML("""
|
| 193 |
-
<div id="vx_uploader_wrap" style="border:1px dashed #7c3aed;padding:14px;border-radius:12px">
|
| 194 |
-
<input id="vx_input" type="file" multiple accept="audio/*" style="margin-bottom:8px"/>
|
| 195 |
-
<div id="vx_hint" style="font-size:12px;opacity:.7;margin-bottom:8px">Select multiple audio files. Upload starts immediately.</div>
|
| 196 |
-
<div id="vx_progress_list" style="display:flex;flex-direction:column;gap:6px"></div>
|
| 197 |
-
<div id="vx_totals" style="margin-top:8px;font-size:12px;opacity:.8"></div>
|
| 198 |
-
</div>
|
| 199 |
-
<script>
|
| 200 |
-
(function(){
|
| 201 |
-
const uploadUrl = "/voxo-upload";
|
| 202 |
-
const input = document.getElementById("vx_input");
|
| 203 |
-
const list = document.getElementById("vx_progress_list");
|
| 204 |
-
const totals = document.getElementById("vx_totals");
|
| 205 |
-
window.voxoUploadedPaths = [];
|
| 206 |
-
window.voxoSession = String(Date.now());
|
| 207 |
-
|
| 208 |
-
function fmtBytes(n){const u=["B","KB","MB","GB","TB"];let i=0;while(n>=1024&&i<u.length-1){n/=1024;i++;}return (i?n.toFixed(1):n)+" "+u[i];}
|
| 209 |
-
function fmtHMS(sec){sec=Math.max(0,sec|0);let m=sec/60|0,s=sec%60,h=m/60|0;m%=60;return h?`${h}h ${String(m).padStart(2,'0')}m ${String(s).padStart(2,'0')}s`:m?`${m}m ${String(s).padStart(2,'0')}s`:`${s}s`;}
|
| 210 |
-
|
| 211 |
-
function makeRow(name,size){
|
| 212 |
-
const row=document.createElement("div");
|
| 213 |
-
row.style="display:flex;align-items:center;gap:8px;white-space:nowrap";
|
| 214 |
-
row.innerHTML = `
|
| 215 |
-
<span style="flex:1;overflow:hidden;text-overflow:ellipsis">${name}</span>
|
| 216 |
-
<span style="width:78px;text-align:right;font-size:12px;opacity:.7">${fmtBytes(size)}</span>
|
| 217 |
-
<progress value="0" max="100" style="flex:0 0 160px;height:10px"></progress>
|
| 218 |
-
<span class="pct" style="width:40px;text-align:right;font-size:12px">0%</span>
|
| 219 |
-
<span class="spd" style="width:90px;text-align:right;font-size:12px;opacity:.7"></span>
|
| 220 |
-
`;
|
| 221 |
-
return row;
|
| 222 |
-
}
|
| 223 |
-
|
| 224 |
-
function postOne(file){
|
| 225 |
-
return new Promise((resolve)=>{
|
| 226 |
-
const row = makeRow(file.name, file.size);
|
| 227 |
-
const bar = row.querySelector("progress");
|
| 228 |
-
const pct = row.querySelector(".pct");
|
| 229 |
-
const spd = row.querySelector(".spd");
|
| 230 |
-
list.appendChild(row);
|
| 231 |
-
|
| 232 |
-
const xhr = new XMLHttpRequest();
|
| 233 |
-
xhr.open("POST", uploadUrl);
|
| 234 |
-
const t0 = performance.now();
|
| 235 |
-
|
| 236 |
-
xhr.upload.onprogress = (e)=>{
|
| 237 |
-
if(e.lengthComputable){
|
| 238 |
-
const p = Math.round(100*e.loaded/e.total);
|
| 239 |
-
bar.value = p; pct.textContent = p+"%";
|
| 240 |
-
const sec = (performance.now()-t0)/1000;
|
| 241 |
-
const rate = e.loaded / Math.max(1e-6, sec); // B/s
|
| 242 |
-
const remain = (e.total - e.loaded) / Math.max(1e-6, rate); // s
|
| 243 |
-
spd.textContent = (rate<1024?`${rate|0} B/s` : rate<1024*1024?`${(rate/1024).toFixed(1)} KB/s` : `${(rate/1024/1024).toFixed(1)} MB/s`) + " · " + fmtHMS(remain);
|
| 244 |
-
}
|
| 245 |
-
};
|
| 246 |
-
xhr.onload = ()=>{
|
| 247 |
-
if(xhr.status===200){
|
| 248 |
-
try{
|
| 249 |
-
const resp = JSON.parse(xhr.responseText);
|
| 250 |
-
window.voxoUploadedPaths.push(resp.path);
|
| 251 |
-
bar.value = 100; pct.textContent = "✓"; spd.textContent = "";
|
| 252 |
-
}catch(e){ pct.textContent = "err"; }
|
| 253 |
-
}else{ pct.textContent = "err"; }
|
| 254 |
-
const done = list.querySelectorAll("progress[value='100']").length;
|
| 255 |
-
totals.textContent = done + " / " + list.children.length + " uploaded";
|
| 256 |
-
resolve();
|
| 257 |
-
};
|
| 258 |
-
const form = new FormData();
|
| 259 |
-
form.append("session", window.voxoSession);
|
| 260 |
-
form.append("file", file, file.name);
|
| 261 |
-
xhr.send(form);
|
| 262 |
-
});
|
| 263 |
-
}
|
| 264 |
-
|
| 265 |
-
input.addEventListener("change", async ()=>{
|
| 266 |
-
list.innerHTML = ""; totals.textContent = "";
|
| 267 |
-
window.voxoUploadedPaths = [];
|
| 268 |
-
const files = Array.from(input.files||[]);
|
| 269 |
-
for (const f of files){ await postOne(f); } // sequential for reliability
|
| 270 |
-
});
|
| 271 |
-
})();
|
| 272 |
-
</script>
|
| 273 |
-
""")
|
| 274 |
-
|
| 275 |
-
# Bridge: JS -> Python
|
| 276 |
-
adopt_btn = gr.Button("Use uploaded files", variant="primary")
|
| 277 |
-
uploaded_json = gr.Textbox(visible=False)
|
| 278 |
-
session_box = gr.Textbox(visible=False)
|
| 279 |
-
adopt_btn.click(
|
| 280 |
-
fn=None,
|
| 281 |
-
inputs=[],
|
| 282 |
-
outputs=[uploaded_json, session_box],
|
| 283 |
-
js="() => [JSON.stringify(window.voxoUploadedPaths||[]), window.voxoSession||'default']"
|
| 284 |
-
)
|
| 285 |
-
|
| 286 |
-
# Stage for batch
|
| 287 |
-
upload_summary = gr.Markdown("No uploads yet.")
|
| 288 |
-
staged_files = gr.State([])
|
| 289 |
-
stage_btn = gr.Button("Confirm & Stage", variant="secondary", interactive=True)
|
| 290 |
-
stage_btn.click(
|
| 291 |
-
adopt_uploaded,
|
| 292 |
-
inputs=[uploaded_json, session_box],
|
| 293 |
-
outputs=[upload_summary, staged_files, stage_btn],
|
| 294 |
-
concurrency_limit=1
|
| 295 |
-
)
|
| 296 |
-
|
| 297 |
with gr.Row():
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
)
|
| 302 |
-
ts3 = gr.Checkbox(value=True, label="Show timestamps")
|
| 303 |
-
run_batch = gr.Button("Run Batch", variant="primary", interactive=False)
|
| 304 |
batch_out = gr.Markdown("Ready.")
|
| 305 |
-
zip_out
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
batch_transcribe_stream,
|
| 308 |
-
inputs=[
|
| 309 |
outputs=[batch_out, zip_out],
|
| 310 |
concurrency_limit=1
|
| 311 |
)
|
| 312 |
|
| 313 |
-
gr.Markdown(
|
| 314 |
-
f"**Engine**: `{MODEL_ID}` on `{DEVICE}` ({COMPUTE}). "
|
| 315 |
-
"Tip: Use an L4 GPU for large-v3 fast runs; switch back to CPU Basic to save dollars."
|
| 316 |
-
)
|
| 317 |
-
|
| 318 |
-
# Queue for Gradio events (uploads handled by FastAPI below)
|
| 319 |
-
demo.queue(default_concurrency_limit=1)
|
| 320 |
-
|
| 321 |
-
# ===== FastAPI: real upload endpoint =====
|
| 322 |
-
from fastapi import FastAPI, UploadFile, File as _FAFile, Form
|
| 323 |
-
from fastapi.responses import JSONResponse
|
| 324 |
-
|
| 325 |
-
api = FastAPI()
|
| 326 |
-
|
| 327 |
-
@api.post("/voxo-upload")
async def voxo_upload(file: UploadFile = _FAFile(...), session: str = Form("default")):
    """Store one uploaded file under the caller's session directory.

    The sanitized ``session`` value names a directory below ``UPLOAD_ROOT`` and
    the sanitized upload filename names the file inside it. The upload is
    copied to disk in 1 MB chunks.

    Returns:
        A JSON response ``{"path": ..., "name": ...}`` for the stored file, or
        a 400 JSON response when the session or file name cannot be used as a
        path component.
    """
    session_name = _safe(session or "")
    file_name = _safe(file.filename or "")  # filename may be missing on the upload
    # "", "." and ".." pass the character filter in _safe but must never be
    # joined into a path: ".." as a session would place files outside UPLOAD_ROOT,
    # and "."/".." as a file name would make open() target a directory.
    unusable = ("", ".", "..")
    if session_name in unusable or file_name in unusable:
        return JSONResponse({"error": "invalid session or file name"}, status_code=400)

    session_dir = os.path.join(UPLOAD_ROOT, session_name)
    os.makedirs(session_dir, exist_ok=True)
    dest = os.path.join(session_dir, file_name)
    with open(dest, "wb") as out:
        while True:
            chunk = await file.read(1024 * 1024)  # 1MB chunks
            if not chunk:
                break
            out.write(chunk)
    return JSONResponse({"path": dest, "name": os.path.basename(dest)})
|
| 339 |
|
| 340 |
-
#
|
| 341 |
-
|
|
|
|
| 1 |
+
import os, time, re, shutil, zipfile, subprocess
|
| 2 |
import gradio as gr
|
| 3 |
from faster_whisper import WhisperModel
|
| 4 |
|
| 5 |
+
# ---------- Device & Model (lazy load so startup is instant) ----------
|
| 6 |
# Run on CUDA only when the NVIDIA device node /dev/nvidia0 exists; otherwise use the CPU.
DEVICE = "cuda" if os.path.exists("/dev/nvidia0") else "cpu"
# Compute type handed to WhisperModel: float16 on CUDA, int8 on CPU.
COMPUTE = "float16" if DEVICE == "cuda" else "int8"
# Model identifier. The VOXO_MODEL environment variable overrides the
# per-device default (large-v3 on CUDA, small on CPU).
MODEL_ID = os.getenv(
    "VOXO_MODEL",
    "Systran/faster-whisper-large-v3" if DEVICE == "cuda" else "Systran/faster-whisper-small"
)
|
| 12 |
+
# Module-level cache for the single WhisperModel instance; filled on first use.
_model = None


def get_model():
    """Return the shared ``WhisperModel``, constructing it on the first call.

    The model is built from ``MODEL_ID``, ``DEVICE`` and ``COMPUTE`` and then
    kept in the module-level ``_model`` so later calls reuse it.
    """
    global _model
    if _model is not None:
        return _model
    _model = WhisperModel(MODEL_ID, device=DEVICE, compute_type=COMPUTE)
    return _model
|
| 18 |
+
|
| 19 |
+
# ---------- Helpers ----------
|
| 20 |
def _ts(t: float) -> str:
|
| 21 |
m, s = divmod(int(t), 60); h, m = divmod(m, 60)
|
| 22 |
return f"{h:02d}:{m:02d}:{s:02d}"
|
|
|
|
| 35 |
n /= 1024
|
| 36 |
|
| 37 |
def _safe(name: str) -> str:
|
| 38 |
+
return re.sub(r"[^A-Za-z0-9._-]+", "_", os.path.basename(name))
|
|
|
|
| 39 |
|
| 40 |
def _duration_secs(path: str) -> float:
|
| 41 |
try:
|
|
|
|
| 48 |
except Exception:
|
| 49 |
return 0.0
|
| 50 |
|
| 51 |
+
# ---------- Core Transcribe ----------
|
| 52 |
def transcribe(audio_path, language="auto", timestamps=True):
|
| 53 |
if not audio_path:
|
| 54 |
return ""
|
| 55 |
lang = None if language == "auto" else language
|
| 56 |
+
segments, _info = get_model().transcribe(
|
| 57 |
audio_path,
|
| 58 |
language=lang,
|
| 59 |
vad_filter=True,
|
| 60 |
vad_parameters=dict(min_silence_duration_ms=500),
|
| 61 |
+
beam_size=1, # fast; bump to 3–5 for more accuracy
|
| 62 |
best_of=1,
|
| 63 |
condition_on_previous_text=False,
|
| 64 |
no_speech_threshold=0.3,
|
|
|
|
| 67 |
if timestamps else [s.text.strip() for s in segments])
|
| 68 |
return "\n".join(lines)
|
| 69 |
|
| 70 |
+
# ---------- Batch with live ETA (streams updates) ----------
|
| 71 |
+
def files_added_status(file_paths, progress=gr.Progress(track_tqdm=True)):
    """Summarize the selected files and decide whether the run button is enabled.

    Args:
        file_paths: Paths of the files currently selected; falsy when none.
        progress: Gradio progress callback, invoked once per file with the
            fraction scanned so far and a ``Scanning <name>`` description.

    Returns:
        A 2-tuple of the Markdown summary text and a ``gr.update`` that sets
        ``interactive`` to False when there are no files and True otherwise.
    """
    if not file_paths:
        return "No files yet. Add some audio to get started.", gr.update(interactive=False)

    count = len(file_paths)
    bytes_total = 0
    seconds_total = 0.0
    report = ["### Files added"]

    for index, path in enumerate(file_paths, start=1):
        label = _safe(path)
        progress(index / count, desc=f"Scanning {label}")

        # A path that does not exist is listed with a size of 0.
        nbytes = os.path.getsize(path) if os.path.exists(path) else 0
        seconds = _duration_secs(path)
        bytes_total += nbytes
        seconds_total += seconds

        # A zero duration is shown as an ellipsis rather than a formatted time.
        shown = _fmt_hms(seconds) if seconds else "…"
        report.append(f"- ✅ **{label}** — {shown} | {_fmt_bytes(nbytes)}")

    report.extend([
        "",
        f"**Total:** {count} files — {_fmt_hms(seconds_total)} audio — {_fmt_bytes(bytes_total)}",
        "Ready to run the batch.",
    ])
    return "\n".join(report), gr.update(interactive=True)
|
| 86 |
+
|
| 87 |
def batch_transcribe_stream(file_paths, language="auto", timestamps=True, progress=gr.Progress(track_tqdm=True)):
|
| 88 |
if not file_paths:
|
| 89 |
yield "No files selected.", None
|
|
|
|
| 98 |
|
| 99 |
summary_parts, processed_audio, completed = [], 0.0, 0
|
| 100 |
|
| 101 |
+
def status_md(note: str = "") -> str:
|
| 102 |
elapsed = time.time() - start
|
| 103 |
rtf = elapsed / processed_audio if processed_audio > 0 else 0.0
|
| 104 |
+
remaining = max(0.0, total_audio - processed_audio)
|
| 105 |
+
eta = remaining * rtf if processed_audio > 0 else 0.0
|
| 106 |
header = [
|
| 107 |
"### Batch Progress",
|
| 108 |
f"- Files: **{completed}/{n}**",
|
|
|
|
| 111 |
f"- Est. RTF: **{rtf:.2f}**" if processed_audio else "- Est. RTF: **…**",
|
| 112 |
f"- ETA: **{_fmt_hms(eta)}**" if processed_audio else "- ETA: **…**",
|
| 113 |
]
|
| 114 |
+
if note: header.append(f"\n{note}")
|
| 115 |
tail = "\n".join(summary_parts[-2:]) if summary_parts else ""
|
| 116 |
return "\n".join(header) + ("\n\n" + tail if tail else "")
|
| 117 |
|
|
|
|
| 130 |
f.write(text)
|
| 131 |
|
| 132 |
wall = time.time() - t0
|
| 133 |
+
summary_parts.append(
|
| 134 |
+
f"#### ✅ {name}\n- Audio: {_fmt_hms(file_dur)} | Wall: {_fmt_hms(wall)} | RTF: {(wall/max(1e-6,file_dur)):.2f}\n\n{text}\n"
|
|
|
|
| 135 |
)
|
|
|
|
| 136 |
|
| 137 |
processed_audio += file_dur
|
| 138 |
completed += 1
|
|
|
|
| 140 |
|
| 141 |
yield status_md(), None
|
| 142 |
|
| 143 |
+
# combined + zip
|
| 144 |
combined_path = os.path.join(workdir, "_ALL_TRANSCRIPTS.txt")
|
| 145 |
with open(combined_path, "w", encoding="utf-8") as f:
|
| 146 |
f.write("\n\n".join(summary_parts))
|
|
|
|
| 153 |
|
| 154 |
yield status_md("All done. Download the ZIP for every transcript."), zip_path
|
| 155 |
|
| 156 |
+
# ---------- UI ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
with gr.Blocks(title="Voxo – Audio to Text") as demo:
|
| 158 |
gr.Markdown("# 🎧 Voxo\nDrop audio, get text. GPU = fast, CPU = free.")
|
| 159 |
|
| 160 |
with gr.Tabs():
|
| 161 |
+
# Single
|
| 162 |
with gr.Tab("Single file"):
|
| 163 |
with gr.Row():
|
| 164 |
audio = gr.Audio(sources=["upload","microphone"], type="filepath", label="Audio (mp3/wav)")
|
| 165 |
+
lang = gr.Dropdown(["auto","en","es","fr","de","it","pt","ja","ko","zh"], value="auto", label="Language")
|
|
|
|
|
|
|
|
|
|
| 166 |
ts = gr.Checkbox(value=True, label="Show timestamps")
|
| 167 |
btn = gr.Button("Transcribe", variant="primary")
|
| 168 |
out = gr.Textbox(lines=20, label="Transcript", show_copy_button=True)
|
| 169 |
btn.click(transcribe, inputs=[audio, lang, ts], outputs=out, concurrency_limit=1)
|
| 170 |
|
| 171 |
+
# Batch (simple uploader + live ETA)
|
| 172 |
with gr.Tab("Batch"):
|
| 173 |
+
files = gr.File(file_count="multiple", type="filepath", file_types=["audio"], label="Select multiple audio files")
|
| 174 |
+
upload_status = gr.Markdown("No files yet. Add some audio.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
with gr.Row():
|
| 176 |
+
lang2 = gr.Dropdown(["auto","en","es","fr","de","it","pt","ja","ko","zh"], value="auto", label="Language")
|
| 177 |
+
ts2 = gr.Checkbox(value=True, label="Show timestamps")
|
| 178 |
+
batch_btn = gr.Button("Run Batch", variant="primary", interactive=False)
|
|
|
|
|
|
|
|
|
|
| 179 |
batch_out = gr.Markdown("Ready.")
|
| 180 |
+
zip_out = gr.File(label="Download transcripts (ZIP)")
|
| 181 |
+
|
| 182 |
+
# Enable the Run button after files are added + show a summary
|
| 183 |
+
files.change(files_added_status, inputs=[files], outputs=[upload_status, batch_btn])
|
| 184 |
+
|
| 185 |
+
# Stream progress + final ZIP
|
| 186 |
+
batch_btn.click(
|
| 187 |
batch_transcribe_stream,
|
| 188 |
+
inputs=[files, lang2, ts2],
|
| 189 |
outputs=[batch_out, zip_out],
|
| 190 |
concurrency_limit=1
|
| 191 |
)
|
| 192 |
|
| 193 |
+
gr.Markdown(f"**Engine**: `{MODEL_ID}` on `{DEVICE}` ({COMPUTE}). Tip: Use an L4 GPU for large-v3 fast runs; switch back to CPU Basic to save dollars.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
+
# Start Gradio server (Spaces-friendly)
|
| 196 |
+
demo.queue(default_concurrency_limit=1).launch()
|