testingfaces committed on
Commit
a229c78
·
verified ·
1 Parent(s): 372a9c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -94
app.py CHANGED
@@ -1,9 +1,17 @@
1
  """
2
  ClearWave AI — HuggingFace Spaces
3
- Gradio UI + FastAPI routes for /api/health and /api/process-url
 
 
 
 
 
 
 
4
  """
5
 
6
  import os
 
7
  import json
8
  import base64
9
  import tempfile
@@ -38,39 +46,75 @@ LANGUAGES_DISPLAY = {
38
  OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
39
 
40
  # ══════════════════════════════════════════════════════════════════════
41
- # AUDIO FORMAT CONVERTER β€” supports .mpeg, .mp4, .m4a etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # ══════════════════════════════════════════════════════════════════════
43
  def convert_to_wav(audio_path: str) -> str:
44
- """
45
- Convert any audio format (including .mpeg, .mp4, .m4a) to .wav
46
- so the pipeline can process it reliably.
47
- Returns path to converted .wav file (or original if already .wav).
48
- """
49
  if audio_path is None:
50
  return audio_path
51
  ext = os.path.splitext(audio_path)[1].lower()
52
- # Already a safe format β€” no conversion needed
53
  if ext in [".wav", ".mp3", ".flac", ".ogg", ".aac"]:
54
  return audio_path
55
- # Convert .mpeg / .mp4 / .m4a / .wma / .amr etc. β†’ .wav
56
  try:
57
  converted = audio_path + "_converted.wav"
58
  result = subprocess.run([
59
  "ffmpeg", "-y", "-i", audio_path,
60
- "-ar", "16000",
61
- "-ac", "1",
62
- "-acodec", "pcm_s16le",
63
- converted
64
  ], capture_output=True)
65
  if result.returncode == 0 and os.path.exists(converted):
66
- logger.info(f"Converted {ext} β†’ .wav successfully")
67
  return converted
68
- else:
69
- logger.warning(f"Conversion failed: {result.stderr.decode()}")
70
- return audio_path
71
  except Exception as e:
72
  logger.warning(f"Conversion error: {e}")
73
- return audio_path
74
 
75
 
76
  # ══════════════════════════════════════════════════════════════════════
@@ -78,10 +122,17 @@ def convert_to_wav(audio_path: str) -> str:
78
  # ══════════════════════════════════════════════════════════════════════
79
  def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
80
  opt_fillers=True, opt_stutters=True, opt_silences=True,
81
- opt_breaths=True, opt_mouth=True):
 
 
 
 
 
 
 
82
  out_dir = tempfile.mkdtemp()
83
  try:
84
- yield {"status": "processing", "step": 1, "message": "⏳ Step 1/5 β€” Denoising..."}
85
  denoise1 = denoiser.process(
86
  audio_path, out_dir,
87
  remove_fillers=False, remove_stutters=False,
@@ -91,12 +142,12 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
91
  clean1 = denoise1['audio_path']
92
  stats = denoise1['stats']
93
 
94
- yield {"status": "processing", "step": 2, "message": "⏳ Step 2/5 β€” Transcribing..."}
95
  transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
96
  word_segs = transcriber._last_segments
97
 
98
  if (opt_fillers or opt_stutters) and word_segs:
99
- yield {"status": "processing", "step": 3, "message": "⏳ Step 3/5 β€” Removing fillers & stutters..."}
100
  import soundfile as sf
101
  audio_data, sr = sf.read(clean1)
102
  if audio_data.ndim == 2:
@@ -117,24 +168,24 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
117
  translation = transcript
118
  tl_method = "same language"
119
  if tgt_lang != "auto" and detected_lang != tgt_lang:
120
- yield {"status": "processing", "step": 4, "message": "⏳ Step 4/5 β€” Translating..."}
121
  translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
122
 
123
- yield {"status": "processing", "step": 5, "message": "⏳ Step 5/5 β€” Summarizing..."}
124
  summary = translator.summarize(transcript)
125
 
126
  with open(clean1, "rb") as f:
127
  enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
128
 
129
- yield {
130
  "status": "done",
131
  "step": 5,
132
  "message": "βœ… Done!",
133
  "transcript": transcript,
134
  "translation": translation,
135
  "summary": summary,
136
- "enhancedAudio": enhanced_b64,
137
  "audioPath": clean1,
 
138
  "stats": {
139
  "language": detected_lang.upper(),
140
  "noise_method": stats.get("noise_method", "noisereduce"),
@@ -150,9 +201,40 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
150
  "transcript_words": len(transcript.split()),
151
  },
152
  }
 
 
 
 
 
 
153
  except Exception as e:
154
  logger.error(f"Pipeline failed: {e}", exc_info=True)
155
- yield {"status": "error", "message": f"❌ Error: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
 
158
  # ══════════════════════════════════════════════════════════════════════
@@ -165,22 +247,40 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
165
  yield ("❌ Please upload an audio file.", "", "", None, "", "")
166
  return
167
 
168
- # gr.File returns a dict with 'name' or 'path' key
169
  if isinstance(audio_path, dict):
170
  audio_path = audio_path.get("name") or audio_path.get("path", "")
171
 
172
- # βœ… Auto-convert .mpeg / .mp4 / .m4a and any unsupported format β†’ .wav
173
  audio_path = convert_to_wav(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
176
- tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
177
- for result in run_pipeline(audio_path, src_lang, tgt_lang,
178
- opt_fillers, opt_stutters, opt_silences,
179
- opt_breaths, opt_mouth):
180
- if result["status"] == "processing":
181
- yield (result["message"], "", "", None, "", "")
182
- elif result["status"] == "done":
183
- s = result.get("stats", {})
184
  stats_str = "\n".join([
185
  f"πŸŽ™οΈ Language : {s.get('language','?')}",
186
  f"πŸ”Š Noise method : {s.get('noise_method','?')}",
@@ -191,15 +291,21 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
191
  f"🌐 Translation : {s.get('translation_method','?')}",
192
  f"⏱️ Total time : {s.get('processing_sec', 0):.1f}s",
193
  ])
194
- yield (result["message"], result.get("transcript",""),
195
- result.get("translation",""), result.get("audioPath"),
196
- stats_str, result.get("summary",""))
197
- elif result["status"] == "error":
198
- yield (result["message"], "", "", None, "Failed.", "")
 
 
 
 
 
 
199
 
200
 
201
  with gr.Blocks(title="ClearWave AI") as demo:
202
- gr.Markdown("# 🎡 ClearWave AI\n### Professional Audio Enhancement")
203
  with gr.Row():
204
  with gr.Column(scale=1):
205
  audio_in = gr.File(
@@ -251,22 +357,33 @@ with gr.Blocks(title="ClearWave AI") as demo:
251
 
252
 
253
  # ══════════════════════════════════════════════════════════════════════
254
- # API ROUTES β€” registered directly on demo.app (Gradio's FastAPI)
255
  # ══════════════════════════════════════════════════════════════════════
256
  import json as _json
257
  from fastapi import Request as _Request
258
- from fastapi.responses import StreamingResponse as _StreamingResponse, JSONResponse as _JSONResponse
 
259
 
260
  @demo.app.get("/api/health")
261
  async def api_health():
262
- return _JSONResponse({"status": "ok", "service": "ClearWave AI on HuggingFace"})
 
 
 
 
 
263
 
264
  @demo.app.post("/api/process-url")
265
  async def api_process_url(request: _Request):
266
- data = await request.json()
267
- # Handle both plain JSON and Gradio-wrapped {"data": {...}}
 
 
 
 
268
  if "data" in data and isinstance(data["data"], dict):
269
  data = data["data"]
 
270
  audio_url = data.get("audioUrl")
271
  audio_id = data.get("audioId", "")
272
  src_lang = data.get("srcLang", "auto")
@@ -280,29 +397,25 @@ async def api_process_url(request: _Request):
280
  if not audio_url:
281
  return _JSONResponse({"error": "audioUrl is required"}, status_code=400)
282
 
283
- async def generate():
284
- import sys
285
-
286
- def sse(obj):
287
- sys.stdout.flush()
288
- return "data: " + _json.dumps(obj) + "\n\n"
289
-
290
- yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
291
 
 
 
 
292
  try:
293
- resp = requests.get(audio_url, timeout=60, stream=True)
 
294
  resp.raise_for_status()
295
- # βœ… Detect correct suffix from URL
296
  url_lower = audio_url.lower()
297
- if "wav" in url_lower:
298
- suffix = ".wav"
299
- elif "mpeg" in url_lower:
300
- suffix = ".mpeg"
301
- elif "mp4" in url_lower:
302
- suffix = ".mp4"
303
- else:
304
- suffix = ".mp3"
305
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
 
306
  downloaded = 0
307
  total = int(resp.headers.get("content-length", 0))
308
  for chunk in resp.iter_content(chunk_size=65536):
@@ -311,39 +424,83 @@ async def api_process_url(request: _Request):
311
  downloaded += len(chunk)
312
  if total:
313
  pct = int(downloaded * 100 / total)
314
- yield sse({"status": "processing", "step": 0,
315
- "message": "Downloading... " + str(pct) + "%"})
316
  tmp.close()
317
- except Exception as e:
318
- yield sse({"status": "error", "message": "Download failed: " + str(e)})
319
- return
320
 
321
- # βœ… Convert to wav if needed
322
- converted_path = convert_to_wav(tmp.name)
323
 
324
- for result in run_pipeline(converted_path, src_lang, tgt_lang,
325
- opt_fillers, opt_stutters, opt_silences,
326
- opt_breaths, opt_mouth):
327
- result["audioId"] = audio_id
328
- yield sse(result)
 
 
329
 
330
- try:
331
- os.unlink(tmp.name)
332
- if converted_path != tmp.name:
333
- os.unlink(converted_path)
334
- except Exception:
335
- pass
336
 
337
- return _StreamingResponse(
338
- generate(),
339
- media_type="text/event-stream",
340
- headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
341
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
- logger.info("βœ… /api/health and /api/process-url registered on demo.app")
344
 
345
- # ══════════════════════════════════════════════════════════════════════
346
- # LAUNCH
347
- # ══════════════════════════════════════════════════════════════════════
348
  if __name__ == "__main__":
349
  demo.launch()
 
1
  """
2
  ClearWave AI — HuggingFace Spaces
3
+ Gradio UI + FastAPI routes
4
+
5
+ BACKGROUND JOB SYSTEM:
6
+ - POST /api/process-url → returns {jobId} instantly (no timeout)
7
+ - GET /api/job/{jobId} → poll for progress / result
8
+ - Jobs run in background threads — handles 1hr+ audio safely
9
+ - Job results stored in memory for 1 hour then auto-cleaned
10
+ - Gradio UI uses same background thread approach
11
  """
12
 
13
  import os
14
+ import uuid
15
  import json
16
  import base64
17
  import tempfile
 
46
  OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
47
 
48
  # ══════════════════════════════════════════════════════════════════════
49
+ # JOB STORE — in-memory job registry
50
+ # ══════════════════════════════════════════════════════════════════════
51
+ _jobs: dict = {}
52
+ _jobs_lock = threading.Lock()
53
+ JOB_TTL_SEC = 3600 # keep results for 1 hour
54
+
55
+
56
def _new_job() -> str:
    """Register a fresh job in the in-memory store and return its id.

    The id is a random UUID4 rendered as a string. The new record starts
    in the "queued" state at step 0, carries no result yet, and is stamped
    with its creation time.
    """
    identifier = str(uuid.uuid4())
    record = {
        "status": "queued",
        "step": 0,
        "message": "Queued...",
        "result": None,
        "created_at": time.time(),
    }
    # Only the insertion itself needs to happen under the store lock.
    with _jobs_lock:
        _jobs[identifier] = record
    return identifier
67
+
68
+
69
def _update_job(job_id: str, **kwargs):
    """Merge ``kwargs`` into the stored record for ``job_id``.

    An id that is not in the store is ignored, so an update for a job
    that has already been removed is a silent no-op.
    """
    with _jobs_lock:
        record = _jobs.get(job_id)
        if record is not None:
            record.update(kwargs)
73
+
74
+
75
def _get_job(job_id: str) -> dict:
    """Return a shallow copy of the record for ``job_id``.

    An unknown id yields an empty dict. The copy is shallow: nested
    values such as the ``result`` dict are shared with the store.
    """
    with _jobs_lock:
        record = _jobs.get(job_id)
        return {} if record is None else dict(record)
78
+
79
+
80
def _cleanup_loop():
    """Purge finished jobs from the in-memory store; sweeps every 5 minutes.

    Runs forever and is meant to be the target of a daemon thread.

    Only jobs in a terminal state ("done" or "error") are eligible for
    removal. Expiring by age since creation alone would delete a job that
    is still running after JOB_TTL_SEC: its later ``_update_job`` calls
    would silently do nothing and pollers would see the job vanish.

    The retention clock starts the first time a sweep observes the job in
    a terminal state (recorded on the record as ``finished_at``), so a
    result stays available for at least JOB_TTL_SEC after completion.

    NOTE(review): a job that never reaches a terminal state is never
    purged; such records hold no result payload.
    """
    terminal_states = ("done", "error")
    while True:
        time.sleep(300)
        now = time.time()
        with _jobs_lock:
            expired = []
            for job_id, job in _jobs.items():
                if job.get("status") not in terminal_states:
                    continue
                # First sighting of a finished job starts its TTL clock.
                finished_at = job.setdefault("finished_at", now)
                if now - finished_at > JOB_TTL_SEC:
                    expired.append(job_id)
            # Delete after the scan; the dict must not change size while
            # it is being iterated.
            for job_id in expired:
                del _jobs[job_id]
        if expired:
            logger.info(f"[Jobs] Cleaned {len(expired)} expired jobs")
92
+
93
+
94
+ threading.Thread(target=_cleanup_loop, daemon=True).start()
95
+
96
+
97
+ # ══════════════════════════════════════════════════════════════════════
98
+ # AUDIO FORMAT CONVERTER
99
  # ══════════════════════════════════════════════════════════════════════
def convert_to_wav(audio_path: str) -> str:
    """Return a path the pipeline can read, transcoding via ffmpeg if needed.

    Files whose extension is already .wav, .mp3, .flac, .ogg or .aac
    (case-insensitive) are returned unchanged, as is ``None``. Anything
    else is converted to 16 kHz mono 16-bit PCM WAV, written next to the
    input as ``<audio_path>_converted.wav``.

    Conversion is best-effort: if ffmpeg cannot be run, exits non-zero, or
    produces no output file, a warning is logged and the original path is
    returned so the caller can still try to process it.
    """
    if audio_path is None:
        return audio_path

    ext = os.path.splitext(audio_path)[1].lower()
    if ext in (".wav", ".mp3", ".flac", ".ogg", ".aac"):
        return audio_path

    converted = audio_path + "_converted.wav"
    try:
        result = subprocess.run(
            [
                "ffmpeg", "-y", "-i", audio_path,
                "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le",
                converted,
            ],
            capture_output=True,
        )
    except Exception as e:
        # e.g. the ffmpeg binary is missing from PATH.
        logger.warning(f"Conversion error: {e}")
        return audio_path

    if result.returncode == 0 and os.path.exists(converted):
        logger.info(f"Converted {ext} → .wav")
        return converted

    # Surface ffmpeg's own diagnostics; its stderr is not guaranteed to be
    # valid UTF-8, so decode leniently rather than risk a second failure.
    stderr_text = result.stderr.decode(errors="replace")
    logger.warning(f"Conversion failed: {stderr_text}")
    return audio_path
118
 
119
 
120
  # ══════════════════════════════════════════════════════════════════════
 
122
  # ══════════════════════════════════════════════════════════════════════
123
  def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
124
  opt_fillers=True, opt_stutters=True, opt_silences=True,
125
+ opt_breaths=True, opt_mouth=True, job_id=None):
126
+
127
+ def progress(step, message):
128
+ update = {"status": "processing", "step": step, "message": message}
129
+ if job_id:
130
+ _update_job(job_id, **update)
131
+ return update
132
+
133
  out_dir = tempfile.mkdtemp()
134
  try:
135
+ yield progress(1, "⏳ Step 1/5 β€” Denoising...")
136
  denoise1 = denoiser.process(
137
  audio_path, out_dir,
138
  remove_fillers=False, remove_stutters=False,
 
142
  clean1 = denoise1['audio_path']
143
  stats = denoise1['stats']
144
 
145
+ yield progress(2, "⏳ Step 2/5 β€” Transcribing...")
146
  transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
147
  word_segs = transcriber._last_segments
148
 
149
  if (opt_fillers or opt_stutters) and word_segs:
150
+ yield progress(3, "⏳ Step 3/5 β€” Removing fillers & stutters...")
151
  import soundfile as sf
152
  audio_data, sr = sf.read(clean1)
153
  if audio_data.ndim == 2:
 
168
  translation = transcript
169
  tl_method = "same language"
170
  if tgt_lang != "auto" and detected_lang != tgt_lang:
171
+ yield progress(4, "⏳ Step 4/5 β€” Translating...")
172
  translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
173
 
174
+ yield progress(5, "⏳ Step 5/5 β€” Summarizing...")
175
  summary = translator.summarize(transcript)
176
 
177
  with open(clean1, "rb") as f:
178
  enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
179
 
180
+ result = {
181
  "status": "done",
182
  "step": 5,
183
  "message": "βœ… Done!",
184
  "transcript": transcript,
185
  "translation": translation,
186
  "summary": summary,
 
187
  "audioPath": clean1,
188
+ "enhancedAudio": enhanced_b64,
189
  "stats": {
190
  "language": detected_lang.upper(),
191
  "noise_method": stats.get("noise_method", "noisereduce"),
 
201
  "transcript_words": len(transcript.split()),
202
  },
203
  }
204
+
205
+ if job_id:
206
+ _update_job(job_id, status="done", step=5,
207
+ message="βœ… Done!", result=result)
208
+ yield result
209
+
210
  except Exception as e:
211
  logger.error(f"Pipeline failed: {e}", exc_info=True)
212
+ err = {"status": "error", "message": f"❌ Error: {str(e)}"}
213
+ if job_id:
214
+ _update_job(job_id, **err)
215
+ yield err
216
+
217
+
218
+ # ══════════════════════════════════════════════════════════════════════
219
+ # BACKGROUND WORKER
220
+ # ══════════════════════════════════════════════════════════════════════
221
def _run_job_in_background(job_id, audio_path, src_lang, tgt_lang,
                           opt_fillers, opt_stutters, opt_silences,
                           opt_breaths, opt_mouth):
    """Drive ``run_pipeline`` to completion for one job, then clean up.

    Intended as a thread target. The values the pipeline yields are
    discarded here: ``run_pipeline`` receives ``job_id`` and records its
    progress and result in the job store itself, which callers poll.

    Any exception that escapes the pipeline is logged with its traceback
    and recorded on the job as an "error" status. The input file at
    ``audio_path`` is always removed afterwards (best effort).
    """
    try:
        for _ in run_pipeline(
            audio_path, src_lang, tgt_lang,
            opt_fillers, opt_stutters, opt_silences,
            opt_breaths, opt_mouth, job_id=job_id,
        ):
            pass
    except Exception as e:
        logger.error(f"Job {job_id} failed: {e}", exc_info=True)
        _update_job(job_id, status="error", message=f"❌ {e}")
    finally:
        # Best-effort removal of the input; a missing or locked file must
        # not turn a finished job into a crashed thread.
        if audio_path:
            try:
                os.unlink(audio_path)
            except OSError as e:
                logger.debug(f"Could not remove {audio_path}: {e}")
238
 
239
 
240
  # ══════════════════════════════════════════════════════════════════════
 
247
  yield ("❌ Please upload an audio file.", "", "", None, "", "")
248
  return
249
 
 
250
  if isinstance(audio_path, dict):
251
  audio_path = audio_path.get("name") or audio_path.get("path", "")
252
 
 
253
  audio_path = convert_to_wav(audio_path)
254
+ src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
255
+ tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
256
+
257
+ # Start background job
258
+ job_id = _new_job()
259
+ threading.Thread(
260
+ target=_run_job_in_background,
261
+ args=(job_id, audio_path, src_lang, tgt_lang,
262
+ opt_fillers, opt_stutters, opt_silences,
263
+ opt_breaths, opt_mouth),
264
+ daemon=True,
265
+ ).start()
266
+
267
+ # Poll and stream progress to Gradio UI
268
+ while True:
269
+ time.sleep(2)
270
+ job = _get_job(job_id)
271
+ if not job:
272
+ yield ("❌ Job not found.", "", "", None, "", "")
273
+ return
274
 
275
+ status = job.get("status")
276
+ message = job.get("message", "Processing...")
277
+
278
+ if status in ("queued", "downloading", "processing"):
279
+ yield (message, "", "", None, "", "")
280
+
281
+ elif status == "done":
282
+ result = job.get("result", {})
283
+ s = result.get("stats", {})
284
  stats_str = "\n".join([
285
  f"πŸŽ™οΈ Language : {s.get('language','?')}",
286
  f"πŸ”Š Noise method : {s.get('noise_method','?')}",
 
291
  f"🌐 Translation : {s.get('translation_method','?')}",
292
  f"⏱️ Total time : {s.get('processing_sec', 0):.1f}s",
293
  ])
294
+ yield (result.get("message", "βœ… Done!"),
295
+ result.get("transcript", ""),
296
+ result.get("translation", ""),
297
+ result.get("audioPath"),
298
+ stats_str,
299
+ result.get("summary", ""))
300
+ return
301
+
302
+ elif status == "error":
303
+ yield (job.get("message", "❌ Error"), "", "", None, "Failed.", "")
304
+ return
305
 
306
 
307
  with gr.Blocks(title="ClearWave AI") as demo:
308
+ gr.Markdown("# 🎡 ClearWave AI\n### Professional Audio Enhancement β€” handles 1hr+ audio!")
309
  with gr.Row():
310
  with gr.Column(scale=1):
311
  audio_in = gr.File(
 
357
 
358
 
359
  # ══════════════════════════════════════════════════════════════════════
360
+ # API ROUTES
361
  # ══════════════════════════════════════════════════════════════════════
362
  import json as _json
363
  from fastapi import Request as _Request
364
+ from fastapi.responses import JSONResponse as _JSONResponse
365
+
366
 
@demo.app.get("/api/health")
async def api_health():
    """Health probe: report service identity and the job-store size.

    ``jobs_active`` is the number of records currently held in the job
    store, whatever their status.
    """
    payload = {
        "status": "ok",
        "service": "ClearWave AI on HuggingFace",
        "jobs_active": len(_jobs),
    }
    return _JSONResponse(payload)
374
+
375
 
376
  @demo.app.post("/api/process-url")
377
  async def api_process_url(request: _Request):
378
+ """
379
+ Instantly returns a jobId.
380
+ Client polls GET /api/job/{jobId} for progress and result.
381
+ No timeout issues β€” works for 1hr+ audio.
382
+ """
383
+ data = await request.json()
384
  if "data" in data and isinstance(data["data"], dict):
385
  data = data["data"]
386
+
387
  audio_url = data.get("audioUrl")
388
  audio_id = data.get("audioId", "")
389
  src_lang = data.get("srcLang", "auto")
 
397
  if not audio_url:
398
  return _JSONResponse({"error": "audioUrl is required"}, status_code=400)
399
 
400
+ job_id = _new_job()
401
+ _update_job(job_id, status="downloading", message="Downloading audio...")
 
 
 
 
 
 
402
 
403
+ def _download_and_run():
404
+ tmp_path = None
405
+ audio_path = None
406
  try:
407
+ # Download
408
+ resp = requests.get(audio_url, timeout=300, stream=True)
409
  resp.raise_for_status()
 
410
  url_lower = audio_url.lower()
411
+ if "wav" in url_lower: suffix = ".wav"
412
+ elif "mpeg" in url_lower: suffix = ".mpeg"
413
+ elif "mp4" in url_lower: suffix = ".mp4"
414
+ elif "m4a" in url_lower: suffix = ".m4a"
415
+ else: suffix = ".mp3"
416
+
 
 
417
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
418
+ tmp_path = tmp.name
419
  downloaded = 0
420
  total = int(resp.headers.get("content-length", 0))
421
  for chunk in resp.iter_content(chunk_size=65536):
 
424
  downloaded += len(chunk)
425
  if total:
426
  pct = int(downloaded * 100 / total)
427
+ _update_job(job_id, status="downloading",
428
+ message=f"Downloading... {pct}%")
429
  tmp.close()
 
 
 
430
 
431
+ # Convert format
432
+ audio_path = convert_to_wav(tmp_path)
433
 
434
+ # Run pipeline
435
+ for _ in run_pipeline(
436
+ audio_path, src_lang, tgt_lang,
437
+ opt_fillers, opt_stutters, opt_silences,
438
+ opt_breaths, opt_mouth, job_id=job_id
439
+ ):
440
+ pass
441
 
442
+ # Tag result with audioId
443
+ with _jobs_lock:
444
+ if job_id in _jobs and _jobs[job_id].get("result"):
445
+ _jobs[job_id]["result"]["audioId"] = audio_id
 
 
446
 
447
+ except Exception as e:
448
+ logger.error(f"Job {job_id} failed: {e}", exc_info=True)
449
+ _update_job(job_id, status="error", message=f"❌ Error: {str(e)}")
450
+ finally:
451
+ for p in [tmp_path, audio_path]:
452
+ try:
453
+ if p and os.path.exists(p):
454
+ os.unlink(p)
455
+ except Exception:
456
+ pass
457
+
458
+ threading.Thread(target=_download_and_run, daemon=True).start()
459
+
460
+ return _JSONResponse({
461
+ "jobId": job_id,
462
+ "audioId": audio_id,
463
+ "status": "queued",
464
+ "pollUrl": f"/api/job/{job_id}",
465
+ "message": "Job started! Poll pollUrl for progress.",
466
+ })
467
+
468
+
469
@demo.app.get("/api/job/{job_id}")
async def api_get_job(job_id: str):
    """Report the current state of one job.

    Responds 404 with an error body when the id is unknown. Otherwise the
    body carries jobId, status, step and message; once the status is
    "done" it also carries the stored ``result``.
    """
    job = _get_job(job_id)
    if not job:
        return _JSONResponse({"error": "Job not found"}, status_code=404)

    status = job.get("status")
    response = {
        "jobId": job_id,
        "status": status,
        "step": job.get("step", 0),
        "message": job.get("message", ""),
    }
    # The result payload is only attached for finished jobs.
    if status == "done":
        response["result"] = job.get("result", {})
    return _JSONResponse(response)
489
+
490
+
491
@demo.app.get("/api/jobs")
async def api_list_jobs():
    """List every job in the store with its status, step and message.

    Results are deliberately left out; fetch a single job to get them.
    """
    summary = {}
    with _jobs_lock:
        for job_id, job in _jobs.items():
            summary[job_id] = {
                "status": job["status"],
                "step": job.get("step", 0),
                "message": job.get("message", ""),
            }
    return _JSONResponse({"jobs": summary, "total": len(summary)})
501
 
 
502
 
503
+ logger.info("βœ… Routes: /api/health, /api/process-url, /api/job/{id}, /api/jobs")
504
+
 
505
  if __name__ == "__main__":
506
  demo.launch()