Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed on Mar 20

Commit

a229c78

verified ·

1 Parent(s): 372a9c7

Update app.py

Browse files

Files changed (1) hide show

app.py +251 -94

app.py CHANGED Viewed

@@ -1,9 +1,17 @@
 """
 ClearWave AI — HuggingFace Spaces
-Gradio UI + FastAPI routes for /api/health and /api/process-url
 """
 import os
 import json
 import base64
 import tempfile
@@ -38,39 +46,75 @@ LANGUAGES_DISPLAY = {
 OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
 # ══════════════════════════════════════════════════════════════════════
-# AUDIO FORMAT CONVERTER — supports .mpeg, .mp4, .m4a etc.
 # ══════════════════════════════════════════════════════════════════════
 def convert_to_wav(audio_path: str) -> str:
-    """
-    Convert any audio format (including .mpeg, .mp4, .m4a) to .wav
-    so the pipeline can process it reliably.
-    Returns path to converted .wav file (or original if already .wav).
-    """
     if audio_path is None:
         return audio_path
     ext = os.path.splitext(audio_path)[1].lower()
-    # Already a safe format — no conversion needed
     if ext in [".wav", ".mp3", ".flac", ".ogg", ".aac"]:
         return audio_path
-    # Convert .mpeg / .mp4 / .m4a / .wma / .amr etc. → .wav
     try:
         converted = audio_path + "_converted.wav"
         result = subprocess.run([
             "ffmpeg", "-y", "-i", audio_path,
-            "-ar", "16000",
-            "-ac", "1",
-            "-acodec", "pcm_s16le",
-            converted
         ], capture_output=True)
         if result.returncode == 0 and os.path.exists(converted):
-            logger.info(f"Converted {ext} → .wav successfully")
             return converted
-        else:
-            logger.warning(f"Conversion failed: {result.stderr.decode()}")
-            return audio_path
     except Exception as e:
         logger.warning(f"Conversion error: {e}")
-        return audio_path
 # ══════════════════════════════════════════════════════════════════════
@@ -78,10 +122,17 @@ def convert_to_wav(audio_path: str) -> str:
 # ══════════════════════════════════════════════════════════════════════
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
-                 opt_breaths=True, opt_mouth=True):
     out_dir = tempfile.mkdtemp()
     try:
-        yield {"status": "processing", "step": 1, "message": "⏳ Step 1/5 — Denoising..."}
         denoise1 = denoiser.process(
             audio_path, out_dir,
             remove_fillers=False, remove_stutters=False,
@@ -91,12 +142,12 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
         clean1 = denoise1['audio_path']
         stats  = denoise1['stats']
-        yield {"status": "processing", "step": 2, "message": "⏳ Step 2/5 — Transcribing..."}
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         word_segs = transcriber._last_segments
         if (opt_fillers or opt_stutters) and word_segs:
-            yield {"status": "processing", "step": 3, "message": "⏳ Step 3/5 — Removing fillers & stutters..."}
             import soundfile as sf
             audio_data, sr = sf.read(clean1)
             if audio_data.ndim == 2:
@@ -117,24 +168,24 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
         translation = transcript
         tl_method   = "same language"
         if tgt_lang != "auto" and detected_lang != tgt_lang:
-            yield {"status": "processing", "step": 4, "message": "⏳ Step 4/5 — Translating..."}
             translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
-        yield {"status": "processing", "step": 5, "message": "⏳ Step 5/5 — Summarizing..."}
         summary = translator.summarize(transcript)
         with open(clean1, "rb") as f:
             enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
-        yield {
             "status":        "done",
             "step":          5,
             "message":       "✅ Done!",
             "transcript":    transcript,
             "translation":   translation,
             "summary":       summary,
-            "enhancedAudio": enhanced_b64,
             "audioPath":     clean1,
             "stats": {
                 "language":             detected_lang.upper(),
                 "noise_method":         stats.get("noise_method", "noisereduce"),
@@ -150,9 +201,40 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 "transcript_words":     len(transcript.split()),
             },
         }
     except Exception as e:
         logger.error(f"Pipeline failed: {e}", exc_info=True)
-        yield {"status": "error", "message": f"❌ Error: {str(e)}"}
 # ══════════════════════════════════════════════════════════════════════
@@ -165,22 +247,40 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
         yield ("❌ Please upload an audio file.", "", "", None, "", "")
         return
-    # gr.File returns a dict with 'name' or 'path' key
     if isinstance(audio_path, dict):
         audio_path = audio_path.get("name") or audio_path.get("path", "")
-    # ✅ Auto-convert .mpeg / .mp4 / .m4a and any unsupported format → .wav
     audio_path = convert_to_wav(audio_path)
-    src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
-    tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
-    for result in run_pipeline(audio_path, src_lang, tgt_lang,
-                                opt_fillers, opt_stutters, opt_silences,
-                                opt_breaths, opt_mouth):
-        if result["status"] == "processing":
-            yield (result["message"], "", "", None, "", "")
-        elif result["status"] == "done":
-            s = result.get("stats", {})
             stats_str = "\n".join([
                 f"🎙️  Language      : {s.get('language','?')}",
                 f"🔊  Noise method  : {s.get('noise_method','?')}",
@@ -191,15 +291,21 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
                 f"🌐  Translation   : {s.get('translation_method','?')}",
                 f"⏱️  Total time    : {s.get('processing_sec', 0):.1f}s",
             ])
-            yield (result["message"], result.get("transcript",""),
-                   result.get("translation",""), result.get("audioPath"),
-                   stats_str, result.get("summary",""))
-        elif result["status"] == "error":
-            yield (result["message"], "", "", None, "Failed.", "")
 with gr.Blocks(title="ClearWave AI") as demo:
-    gr.Markdown("# 🎵 ClearWave AI\n### Professional Audio Enhancement")
     with gr.Row():
         with gr.Column(scale=1):
             audio_in = gr.File(
@@ -251,22 +357,33 @@ with gr.Blocks(title="ClearWave AI") as demo:
 # ══════════════════════════════════════════════════════════════════════
-# API ROUTES — registered directly on demo.app (Gradio's FastAPI)
 # ══════════════════════════════════════════════════════════════════════
 import json as _json
 from fastapi import Request as _Request
-from fastapi.responses import StreamingResponse as _StreamingResponse, JSONResponse as _JSONResponse
 @demo.app.get("/api/health")
 async def api_health():
-    return _JSONResponse({"status": "ok", "service": "ClearWave AI on HuggingFace"})
 @demo.app.post("/api/process-url")
 async def api_process_url(request: _Request):
-    data         = await request.json()
-    # Handle both plain JSON and Gradio-wrapped {"data": {...}}
     if "data" in data and isinstance(data["data"], dict):
         data = data["data"]
     audio_url    = data.get("audioUrl")
     audio_id     = data.get("audioId",     "")
     src_lang     = data.get("srcLang",     "auto")
@@ -280,29 +397,25 @@ async def api_process_url(request: _Request):
     if not audio_url:
         return _JSONResponse({"error": "audioUrl is required"}, status_code=400)
-    async def generate():
-        import sys
-        def sse(obj):
-            sys.stdout.flush()
-            return "data: " + _json.dumps(obj) + "\n\n"
-        yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
         try:
-            resp = requests.get(audio_url, timeout=60, stream=True)
             resp.raise_for_status()
-            # ✅ Detect correct suffix from URL
             url_lower = audio_url.lower()
-            if "wav" in url_lower:
-                suffix = ".wav"
-            elif "mpeg" in url_lower:
-                suffix = ".mpeg"
-            elif "mp4" in url_lower:
-                suffix = ".mp4"
-            else:
-                suffix = ".mp3"
             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
             downloaded = 0
             total = int(resp.headers.get("content-length", 0))
             for chunk in resp.iter_content(chunk_size=65536):
@@ -311,39 +424,83 @@ async def api_process_url(request: _Request):
                     downloaded += len(chunk)
                     if total:
                         pct = int(downloaded * 100 / total)
-                        yield sse({"status": "processing", "step": 0,
-                                   "message": "Downloading... " + str(pct) + "%"})
             tmp.close()
-        except Exception as e:
-            yield sse({"status": "error", "message": "Download failed: " + str(e)})
-            return
-        # ✅ Convert to wav if needed
-        converted_path = convert_to_wav(tmp.name)
-        for result in run_pipeline(converted_path, src_lang, tgt_lang,
-                                   opt_fillers, opt_stutters, opt_silences,
-                                   opt_breaths, opt_mouth):
-            result["audioId"] = audio_id
-            yield sse(result)
-        try:
-            os.unlink(tmp.name)
-            if converted_path != tmp.name:
-                os.unlink(converted_path)
-        except Exception:
-            pass
-    return _StreamingResponse(
-        generate(),
-        media_type="text/event-stream",
-        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
-    )
-logger.info("✅ /api/health and /api/process-url registered on demo.app")
-# ══════════════════════════════════════════════════════════════════════
-# LAUNCH
-# ══════════════════════════════════════════════════════════════════════
 if __name__ == "__main__":
     demo.launch()

 """
 ClearWave AI — HuggingFace Spaces
+Gradio UI + FastAPI routes
+BACKGROUND JOB SYSTEM:
+  - POST /api/process-url  → returns {jobId} instantly (no timeout)
+  - GET  /api/job/{jobId}  → poll for progress / result
+  - Jobs run in background threads — handles 1hr+ audio safely
+  - Job results stored in memory for 1 hour then auto-cleaned
+  - Gradio UI uses same background thread approach
 """
 import os
+import uuid
 import json
 import base64
 import tempfile
 OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
 # ══════════════════════════════════════════════════════════════════════
+# JOB STORE — in-memory job registry
+# ══════════════════════════════════════════════════════════════════════
+_jobs: dict      = {}
+_jobs_lock       = threading.Lock()
+JOB_TTL_SEC      = 3600  # keep results for 1 hour
+def _new_job() -> str:
+    """Register a fresh job in the in-memory store and return its id.
+
+    The id is a random UUID4 string. The new record starts in the
+    "queued" state at step 0, has no result yet, and is stamped with its
+    creation time.
+    """
+    new_id = str(uuid.uuid4())
+    record = dict(
+        status="queued",
+        step=0,
+        message="Queued...",
+        result=None,
+        created_at=time.time(),
+    )
+    with _jobs_lock:
+        _jobs[new_id] = record
+    return new_id
+def _update_job(job_id: str, **kwargs):
+    """Merge ``kwargs`` into the stored record for ``job_id``.
+
+    An id that is not in the store is ignored, so a late update for a
+    job that has already been removed is a no-op.
+    """
+    with _jobs_lock:
+        if job_id not in _jobs:
+            return
+        record = _jobs[job_id]
+        record.update(kwargs)
+def _get_job(job_id: str) -> dict:
+    """Return a shallow copy of the record for ``job_id``.
+
+    An unknown id yields an empty dict. The copy is taken while holding
+    the store lock.
+    """
+    snapshot = {}
+    with _jobs_lock:
+        snapshot.update(_jobs.get(job_id, {}))
+        return snapshot
+def _cleanup_loop():
+    """Purge finished jobs JOB_TTL_SEC after they finish; wakes every 5 minutes.
+
+    Jobs that are still queued, downloading or processing are never
+    removed, however old they are: a long recording can legitimately run
+    past JOB_TTL_SEC, and purging it mid-run would make pollers see
+    "Job not found".
+
+    The retention clock for a finished (done / error) job starts the
+    first time this loop observes it in that state, so its result stays
+    available for roughly JOB_TTL_SEC after completion rather than after
+    creation.
+    """
+    active = ("queued", "downloading", "processing")
+    while True:
+        time.sleep(300)
+        now = time.time()
+        expired = []
+        with _jobs_lock:
+            for k, v in _jobs.items():
+                if v.get("status") in active:
+                    continue
+                # Stamp the first sighting of a terminal state; later
+                # passes reuse the stored value.
+                finished_at = v.setdefault("finished_at", now)
+                if now - finished_at > JOB_TTL_SEC:
+                    expired.append(k)
+            for k in expired:
+                del _jobs[k]
+        if expired:
+            logger.info(f"[Jobs] Cleaned {len(expired)} expired jobs")
+threading.Thread(target=_cleanup_loop, daemon=True).start()
+# ══════════════════════════════════════════════════════════════════════
+# AUDIO FORMAT CONVERTER
 # ══════════════════════════════════════════════════════════════════════
 def convert_to_wav(audio_path: str) -> str:
+    """Convert an audio file to 16 kHz mono 16-bit PCM WAV when needed.
+
+    Files whose extension is .wav, .mp3, .flac, .ogg or .aac are returned
+    untouched. Anything else is run through ffmpeg, writing the output to
+    ``<audio_path>_converted.wav``.
+
+    Returns the converted path on success. On any failure (ffmpeg cannot
+    be run, non-zero exit status, no output file) the original path is
+    returned so the caller can still try to process it. ``None`` is
+    passed through unchanged.
+    """
     if audio_path is None:
         return audio_path
     ext = os.path.splitext(audio_path)[1].lower()
     if ext in [".wav", ".mp3", ".flac", ".ogg", ".aac"]:
         return audio_path
     try:
         converted = audio_path + "_converted.wav"
         result = subprocess.run([
             "ffmpeg", "-y", "-i", audio_path,
+            "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", converted
         ], capture_output=True)
         if result.returncode == 0 and os.path.exists(converted):
+            logger.info(f"Converted {ext} → .wav")
             return converted
+        # Surface ffmpeg's own diagnostics; without this a failed
+        # conversion is indistinguishable from "no conversion needed".
+        logger.warning(
+            f"Conversion failed: {result.stderr.decode(errors='replace')}")
     except Exception as e:
         logger.warning(f"Conversion error: {e}")
+    return audio_path
 # ══════════════════════════════════════════════════════════════════════
 # ══════════════════════════════════════════════════════════════════════
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
+                 opt_breaths=True, opt_mouth=True, job_id=None):
+    def progress(step, message):
+        update = {"status": "processing", "step": step, "message": message}
+        if job_id:
+            _update_job(job_id, **update)
+        return update
     out_dir = tempfile.mkdtemp()
     try:
+        yield progress(1, "⏳ Step 1/5 — Denoising...")
         denoise1 = denoiser.process(
             audio_path, out_dir,
             remove_fillers=False, remove_stutters=False,
         clean1 = denoise1['audio_path']
         stats  = denoise1['stats']
+        yield progress(2, "⏳ Step 2/5 — Transcribing...")
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         word_segs = transcriber._last_segments
         if (opt_fillers or opt_stutters) and word_segs:
+            yield progress(3, "⏳ Step 3/5 — Removing fillers & stutters...")
             import soundfile as sf
             audio_data, sr = sf.read(clean1)
             if audio_data.ndim == 2:
         translation = transcript
         tl_method   = "same language"
         if tgt_lang != "auto" and detected_lang != tgt_lang:
+            yield progress(4, "⏳ Step 4/5 — Translating...")
             translation, tl_method = translator.translate(transcript, detected_lang, tgt_lang)
+        yield progress(5, "⏳ Step 5/5 — Summarizing...")
         summary = translator.summarize(transcript)
         with open(clean1, "rb") as f:
             enhanced_b64 = base64.b64encode(f.read()).decode("utf-8")
+        result = {
             "status":        "done",
             "step":          5,
             "message":       "✅ Done!",
             "transcript":    transcript,
             "translation":   translation,
             "summary":       summary,
             "audioPath":     clean1,
+            "enhancedAudio": enhanced_b64,
             "stats": {
                 "language":             detected_lang.upper(),
                 "noise_method":         stats.get("noise_method", "noisereduce"),
                 "transcript_words":     len(transcript.split()),
             },
         }
+        if job_id:
+            _update_job(job_id, status="done", step=5,
+                        message="✅ Done!", result=result)
+        yield result
     except Exception as e:
         logger.error(f"Pipeline failed: {e}", exc_info=True)
+        err = {"status": "error", "message": f"❌ Error: {str(e)}"}
+        if job_id:
+            _update_job(job_id, **err)
+        yield err
+# ══════════════════════════════════════════════════════════════════════
+# BACKGROUND WORKER
+# ══════════════════════════════════════════════════════════════════════
+def _run_job_in_background(job_id, audio_path, src_lang, tgt_lang,
+                            opt_fillers, opt_stutters, opt_silences,
+                            opt_breaths, opt_mouth):
+    """Thread target: run the pipeline for ``job_id``, then delete the input.
+
+    All arguments after ``job_id`` are forwarded to ``run_pipeline``
+    unchanged. An exception escaping the pipeline is logged and recorded
+    on the job as status "error". ``audio_path`` is removed afterwards
+    whether or not the pipeline succeeded.
+    """
+    try:
+        # run_pipeline is a generator that writes progress and the final
+        # result into the job store when given job_id, so the yielded
+        # values only need to be drained here.
+        for _ in run_pipeline(
+            audio_path, src_lang, tgt_lang,
+            opt_fillers, opt_stutters, opt_silences,
+            opt_breaths, opt_mouth, job_id=job_id
+        ):
+            pass
+    except Exception as e:
+        logger.error(f"Job {job_id} failed: {e}", exc_info=True)
+        _update_job(job_id, status="error", message=f"❌ {e}")
+    finally:
+        # Best-effort cleanup: a missing or undeletable input file must
+        # not turn a finished job into a crashed thread.
+        try:
+            os.unlink(audio_path)
+        except Exception:
+            pass
 # ══════════════════════════════════════════════════════════════════════
         yield ("❌ Please upload an audio file.", "", "", None, "", "")
         return
     if isinstance(audio_path, dict):
         audio_path = audio_path.get("name") or audio_path.get("path", "")
     audio_path = convert_to_wav(audio_path)
+    src_lang   = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
+    tgt_lang   = LANGUAGES_DISPLAY.get(out_lang_name, "te")
+    # Start background job
+    job_id = _new_job()
+    threading.Thread(
+        target=_run_job_in_background,
+        args=(job_id, audio_path, src_lang, tgt_lang,
+              opt_fillers, opt_stutters, opt_silences,
+              opt_breaths, opt_mouth),
+        daemon=True,
+    ).start()
+    # Poll and stream progress to Gradio UI
+    while True:
+        time.sleep(2)
+        job = _get_job(job_id)
+        if not job:
+            yield ("❌ Job not found.", "", "", None, "", "")
+            return
+        status  = job.get("status")
+        message = job.get("message", "Processing...")
+        if status in ("queued", "downloading", "processing"):
+            yield (message, "", "", None, "", "")
+        elif status == "done":
+            result = job.get("result", {})
+            s      = result.get("stats", {})
             stats_str = "\n".join([
                 f"🎙️  Language      : {s.get('language','?')}",
                 f"🔊  Noise method  : {s.get('noise_method','?')}",
                 f"🌐  Translation   : {s.get('translation_method','?')}",
                 f"⏱️  Total time    : {s.get('processing_sec', 0):.1f}s",
             ])
+            yield (result.get("message", "✅ Done!"),
+                   result.get("transcript", ""),
+                   result.get("translation", ""),
+                   result.get("audioPath"),
+                   stats_str,
+                   result.get("summary", ""))
+            return
+        elif status == "error":
+            yield (job.get("message", "❌ Error"), "", "", None, "Failed.", "")
+            return
 with gr.Blocks(title="ClearWave AI") as demo:
+    gr.Markdown("# 🎵 ClearWave AI\n### Professional Audio Enhancement — handles 1hr+ audio!")
     with gr.Row():
         with gr.Column(scale=1):
             audio_in = gr.File(
 # ══════════════════════════════════════════════════════════════════════
+# API ROUTES
 # ══════════════════════════════════════════════════════════════════════
 import json as _json
 from fastapi import Request as _Request
+from fastapi.responses import JSONResponse as _JSONResponse
 @demo.app.get("/api/health")
 async def api_health():
+    # Health probe. "jobs_active" is the number of records currently held
+    # in the in-memory job store, whatever their status.
+    return _JSONResponse({
+        "status":      "ok",
+        "service":     "ClearWave AI on HuggingFace",
+        "jobs_active": len(_jobs),
+    })
 @demo.app.post("/api/process-url")
 async def api_process_url(request: _Request):
+    """
+    Instantly returns a jobId.
+    Client polls GET /api/job/{jobId} for progress and result.
+    No timeout issues — works for 1hr+ audio.
+    """
+    data = await request.json()
     if "data" in data and isinstance(data["data"], dict):
         data = data["data"]
     audio_url    = data.get("audioUrl")
     audio_id     = data.get("audioId",     "")
     src_lang     = data.get("srcLang",     "auto")
     if not audio_url:
         return _JSONResponse({"error": "audioUrl is required"}, status_code=400)
+    job_id = _new_job()
+    _update_job(job_id, status="downloading", message="Downloading audio...")
+    def _download_and_run():
+        tmp_path = None
+        audio_path = None
         try:
+            # Download
+            resp = requests.get(audio_url, timeout=300, stream=True)
             resp.raise_for_status()
             url_lower = audio_url.lower()
+            if   "wav"  in url_lower: suffix = ".wav"
+            elif "mpeg" in url_lower: suffix = ".mpeg"
+            elif "mp4"  in url_lower: suffix = ".mp4"
+            elif "m4a"  in url_lower: suffix = ".m4a"
+            else:                     suffix = ".mp3"
             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
+            tmp_path = tmp.name
             downloaded = 0
             total = int(resp.headers.get("content-length", 0))
             for chunk in resp.iter_content(chunk_size=65536):
                     downloaded += len(chunk)
                     if total:
                         pct = int(downloaded * 100 / total)
+                        _update_job(job_id, status="downloading",
+                                    message=f"Downloading... {pct}%")
             tmp.close()
+            # Convert format
+            audio_path = convert_to_wav(tmp_path)
+            # Run pipeline
+            for _ in run_pipeline(
+                audio_path, src_lang, tgt_lang,
+                opt_fillers, opt_stutters, opt_silences,
+                opt_breaths, opt_mouth, job_id=job_id
+            ):
+                pass
+            # Tag result with audioId
+            with _jobs_lock:
+                if job_id in _jobs and _jobs[job_id].get("result"):
+                    _jobs[job_id]["result"]["audioId"] = audio_id
+        except Exception as e:
+            logger.error(f"Job {job_id} failed: {e}", exc_info=True)
+            _update_job(job_id, status="error", message=f"❌ Error: {str(e)}")
+        finally:
+            for p in [tmp_path, audio_path]:
+                try:
+                    if p and os.path.exists(p):
+                        os.unlink(p)
+                except Exception:
+                    pass
+    threading.Thread(target=_download_and_run, daemon=True).start()
+    return _JSONResponse({
+        "jobId":   job_id,
+        "audioId": audio_id,
+        "status":  "queued",
+        "pollUrl": f"/api/job/{job_id}",
+        "message": "Job started! Poll pollUrl for progress.",
+    })
+@demo.app.get("/api/job/{job_id}")
+async def api_get_job(job_id: str):
+    """
+    Progress endpoint for a single job.
+    Responds 404 when the id is unknown. Otherwise returns the job's
+    status, step and message; once status is "done" the stored result is
+    included under "result".
+    """
+    job = _get_job(job_id)
+    if not job:
+        return _JSONResponse({"error": "Job not found"}, status_code=404)
+    status = job.get("status")
+    payload = dict(
+        jobId=job_id,
+        status=status,
+        step=job.get("step", 0),
+        message=job.get("message", ""),
+    )
+    if status == "done":
+        payload["result"] = job.get("result", {})
+    return _JSONResponse(payload)
+@demo.app.get("/api/jobs")
+async def api_list_jobs():
+    """Return status, step and message for every job in the store, plus a count."""
+    overview = {}
+    with _jobs_lock:
+        for jid, record in _jobs.items():
+            overview[jid] = {
+                "status": record["status"],
+                "step": record.get("step", 0),
+                "message": record.get("message", ""),
+            }
+    return _JSONResponse({"jobs": overview, "total": len(overview)})
+logger.info("✅ Routes: /api/health, /api/process-url, /api/job/{id}, /api/jobs")
 if __name__ == "__main__":
     demo.launch()