Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed 29 days ago

Commit

9ea4293

verified ·

1 Parent(s): ceabe9e

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -153

app.py CHANGED Viewed

@@ -1,10 +1,16 @@
 """
 ClearWave AI - Cloud Audio Processing Pipeline
-Deployed on Hugging Face Spaces with ZeroGPU
 """
 import gradio as gr
-import spaces
 import os
 import time
 import tempfile
@@ -13,18 +19,15 @@ import shutil
 from denoiser import Denoiser
 from transcriber import Transcriber
 from translator import Translator
-# ─────────────────────────────────────────────
 # Init all 3 departments ONCE at startup
-# ─────────────────────────────────────────────
-print("🚀 ClearWave AI starting up...")
 denoiser = Denoiser()
 transcriber = Transcriber()
 translator = Translator()
-print("✅ All 3 departments ready!")
-# ─────────────────────────────────────────────
-# Language mappings
-# ─────────────────────────────────────────────
 INPUT_LANG_MAP = {
     "Auto Detect": "auto",
     "English":     "en",
@@ -43,218 +46,139 @@ OUTPUT_LANG_MAP = {
 }
 LANG_BADGES = {
-    "en":   "🇬🇧 English",
-    "te":   "🇮🇳 Telugu",
-    "hi":   "🇮🇳 Hindi",
-    "ta":   "🇮🇳 Tamil",
-    "kn":   "🇮🇳 Kannada",
-    "auto": "🔍 Auto-detected",
 }
-# ─────────────────────────────────────────────
-# Core pipeline
-# ─────────────────────────────────────────────
-@spaces.GPU
 def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
     if audio_path is None:
-        return None, "⚠️ Please upload an audio file.", "", "", "❌ No audio uploaded"
     input_lang  = INPUT_LANG_MAP.get(input_lang_label, "auto")
     output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
-    temp_dir   = tempfile.mkdtemp(prefix="clearwave_")
-    timings    = {}
     total_start = time.time()
     try:
-        # ─── Dept 1: Denoise ─────────────────────────
-        progress(0.05, desc="🎙️ Dept 1 — Denoising audio with DeepFilterNet3…")
         t0 = time.time()
         denoised_path = denoiser.process(audio_path, temp_dir)
         timings["denoise"] = time.time() - t0
-        progress(0.40, desc=f"✅ Denoised in {timings['denoise']:.1f}s")
-        # ─── Dept 2: Transcribe ───────────────────────
-        progress(0.45, desc="📝 Dept 2 — Transcribing with Groq Whisper large-v3…")
         t0 = time.time()
         transcript, detected_lang, tx_method = transcriber.transcribe(
             denoised_path, language=input_lang
         )
         timings["transcribe"] = time.time() - t0
-        progress(0.75, desc=f"✅ Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
-        # ─── Dept 3: Translate ────────────────────────
-        progress(0.80, desc="🌐 Dept 3 — Translating with NLLB-200…")
         t0 = time.time()
         effective_src = detected_lang if input_lang == "auto" else input_lang
         if effective_src == output_lang:
-            translated    = transcript
-            tr_method     = "skipped (same language)"
         else:
             translated, tr_method = translator.translate(
                 transcript, src_lang=effective_src, tgt_lang=output_lang
             )
         timings["translate"] = time.time() - t0
-        progress(0.95, desc=f"✅ Translated in {timings['translate']:.1f}s [{tr_method}]")
         total_time = time.time() - total_start
-        # ─── Format outputs ───────────────────────────
-        src_badge = LANG_BADGES.get(effective_src, "🔍 Unknown")
-        tgt_badge = LANG_BADGES.get(output_lang, "🌐")
         transcript_md = f"**{src_badge}**\n\n{transcript}"
         translated_md = f"**{tgt_badge}**\n\n{translated}"
         timing_md = (
-            f"### ⏱️ Processing Times\n\n"
             f"| Department | Time | Method |\n"
             f"|---|---|---|\n"
-            f"| 🎙️ Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | DeepFilterNet3 |\n"
-            f"| 📝 Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
-            f"| 🌐 Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
-            f"| **⚡ Total** | **`{total_time:.1f}s`** | 3-dept pipeline |\n\n"
-            f"> Running on Hugging Face ZeroGPU (A10G 24GB) — 100% free"
         )
-        progress(1.0, desc=f"🎉 Complete! {total_time:.1f}s total")
-        # Copy denoised file to stable output path
         out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
         shutil.copy(denoised_path, out_audio)
-        return (
-            out_audio,
-            transcript_md,
-            translated_md,
-            timing_md,
-            f"✅ Pipeline complete in {total_time:.1f}s"
-        )
     except Exception as e:
         import traceback
         err = traceback.format_exc()
-        print(f"[ClearWave] Pipeline error:\n{err}")
-        # Clean up temp on error
         shutil.rmtree(temp_dir, ignore_errors=True)
-        return (
-            None,
-            f"❌ Error: {str(e)}",
-            "",
-            f"**Error details:**\n```\n{err}\n```",
-            f"❌ Failed — {str(e)}"
-        )
-# ─────────────────────────────────────────────
-# UI
-# ─────────────────────────────────────────────
-CSS = """
-body, .gradio-container { background:#0d1117 !important; color:#e6edf3 !important; }
-.header-wrap {
-    background: linear-gradient(135deg,#161b22,#1c2128);
-    border:1px solid #30363d; border-radius:12px;
-    padding:28px 32px; margin-bottom:18px; text-align:center;
-}
-.header-wrap h1 {
-    font-size:2.2em; font-weight:700; margin:0 0 6px;
-    background:linear-gradient(90deg,#58a6ff,#3fb950,#f78166);
-    -webkit-background-clip:text; -webkit-text-fill-color:transparent;
-}
-.header-wrap p { color:#8b949e; font-size:0.98em; margin:0; }
-.pipe-strip {
-    display:flex; gap:8px; justify-content:center; flex-wrap:wrap; margin-bottom:14px;
-}
-.dept-pill {
-    background:#21262d; border:1px solid #30363d;
-    border-radius:20px; padding:5px 14px;
-    font-size:0.82em; color:#8b949e;
-}
-.panel { background:#161b22 !important; border:1px solid #30363d !important; border-radius:10px !important; }
-footer { display:none !important; }
-"""
-with gr.Blocks(css=CSS, title="ClearWave AI", theme=gr.themes.Base()) as demo:
-    # Header
-    gr.HTML("""
-    <div class="header-wrap">
-        <h1>🎵 ClearWave AI</h1>
-        <p>Professional 3-Department Audio Processing Pipeline · ZeroGPU · 100% Free</p>
-    </div>
-    <div class="pipe-strip">
-        <span class="dept-pill">🎙️ Dept 1 · DeepFilterNet3 Denoiser</span>
-        <span class="dept-pill">📝 Dept 2 · Groq Whisper large-v3</span>
-        <span class="dept-pill">🌐 Dept 3 · NLLB-200 Translator</span>
-    </div>
     """)
-    with gr.Row(equal_height=False):
-        # ── Left: Input controls ──────────────────────
-        with gr.Column(scale=1, min_width=280):
             audio_in = gr.Audio(
-                label="🎤 Upload or Record Audio",
                 type="filepath",
                 sources=["upload", "microphone"],
             )
-            with gr.Group():
-                input_lang  = gr.Dropdown(
-                    label="Input Language",
-                    choices=list(INPUT_LANG_MAP.keys()),
-                    value="Auto Detect",
-                )
-                output_lang = gr.Dropdown(
-                    label="Output Language",
-                    choices=list(OUTPUT_LANG_MAP.keys()),
-                    value="Telugu",
-                )
-            run_btn    = gr.Button("⚡ Process Audio", variant="primary", size="lg")
-            status_md  = gr.Markdown("*Upload audio and press Process.*")
-        # ── Right: Results ────────────────────────────
         with gr.Column(scale=2):
             with gr.Tabs():
-                with gr.Tab("📝 Text Results"):
                     with gr.Row():
                         with gr.Column():
                             gr.Markdown("#### Original Transcript")
-                            transcript_out = gr.Markdown("*Will appear here…*")
                         with gr.Column():
                             gr.Markdown("#### Translation")
-                            translation_out = gr.Markdown("*Will appear here…*")
-                with gr.Tab("🎵 Clean Audio"):
                     audio_out = gr.Audio(
-                        label="Denoised Audio (download)",
                         type="filepath",
                         interactive=False,
                     )
-                    gr.Markdown(
-                        "*Noise-cancelled with DeepFilterNet3, "
-                        "normalized to EBU R128 broadcast standard.*"
-                    )
-                with gr.Tab("⏱️ Timings"):
-                    timing_out = gr.Markdown("*Timings will appear after processing…*")
-    # Footer
-    gr.HTML("""
-    <div style="text-align:center;padding:16px;color:#484f58;font-size:0.8em;
-                border-top:1px solid #21262d;margin-top:16px;">
-        ClearWave AI · DeepFilterNet3 + Groq Whisper large-v3 + NLLB-200-distilled-600M ·
-        Hugging Face ZeroGPU (A10G 24GB)
-    </div>
-    """)
-    # Wire up
     run_btn.click(
         fn=process_audio,
         inputs=[audio_in, input_lang, output_lang],
@@ -263,9 +187,4 @@ with gr.Blocks(css=CSS, title="ClearWave AI", theme=gr.themes.Base()) as demo:
     )
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        max_file_size="100mb",
-    )

+# ── Python 3.13 compatibility patch: audioop was removed from the stdlib, so register an empty stub before any other import ──
+import sys
+import types
+_audioop = types.ModuleType('audioop')
+sys.modules['audioop'] = _audioop
+sys.modules['pyaudioop'] = _audioop
 """
 ClearWave AI - Cloud Audio Processing Pipeline
+Deployed on Hugging Face Spaces
 """
 import gradio as gr
 import os
 import time
 import tempfile
 from denoiser import Denoiser
 from transcriber import Transcriber
 from translator import Translator
 # Init all 3 departments ONCE at startup
+print("ClearWave AI starting up...")
 denoiser = Denoiser()
 transcriber = Transcriber()
 translator = Translator()
+print("All 3 departments ready!")
+# ── Language mappings ─────────────────────────────────
 INPUT_LANG_MAP = {
     "Auto Detect": "auto",
     "English":     "en",
 }
 LANG_BADGES = {
+    "en":   "English",
+    "te":   "Telugu",
+    "hi":   "Hindi",
+    "ta":   "Tamil",
+    "kn":   "Kannada",
+    "auto": "Auto-detected",
 }
+# ── Core pipeline ─────────────────────────────────────
 def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
     if audio_path is None:
+        return None, "Please upload an audio file.", "", "", "No audio uploaded"
     input_lang  = INPUT_LANG_MAP.get(input_lang_label, "auto")
     output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
+    temp_dir    = tempfile.mkdtemp(prefix="clearwave_")
+    timings     = {}
     total_start = time.time()
     try:
+        # Dept 1: Denoise
+        progress(0.05, desc="Dept 1 - Denoising audio...")
         t0 = time.time()
         denoised_path = denoiser.process(audio_path, temp_dir)
         timings["denoise"] = time.time() - t0
+        progress(0.40, desc=f"Denoised in {timings['denoise']:.1f}s")
+        # Dept 2: Transcribe
+        progress(0.45, desc="Dept 2 - Transcribing with Groq Whisper...")
         t0 = time.time()
         transcript, detected_lang, tx_method = transcriber.transcribe(
             denoised_path, language=input_lang
         )
         timings["transcribe"] = time.time() - t0
+        progress(0.75, desc=f"Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
+        # Dept 3: Translate
+        progress(0.80, desc="Dept 3 - Translating with NLLB-200...")
         t0 = time.time()
         effective_src = detected_lang if input_lang == "auto" else input_lang
         if effective_src == output_lang:
+            translated = transcript
+            tr_method  = "skipped (same language)"
         else:
             translated, tr_method = translator.translate(
                 transcript, src_lang=effective_src, tgt_lang=output_lang
             )
         timings["translate"] = time.time() - t0
+        progress(0.95, desc=f"Translated in {timings['translate']:.1f}s [{tr_method}]")
         total_time = time.time() - total_start
+        src_badge = LANG_BADGES.get(effective_src, "Unknown")
+        tgt_badge = LANG_BADGES.get(output_lang, "Unknown")
         transcript_md = f"**{src_badge}**\n\n{transcript}"
         translated_md = f"**{tgt_badge}**\n\n{translated}"
         timing_md = (
+            f"### Processing Times\n\n"
             f"| Department | Time | Method |\n"
             f"|---|---|---|\n"
+            f"| Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | noisereduce |\n"
+            f"| Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
+            f"| Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
+            f"| **Total** | **`{total_time:.1f}s`** | 3-dept pipeline |"
         )
+        progress(1.0, desc=f"Complete! {total_time:.1f}s")
         out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
         shutil.copy(denoised_path, out_audio)
+        return out_audio, transcript_md, translated_md, timing_md, f"Done in {total_time:.1f}s"
     except Exception as e:
         import traceback
         err = traceback.format_exc()
+        print(f"Pipeline error:\n{err}")
         shutil.rmtree(temp_dir, ignore_errors=True)
+        return None, f"Error: {str(e)}", "", f"```\n{err}\n```", f"Failed: {str(e)}"
+# ── Gradio UI ─────────────────────────────────────────
+with gr.Blocks(title="ClearWave AI") as demo:
+    gr.Markdown("""
+    # ClearWave AI
+    **3-Department Audio Pipeline: Denoise → Transcribe → Translate**
     """)
+    with gr.Row():
+        with gr.Column(scale=1):
             audio_in = gr.Audio(
+                label="Upload or Record Audio",
                 type="filepath",
                 sources=["upload", "microphone"],
             )
+            input_lang = gr.Dropdown(
+                label="Input Language",
+                choices=list(INPUT_LANG_MAP.keys()),
+                value="Auto Detect",
+            )
+            output_lang = gr.Dropdown(
+                label="Output Language",
+                choices=list(OUTPUT_LANG_MAP.keys()),
+                value="Telugu",
+            )
+            run_btn   = gr.Button("Process Audio", variant="primary", size="lg")
+            status_md = gr.Markdown("Upload audio and press Process.")
         with gr.Column(scale=2):
             with gr.Tabs():
+                with gr.Tab("Text Results"):
                     with gr.Row():
                         with gr.Column():
                             gr.Markdown("#### Original Transcript")
+                            transcript_out = gr.Markdown("Will appear here...")
                         with gr.Column():
                             gr.Markdown("#### Translation")
+                            translation_out = gr.Markdown("Will appear here...")
+                with gr.Tab("Clean Audio"):
                     audio_out = gr.Audio(
+                        label="Denoised Audio",
                         type="filepath",
                         interactive=False,
                     )
+                with gr.Tab("Timings"):
+                    timing_out = gr.Markdown("Timings will appear after processing...")
     run_btn.click(
         fn=process_audio,
         inputs=[audio_in, input_lang, output_lang],
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)