Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed on Mar 7

Commit

7c3a40a

verified ·

1 Parent(s): 6c1060d

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -156

app.py CHANGED Viewed

@@ -1,190 +1,131 @@
-# ── Python 3.13 compatibility patch (must be first) ──
 import sys
 import types
-_audioop = types.ModuleType('audioop')
-sys.modules['audioop'] = _audioop
-sys.modules['pyaudioop'] = _audioop
-"""
-ClearWave AI - Cloud Audio Processing Pipeline
-Deployed on Hugging Face Spaces
-"""
 import gradio as gr
 import os
 import time
 import tempfile
 import shutil
-from denoiser import Denoiser
-from transcriber import Transcriber
-from translator import Translator
-# Init all 3 departments ONCE at startup
-print("ClearWave AI starting up...")
-denoiser = Denoiser()
-transcriber = Transcriber()
-translator = Translator()
-print("All 3 departments ready!")
-# ── Language mappings ─────────────────────────────────
-INPUT_LANG_MAP = {
-    "Auto Detect": "auto",
-    "English":     "en",
-    "Telugu":      "te",
-    "Hindi":       "hi",
-    "Tamil":       "ta",
-    "Kannada":     "kn",
-}
-OUTPUT_LANG_MAP = {
-    "Telugu":  "te",
-    "Hindi":   "hi",
-    "Tamil":   "ta",
-    "English": "en",
-    "Kannada": "kn",
 }
-LANG_BADGES = {
-    "en":   "English",
-    "te":   "Telugu",
-    "hi":   "Hindi",
-    "ta":   "Tamil",
-    "kn":   "Kannada",
-    "auto": "Auto-detected",
-}
-# ── Core pipeline ─────────────────────────────────────
-def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
     if audio_path is None:
-        return None, "Please upload an audio file.", "", "", "No audio uploaded"
-    input_lang  = INPUT_LANG_MAP.get(input_lang_label, "auto")
-    output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
-    temp_dir    = tempfile.mkdtemp(prefix="clearwave_")
-    timings     = {}
-    total_start = time.time()
     try:
-        # Dept 1: Denoise
-        progress(0.05, desc="Dept 1 - Denoising audio...")
         t0 = time.time()
-        denoised_path = denoiser.process(audio_path, temp_dir)
-        timings["denoise"] = time.time() - t0
-        progress(0.40, desc=f"Denoised in {timings['denoise']:.1f}s")
-        # Dept 2: Transcribe
-        progress(0.45, desc="Dept 2 - Transcribing with Groq Whisper...")
         t0 = time.time()
-        transcript, detected_lang, tx_method = transcriber.transcribe(
-            denoised_path, language=input_lang
-        )
-        timings["transcribe"] = time.time() - t0
-        progress(0.75, desc=f"Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
-        # Dept 3: Translate
-        progress(0.80, desc="Dept 3 - Translating with NLLB-200...")
         t0 = time.time()
-        effective_src = detected_lang if input_lang == "auto" else input_lang
-        if effective_src == output_lang:
-            translated = transcript
-            tr_method  = "skipped (same language)"
-        else:
-            translated, tr_method = translator.translate(
-                transcript, src_lang=effective_src, tgt_lang=output_lang
-            )
-        timings["translate"] = time.time() - t0
-        progress(0.95, desc=f"Translated in {timings['translate']:.1f}s [{tr_method}]")
-        total_time = time.time() - total_start
-        src_badge = LANG_BADGES.get(effective_src, "Unknown")
-        tgt_badge = LANG_BADGES.get(output_lang, "Unknown")
-        transcript_md = f"**{src_badge}**\n\n{transcript}"
-        translated_md = f"**{tgt_badge}**\n\n{translated}"
-        timing_md = (
-            f"### Processing Times\n\n"
-            f"| Department | Time | Method |\n"
-            f"|---|---|---|\n"
-            f"| Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | noisereduce |\n"
-            f"| Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
-            f"| Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
-            f"| **Total** | **`{total_time:.1f}s`** | 3-dept pipeline |"
-        )
-        progress(1.0, desc=f"Complete! {total_time:.1f}s")
-        out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
-        shutil.copy(denoised_path, out_audio)
-        return out_audio, transcript_md, translated_md, timing_md, f"Done in {total_time:.1f}s"
     except Exception as e:
         import traceback
-        err = traceback.format_exc()
-        print(f"Pipeline error:\n{err}")
-        shutil.rmtree(temp_dir, ignore_errors=True)
-        return None, f"Error: {str(e)}", "", f"```\n{err}\n```", f"Failed: {str(e)}"
-# ── Gradio UI ─────────────────────────────────────────
 with gr.Blocks(title="ClearWave AI") as demo:
-    gr.Markdown("""
-    # ClearWave AI
-    **3-Department Audio Pipeline: Denoise → Transcribe → Translate**
-    """)
     with gr.Row():
         with gr.Column(scale=1):
-            audio_in = gr.Audio(
-                label="Upload or Record Audio",
-                type="filepath",
-                sources=["upload", "microphone"],
-            )
-            input_lang = gr.Dropdown(
-                label="Input Language",
-                choices=list(INPUT_LANG_MAP.keys()),
-                value="Auto Detect",
-            )
-            output_lang = gr.Dropdown(
-                label="Output Language",
-                choices=list(OUTPUT_LANG_MAP.keys()),
-                value="Telugu",
-            )
-            run_btn   = gr.Button("Process Audio", variant="primary", size="lg")
-            status_md = gr.Markdown("Upload audio and press Process.")
         with gr.Column(scale=2):
             with gr.Tabs():
-                with gr.Tab("Text Results"):
                     with gr.Row():
                         with gr.Column():
-                            gr.Markdown("#### Original Transcript")
-                            transcript_out = gr.Markdown("Will appear here...")
                         with gr.Column():
                             gr.Markdown("#### Translation")
-                            translation_out = gr.Markdown("Will appear here...")
                 with gr.Tab("Clean Audio"):
-                    audio_out = gr.Audio(
-                        label="Denoised Audio",
-                        type="filepath",
-                        interactive=False,
-                    )
                 with gr.Tab("Timings"):
-                    timing_out = gr.Markdown("Timings will appear after processing...")
-    run_btn.click(
-        fn=process_audio,
-        inputs=[audio_in, input_lang, output_lang],
-        outputs=[audio_out, transcript_out, translation_out, timing_out, status_md],
-        show_progress=True,
-    )
-if __name__ == "__main__":
-    demo.launch()

+# Fix pydub on Python 3.13: the stdlib `audioop` module was removed in 3.13,
+# so an empty placeholder module is registered under both names before the
+# remaining imports run.
+# NOTE(review): the placeholder defines no functions, so it only lets
+# `import audioop` / `import pyaudioop` succeed; any call into it would raise
+# AttributeError. Presumably needed because gradio pulls in pydub - confirm.
 import sys
 import types
+_a = types.ModuleType('audioop')
+sys.modules['audioop'] = _a
+sys.modules['pyaudioop'] = _a
 import gradio as gr
 import os
 import time
 import tempfile
 import shutil
+import subprocess
+import numpy as np
+print("ClearWave AI starting...")
+# Labels offered by the input and output language dropdowns.
+INPUT_LANGS  = ["Auto Detect","English","Telugu","Hindi","Tamil","Kannada"]
+OUTPUT_LANGS = ["Telugu","Hindi","Tamil","English","Kannada"]
+# Dropdown label -> language code; "auto" means no language is forced.
+LANG_CODES = {
+    "Auto Detect":"auto","English":"en","Telugu":"te",
+    "Hindi":"hi","Tamil":"ta","Kannada":"kn"
 }
+def denoise(audio_path, out_dir):
+    """Convert the input to 16 kHz mono WAV, reduce noise, and peak-normalise.
+
+    Args:
+        audio_path: Path of the audio file to clean; it is handed to ffmpeg.
+        out_dir: Existing directory that receives the intermediate
+            ``input.wav`` and the final ``denoised.wav``.
+
+    Returns:
+        Path of the written ``denoised.wav``.
+
+    Raises:
+        RuntimeError: If ffmpeg exits with a non-zero status.
+    """
+    import soundfile as sf
+    import noisereduce as nr
+    wav = os.path.join(out_dir, "input.wav")
+    conv = subprocess.run(
+        ["ffmpeg","-y","-i",audio_path,"-ar","16000","-ac","1","-f","wav",wav],
+        capture_output=True,
+    )
+    # Fail here with ffmpeg's own message instead of letting sf.read() trip
+    # over a missing or truncated input.wav further down.
+    if conv.returncode != 0:
+        detail = conv.stderr.decode("utf-8", errors="replace").strip()
+        raise RuntimeError(f"ffmpeg could not convert the audio: {detail[-500:]}")
+    data, sr = sf.read(wav)
+    data = data.astype(np.float32)
+    cleaned = nr.reduce_noise(y=data, sr=sr).astype(np.float32)
+    # Scale so the loudest sample sits at 0.9; a silent clip (peak 0) is left
+    # untouched to avoid dividing by zero.
+    peak = np.abs(cleaned).max()
+    if peak > 0:
+        cleaned = cleaned / peak * 0.9
+    out = os.path.join(out_dir, "denoised.wav")
+    sf.write(out, cleaned, sr)
+    return out
+def transcribe(audio_path, language="auto"):
+    """Transcribe an audio file with Groq's hosted Whisper large-v3.
+
+    Args:
+        audio_path: Path of the audio file to upload.
+        language: Language code to force, or ``"auto"`` (or empty) to send
+            the request without a language.
+
+    Returns:
+        ``(text, language_code, method)``. When ``GROQ_API_KEY`` is not set,
+        no request is made and a notice is returned in place of the text.
+    """
+    groq_key = os.environ.get("GROQ_API_KEY","")
+    if not groq_key:
+        return "No GROQ_API_KEY set.", "en", "no key"
+    from groq import Groq
+    client = Groq(api_key=groq_key)
+    with open(audio_path, "rb") as f:
+        kwargs = dict(file=f, model="whisper-large-v3", response_format="verbose_json", temperature=0.0)
+        if language and language != "auto":
+            kwargs["language"] = language
+        resp = client.audio.transcriptions.create(**kwargs)
+    # Guard against a response whose text is None.
+    text = (resp.text or "").strip()
+    lang = getattr(resp, "language", language or "en") or "en"
+    lang_map = {"english":"en","telugu":"te","hindi":"hi","tamil":"ta","kannada":"kn"}
+    key = lang.strip().lower() or "en"
+    if key in lang_map:
+        lang = lang_map[key]
+    elif key == "auto":
+        # Nothing reported and nothing requested: keep "auto" whole rather
+        # than truncating it to the meaningless code "au".
+        lang = "auto"
+    else:
+        # Unknown name or an existing code: keep the two-letter prefix.
+        lang = key[:2]
+    return text, lang, "Groq Whisper large-v3"
+def translate(text, src, tgt):
+    """Translate ``text`` from language code ``src`` to ``tgt``.
+
+    Returns ``(result, method)``. The text is returned unchanged with method
+    ``"skipped"`` when both codes are equal or the text is blank. Any
+    exception raised while translating is reported in the returned string
+    with method ``"error"`` instead of propagating.
+    """
+    if src == tgt or not text.strip():
+        return text, "skipped"
+    try:
+        # Imported lazily, so the package is only needed when a translation
+        # is actually requested.
+        from deep_translator import GoogleTranslator
+        result = GoogleTranslator(source=src, target=tgt).translate(text)
+        return result, "Google Translate"
+    except Exception as e:
+        return f"Translation failed: {e}", "error"
+def process(audio_path, in_lang_label, out_lang_label, progress=gr.Progress()):
+    """Run denoise -> transcribe -> translate for one clip.
+
+    Args:
+        audio_path: Path of the uploaded or recorded audio, or None.
+        in_lang_label: Input-language dropdown label (key of LANG_CODES).
+        out_lang_label: Output-language dropdown label (key of LANG_CODES).
+        progress: Gradio progress tracker used to report each stage.
+
+    Returns:
+        A 5-tuple ``(audio_path_or_None, transcript, translation,
+        timing_markdown_or_traceback, status)``. On success the working
+        directory is kept because the returned audio file lives inside it;
+        on failure it is removed.
+    """
     if audio_path is None:
+        return None, "Please upload audio.", "", "", "No audio"
+    in_lang  = LANG_CODES.get(in_lang_label, "auto")
+    out_lang = LANG_CODES.get(out_lang_label, "te")
+    tmp = tempfile.mkdtemp()
+    t_total = time.time()
     try:
+        progress(0.1, desc="Denoising...")
         t0 = time.time()
+        clean = denoise(audio_path, tmp)
+        t1 = time.time() - t0
+        progress(0.4, desc="Transcribing with Groq...")
         t0 = time.time()
+        transcript, detected, tx_m = transcribe(clean, in_lang)
+        t2 = time.time() - t0
+        progress(0.75, desc="Translating...")
         t0 = time.time()
+        # Trust the user's explicit choice; fall back to the detected
+        # language only when "Auto Detect" was selected.
+        src = detected if in_lang == "auto" else in_lang
+        translated, tr_m = translate(transcript, src, out_lang)
+        t3 = time.time() - t0
+        total = time.time() - t_total
+        progress(1.0, desc=f"Done in {total:.1f}s!")
+        timing = (f"| Step | Time | Method |\n|---|---|---|\n"
+                  f"| Denoise | {t1:.1f}s | noisereduce |\n"
+                  f"| Transcribe | {t2:.1f}s | {tx_m} |\n"
+                  f"| Translate | {t3:.1f}s | {tr_m} |\n"
+                  f"| **Total** | **{total:.1f}s** | |")
+        out_audio = os.path.join(tmp, "output.wav")
+        shutil.copy(clean, out_audio)
+        return out_audio, transcript, translated, timing, f"Done in {total:.1f}s"
     except Exception as e:
         import traceback
+        # Nothing inside the working directory is handed back on failure, so
+        # remove it instead of leaking one directory per failed request.
+        shutil.rmtree(tmp, ignore_errors=True)
+        return None, f"Error: {e}", "", traceback.format_exc(), "Failed"
+# Page layout: input controls in the first column, tabbed results in the second.
 with gr.Blocks(title="ClearWave AI") as demo:
+    gr.Markdown("# ClearWave AI\n**Denoise → Transcribe → Translate**")
     with gr.Row():
         with gr.Column(scale=1):
+            audio_in = gr.Audio(label="Upload Audio", type="filepath", sources=["upload","microphone"])
+            in_lang = gr.Dropdown(INPUT_LANGS, value="Auto Detect", label="Input Language")
+            out_lang = gr.Dropdown(OUTPUT_LANGS, value="Telugu", label="Output Language")
+            run_btn = gr.Button("Process Audio", variant="primary", size="lg")
+            status = gr.Markdown("Upload audio and click Process.")
         with gr.Column(scale=2):
             with gr.Tabs():
+                with gr.Tab("Text"):
                     with gr.Row():
                         with gr.Column():
+                            gr.Markdown("#### Transcript")
+                            transcript_out = gr.Markdown("...")
                         with gr.Column():
                             gr.Markdown("#### Translation")
+                            translation_out = gr.Markdown("...")
                 with gr.Tab("Clean Audio"):
+                    audio_out = gr.Audio(label="Denoised", type="filepath", interactive=False)
                 with gr.Tab("Timings"):
+                    timing_out = gr.Markdown("...")
+    # The outputs list is in the same order as the five values process() returns.
+    run_btn.click(fn=process, inputs=[audio_in, in_lang, out_lang],
+                  outputs=[audio_out, transcript_out, translation_out, timing_out, status],
+                  show_progress=True)
+print("ClearWave AI ready!")
+# NOTE(review): launch() runs unconditionally at module level; the removed
+# version guarded it with `if __name__ == "__main__":`.
+demo.launch()