Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed 30 days ago

Commit

933850f

verified ·

1 Parent(s): ffe4c21

Upload 3 files

Browse files

Files changed (3) hide show

README.md +51 -6
app.py +272 -0
requirements.txt +9 -0

README.md CHANGED Viewed

@@ -1,14 +1,59 @@
 ---
-title: Clearwave Ai
-emoji: 🌖
 colorFrom: blue
-colorTo: red
 sdk: gradio
-sdk_version: 6.8.0
 app_file: app.py
 pinned: false
 license: mit
-short_description: 'AI audio pipeline: denoise, transcribe, and translate audio '
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: ClearWave AI
+emoji: 🎵
 colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: "4.0.0"
 app_file: app.py
 pinned: false
 license: mit
 ---
+# 🎵 ClearWave AI
+**Professional 3-Department Audio Processing Pipeline**
+Runs 100% free on Hugging Face ZeroGPU (A10G · 24 GB VRAM)
+## What It Does
+Upload any audio file and ClearWave AI runs it through three AI departments:
+| Dept | Model | What it does |
+|------|-------|--------------|
+| 🎙️ Denoiser | DeepFilterNet3 | Removes background noise, EBU R128 normalisation |
+| 📝 Transcriber | Groq Whisper large-v3 | Speech-to-text, 10-20x faster than local Whisper |
+| 🌐 Translator | NLLB-200-distilled-600M | Offline translation, 200 languages |
+**Example:**
+```
+Input  : English audio "Hello this is a test"
+Original (EN)  : Hello this is a test
+Translated (TE): హలో ఇది ఒక పరీక్ష
+Total time     : ~6 seconds
+```
+## Setting Your Groq API Key
+1. Get a free key at [console.groq.com](https://console.groq.com)
+2. In your Space: **Settings → Variables and secrets → New secret**
+3. Name: `GROQ_API_KEY`, Value: your key (`gsk_...`)
+4. Save — Space restarts automatically
+Without a key, the app falls back to local Whisper small (still works, slower).
+## How to Use
+1. Upload any audio file (MP3, WAV, AAC, OGG, M4A, FLAC, OPUS...)
+2. Set Input Language (or leave as Auto Detect)
+3. Set Output Language
+4. Click **Process Audio**
+5. View results in the Text Results, Clean Audio, and Timings tabs
+## Supported Languages
+English · Telugu · Hindi · Tamil · Kannada (NLLB-200 covers ~195 more languages, but only these five are currently selectable in the app)
+## Cost
+**$0** — Hugging Face ZeroGPU + Groq free tier (14,400s audio/day)

app.py ADDED Viewed

	@@ -0,0 +1,272 @@

+"""
+ClearWave AI - Cloud Audio Processing Pipeline
+Deployed on Hugging Face Spaces with ZeroGPU
+"""
+import gradio as gr
+import spaces
+import os
+import time
+import tempfile
+import shutil
+from services.denoiser import Denoiser
+from services.transcriber import Transcriber
+from services.translator import Translator
+# ─────────────────────────────────────────────
+# Init all 3 departments ONCE at startup: these module-level instances are reused by every process_audio call
+# ─────────────────────────────────────────────
+print("🚀 ClearWave AI starting up...")
+denoiser = Denoiser()  # Dept 1 — called as denoiser.process(audio_path, temp_dir)
+transcriber = Transcriber()  # Dept 2 — called as transcriber.transcribe(path, language=...)
+translator = Translator()  # Dept 3 — called as translator.translate(text, src_lang=..., tgt_lang=...)
+print("✅ All 3 departments ready!")
+# ─────────────────────────────────────────────
+# Language mappings: dropdown label -> short language code; key order is the order shown in the dropdowns
+# ─────────────────────────────────────────────
+INPUT_LANG_MAP = {  # source-language choices; the code is passed to the transcriber
+    "Auto Detect": "auto",  # "auto": the transcriber's detected language is used as the translation source
+    "English":     "en",
+    "Telugu":      "te",
+    "Hindi":       "hi",
+    "Tamil":       "ta",
+    "Kannada":     "kn",
+}
+OUTPUT_LANG_MAP = {  # target-language choices; the code is passed to the translator as tgt_lang
+    "Telugu":  "te",
+    "Hindi":   "hi",
+    "Tamil":   "ta",
+    "English": "en",
+    "Kannada": "kn",
+}
+LANG_BADGES = {  # language code -> badge text rendered above the transcript / translation
+    "en":   "🇬🇧 English",
+    "te":   "🇮🇳 Telugu",
+    "hi":   "🇮🇳 Hindi",
+    "ta":   "🇮🇳 Tamil",
+    "kn":   "🇮🇳 Kannada",
+    "auto": "🔍 Auto-detected",
+}
+# ─────────────────────────────────────────────
+# Core pipeline
+# ─────────────────────────────────────────────
+@spaces.GPU
+def process_audio(audio_path, input_lang_label, output_lang_label, progress=gr.Progress()):
+    if audio_path is None:
+        return None, "⚠️ Please upload an audio file.", "", "", "❌ No audio uploaded"
+    input_lang  = INPUT_LANG_MAP.get(input_lang_label, "auto")
+    output_lang = OUTPUT_LANG_MAP.get(output_lang_label, "te")
+    temp_dir   = tempfile.mkdtemp(prefix="clearwave_")
+    timings    = {}
+    total_start = time.time()
+    try:
+        # ─── Dept 1: Denoise ─────────────────────────
+        progress(0.05, desc="🎙️ Dept 1 — Denoising audio with DeepFilterNet3…")
+        t0 = time.time()
+        denoised_path = denoiser.process(audio_path, temp_dir)
+        timings["denoise"] = time.time() - t0
+        progress(0.40, desc=f"✅ Denoised in {timings['denoise']:.1f}s")
+        # ─── Dept 2: Transcribe ───────────────────────
+        progress(0.45, desc="📝 Dept 2 — Transcribing with Groq Whisper large-v3…")
+        t0 = time.time()
+        transcript, detected_lang, tx_method = transcriber.transcribe(
+            denoised_path, language=input_lang
+        )
+        timings["transcribe"] = time.time() - t0
+        progress(0.75, desc=f"✅ Transcribed in {timings['transcribe']:.1f}s [{tx_method}]")
+        # ─── Dept 3: Translate ────────────────────────
+        progress(0.80, desc="🌐 Dept 3 — Translating with NLLB-200…")
+        t0 = time.time()
+        effective_src = detected_lang if input_lang == "auto" else input_lang
+        if effective_src == output_lang:
+            translated    = transcript
+            tr_method     = "skipped (same language)"
+        else:
+            translated, tr_method = translator.translate(
+                transcript, src_lang=effective_src, tgt_lang=output_lang
+            )
+        timings["translate"] = time.time() - t0
+        progress(0.95, desc=f"✅ Translated in {timings['translate']:.1f}s [{tr_method}]")
+        total_time = time.time() - total_start
+        # ─── Format outputs ───────────────────────────
+        src_badge = LANG_BADGES.get(effective_src, "🔍 Unknown")
+        tgt_badge = LANG_BADGES.get(output_lang, "🌐")
+        transcript_md = f"**{src_badge}**\n\n{transcript}"
+        translated_md = f"**{tgt_badge}**\n\n{translated}"
+        timing_md = (
+            f"### ⏱️ Processing Times\n\n"
+            f"| Department | Time | Method |\n"
+            f"|---|---|---|\n"
+            f"| 🎙️ Denoiser (Dept 1) | `{timings['denoise']:.1f}s` | DeepFilterNet3 |\n"
+            f"| 📝 Transcriber (Dept 2) | `{timings['transcribe']:.1f}s` | {tx_method} |\n"
+            f"| 🌐 Translator (Dept 3) | `{timings['translate']:.1f}s` | {tr_method} |\n"
+            f"| **⚡ Total** | **`{total_time:.1f}s`** | 3-dept pipeline |\n\n"
+            f"> Running on Hugging Face ZeroGPU (A10G 24GB) — 100% free"
+        )
+        progress(1.0, desc=f"🎉 Complete! {total_time:.1f}s total")
+        # Copy denoised file to stable output path
+        out_audio = os.path.join(temp_dir, "clearwave_denoised.wav")
+        shutil.copy(denoised_path, out_audio)
+        return (
+            out_audio,
+            transcript_md,
+            translated_md,
+            timing_md,
+            f"✅ Pipeline complete in {total_time:.1f}s"
+        )
+    except Exception as e:
+        import traceback
+        err = traceback.format_exc()
+        print(f"[ClearWave] Pipeline error:\n{err}")
+        # Clean up temp on error
+        shutil.rmtree(temp_dir, ignore_errors=True)
+        return (
+            None,
+            f"❌ Error: {str(e)}",
+            "",
+            f"**Error details:**\n```\n{err}\n```",
+            f"❌ Failed — {str(e)}"
+        )
+# ─────────────────────────────────────────────
+# UI — custom dark-theme stylesheet passed to gr.Blocks(css=...); the header-wrap, pipe-strip and dept-pill classes are used by the header HTML
+# ─────────────────────────────────────────────
+CSS = """
+body, .gradio-container { background:#0d1117 !important; color:#e6edf3 !important; }
+.header-wrap {
+    background: linear-gradient(135deg,#161b22,#1c2128);
+    border:1px solid #30363d; border-radius:12px;
+    padding:28px 32px; margin-bottom:18px; text-align:center;
+}
+.header-wrap h1 {
+    font-size:2.2em; font-weight:700; margin:0 0 6px;
+    background:linear-gradient(90deg,#58a6ff,#3fb950,#f78166);
+    -webkit-background-clip:text; -webkit-text-fill-color:transparent;
+}
+.header-wrap p { color:#8b949e; font-size:0.98em; margin:0; }
+.pipe-strip {
+    display:flex; gap:8px; justify-content:center; flex-wrap:wrap; margin-bottom:14px;
+}
+.dept-pill {
+    background:#21262d; border:1px solid #30363d;
+    border-radius:20px; padding:5px 14px;
+    font-size:0.82em; color:#8b949e;
+}
+.panel { background:#161b22 !important; border:1px solid #30363d !important; border-radius:10px !important; }
+footer { display:none !important; }
+"""
+with gr.Blocks(css=CSS, title="ClearWave AI", theme=gr.themes.Base()) as demo:  # `demo` is the app object launched under __main__
+    # Header: static title banner plus one pill per pipeline department
+    gr.HTML("""
+    <div class="header-wrap">
+        <h1>🎵 ClearWave AI</h1>
+        <p>Professional 3-Department Audio Processing Pipeline · ZeroGPU · 100% Free</p>
+    </div>
+    <div class="pipe-strip">
+        <span class="dept-pill">🎙️ Dept 1 · DeepFilterNet3 Denoiser</span>
+        <span class="dept-pill">📝 Dept 2 · Groq Whisper large-v3</span>
+        <span class="dept-pill">🌐 Dept 3 · NLLB-200 Translator</span>
+    </div>
+    """)
+    with gr.Row(equal_height=False):
+        # ── Left: Input controls ──────────────────────
+        with gr.Column(scale=1, min_width=280):
+            audio_in = gr.Audio(
+                label="🎤 Upload or Record Audio",
+                type="filepath",  # process_audio receives a file path (or None when nothing is supplied)
+                sources=["upload", "microphone"],
+            )
+            with gr.Group():
+                input_lang  = gr.Dropdown(
+                    label="Input Language",
+                    choices=list(INPUT_LANG_MAP.keys()),  # labels are mapped back to codes inside process_audio
+                    value="Auto Detect",
+                )
+                output_lang = gr.Dropdown(
+                    label="Output Language",
+                    choices=list(OUTPUT_LANG_MAP.keys()),
+                    value="Telugu",
+                )
+            run_btn    = gr.Button("⚡ Process Audio", variant="primary", size="lg")
+            status_md  = gr.Markdown("*Upload audio and press Process.*")  # replaced by the pipeline's status line after each run
+        # ── Right: Results ────────────────────────────
+        with gr.Column(scale=2):
+            with gr.Tabs():
+                with gr.Tab("📝 Text Results"):
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("#### Original Transcript")
+                            transcript_out = gr.Markdown("*Will appear here…*")
+                        with gr.Column():
+                            gr.Markdown("#### Translation")
+                            translation_out = gr.Markdown("*Will appear here…*")
+                with gr.Tab("🎵 Clean Audio"):
+                    audio_out = gr.Audio(
+                        label="Denoised Audio (download)",
+                        type="filepath",
+                        interactive=False,  # output-only player
+                    )
+                    gr.Markdown(
+                        "*Noise-cancelled with DeepFilterNet3, "
+                        "normalized to EBU R128 broadcast standard.*"
+                    )
+                with gr.Tab("⏱️ Timings"):
+                    timing_out = gr.Markdown("*Timings will appear after processing…*")  # also shows the traceback when the pipeline fails
+    # Footer
+    gr.HTML("""
+    <div style="text-align:center;padding:16px;color:#484f58;font-size:0.8em;
+                border-top:1px solid #21262d;margin-top:16px;">
+        ClearWave AI · DeepFilterNet3 + Groq Whisper large-v3 + NLLB-200-distilled-600M ·
+        Hugging Face ZeroGPU (A10G 24GB)
+    </div>
+    """)
+    # Wire up: the order of `outputs` must match the 5-tuple returned by process_audio
+    run_btn.click(
+        fn=process_audio,
+        inputs=[audio_in, input_lang, output_lang],
+        outputs=[audio_out, transcript_out, translation_out, timing_out, status_md],
+        show_progress=True,
+    )
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        max_file_size="100mb",
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+deepfilternet
+soundfile
+pyloudnorm
+groq
+faster-whisper
+sentencepiece
+sacremoses
+deep-translator
+gradio>=4.0.0