Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,98 +15,96 @@ import numpy as np
|
|
| 15 |
|
| 16 |
print("ClearWave AI starting...")
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
try:
|
| 30 |
-
cleaned = nr.reduce_noise(y=data, sr=sr).astype(np.float32)
|
| 31 |
-
except Exception:
|
| 32 |
-
cleaned = data
|
| 33 |
-
peak = np.abs(cleaned).max()
|
| 34 |
-
if peak > 0:
|
| 35 |
-
cleaned = cleaned / peak * 0.9
|
| 36 |
-
out = os.path.join(out_dir, "denoised.wav")
|
| 37 |
-
sf.write(out, cleaned, sr)
|
| 38 |
-
return out
|
| 39 |
-
|
| 40 |
-
def transcribe(audio_path, language="auto"):
    """Transcribe an audio file with Groq's Whisper large-v3.

    Args:
        audio_path: path to a WAV file to transcribe.
        language: ISO-639-1 code, or "auto" to let Whisper detect.

    Returns:
        (text, lang_code, method) where lang_code is a 2-letter code and
        method describes how the transcript was produced.
    """
    groq_key = os.environ.get("GROQ_API_KEY","")
    if not groq_key:
        # Graceful degradation on Spaces without the secret configured.
        return "No GROQ_API_KEY set. Add it in Space Settings Secrets.", "en", "no key"
    from groq import Groq
    client = Groq(api_key=groq_key)
    with open(audio_path, "rb") as f:
        kwargs = dict(file=("audio.wav", f, "audio/wav"),
                      model="whisper-large-v3",
                      response_format="verbose_json",
                      temperature=0.0)
        # Only pin the language when the caller chose one explicitly;
        # otherwise Whisper auto-detects.
        if language and language != "auto":
            kwargs["language"] = language
        resp = client.audio.transcriptions.create(**kwargs)
    text = resp.text.strip()
    lang = getattr(resp, "language", None) or language
    # FIX: previously, when detection returned nothing and language was
    # "auto", the "auto" sentinel fell through to lang[:2] and produced the
    # invalid code "au". Treat it as "no detection" and default to English.
    if not lang or lang == "auto":
        lang = "en"
    # Whisper may report full language names; normalize to 2-letter codes.
    lang_map = {"english":"en","telugu":"te","hindi":"hi","tamil":"ta","kannada":"kn"}
    lang = lang_map.get(lang.lower(), lang[:2].lower() if len(lang) >= 2 else "en")
    return text, lang, "Groq Whisper large-v3"
|
| 59 |
-
|
| 60 |
-
def translate(text, src, tgt):
    """Best-effort translation of *text* from *src* to *tgt*.

    Returns (translated_text, method). Empty input or identical language
    pair short-circuits; any failure is reported rather than raised.
    """
    needs_translation = bool(text.strip()) and src != tgt
    if not needs_translation:
        return text, "skipped"
    try:
        from deep_translator import GoogleTranslator
        translated = GoogleTranslator(source=src, target=tgt).translate(text)
        return translated, "Google Translate"
    except Exception as e:
        # Best-effort: surface the failure in the UI instead of crashing.
        return f"Translation error: {e}", "error"
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def process(audio_path, in_lang_label, out_lang_label, progress=gr.Progress()):
|
| 70 |
if audio_path is None:
|
| 71 |
return None, "Please upload audio.", "", "", "No audio"
|
|
|
|
| 72 |
in_lang = LANG_CODES.get(in_lang_label, "auto")
|
| 73 |
out_lang = LANG_CODES.get(out_lang_label, "te")
|
| 74 |
-
tmp
|
| 75 |
-
t_total
|
|
|
|
| 76 |
try:
|
|
|
|
| 77 |
progress(0.1, desc="Dept 1: Denoising...")
|
| 78 |
-
t0
|
|
|
|
|
|
|
| 79 |
|
|
|
|
| 80 |
progress(0.4, desc="Dept 2: Transcribing...")
|
| 81 |
-
t0
|
|
|
|
|
|
|
| 82 |
|
|
|
|
| 83 |
progress(0.75, desc="Dept 3: Translating...")
|
| 84 |
-
src
|
| 85 |
-
t0
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
total = time.time()-t_total
|
| 88 |
progress(1.0, desc=f"Done in {total:.1f}s!")
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
out_audio = os.path.join(tmp, "output.wav")
|
| 95 |
shutil.copy(clean, out_audio)
|
| 96 |
return out_audio, transcript, translated, timing, f"Done in {total:.1f}s"
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
import traceback
|
| 99 |
return None, f"Error: {e}", "", traceback.format_exc(), "Failed"
|
| 100 |
|
|
|
|
|
|
|
| 101 |
with gr.Blocks(title="ClearWave AI", theme=gr.themes.Soft()) as demo:
|
| 102 |
-
gr.Markdown("# ClearWave AI\n**Denoise
|
|
|
|
| 103 |
with gr.Row():
|
| 104 |
with gr.Column(scale=1):
|
| 105 |
-
audio_in
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
with gr.Column(scale=2):
|
| 111 |
with gr.Tabs():
|
| 112 |
with gr.Tab("Text"):
|
|
@@ -118,15 +116,20 @@ with gr.Blocks(title="ClearWave AI", theme=gr.themes.Soft()) as demo:
|
|
| 118 |
gr.Markdown("#### Translation")
|
| 119 |
translation_out = gr.Markdown("...")
|
| 120 |
with gr.Tab("Clean Audio"):
|
| 121 |
-
audio_out = gr.Audio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
with gr.Tab("Timings"):
|
| 123 |
timing_out = gr.Markdown("...")
|
|
|
|
| 124 |
run_btn.click(
|
| 125 |
fn=process,
|
| 126 |
inputs=[audio_in, in_lang, out_lang],
|
| 127 |
outputs=[audio_out, transcript_out, translation_out, timing_out, status],
|
| 128 |
show_progress=True,
|
| 129 |
-
api_name=False,
|
| 130 |
)
|
| 131 |
|
| 132 |
print("ClearWave AI ready!")
|
|
|
|
| 15 |
|
| 16 |
print("ClearWave AI starting...")
|
| 17 |
|
| 18 |
+
# ββ Services ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
from services.denoiser import Denoiser
|
| 20 |
+
from services.transcriber import Transcriber
|
| 21 |
+
# β
FIX: Now using the full Translator class (NLLB-1.3B + Google fallback)
|
| 22 |
+
# Previously app.py had its own inline translate() that only used
|
| 23 |
+
# Google Translate and completely ignored translator.py
|
| 24 |
+
from services.translator import Translator
|
| 25 |
+
|
| 26 |
+
_denoiser = Denoiser()
|
| 27 |
+
_transcriber = Transcriber()
|
| 28 |
+
_translator = Translator()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
# --- Config ---------------------------------------------------------
# Dropdown labels shown in the UI.
INPUT_LANGS = ["Auto Detect", "English", "Telugu", "Hindi", "Tamil", "Kannada"]
OUTPUT_LANGS = ["Telugu", "Hindi", "Tamil", "English", "Kannada"]
# Label -> ISO-639-1 code (plus the "auto" sentinel for detection).
LANG_CODES = dict(zip(INPUT_LANGS, ["auto", "en", "te", "hi", "ta", "kn"]))
|
| 41 |
+
|
| 42 |
+
# --- Pipeline -------------------------------------------------------
def process(audio_path, in_lang_label, out_lang_label, progress=gr.Progress()):
    """Run the 3-stage pipeline: denoise -> transcribe -> translate.

    Args:
        audio_path: filepath from the gr.Audio input, or None.
        in_lang_label: label from the input-language dropdown.
        out_lang_label: label from the output-language dropdown.
        progress: Gradio progress reporter (injected by Gradio).

    Returns:
        (clean_audio_path, transcript, translation, timing_markdown, status)
        On failure: (None, error_message, "", traceback_text, "Failed").
    """
    if audio_path is None:
        return None, "Please upload audio.", "", "", "No audio"

    in_lang = LANG_CODES.get(in_lang_label, "auto")
    out_lang = LANG_CODES.get(out_lang_label, "te")
    tmp = tempfile.mkdtemp()
    t_total = time.time()

    try:
        # Dept 1 - Denoise
        progress(0.1, desc="Dept 1: Denoising...")
        t0 = time.time()
        clean = _denoiser.process(audio_path, tmp)
        t1 = time.time() - t0

        # Dept 2 - Transcribe
        progress(0.4, desc="Dept 2: Transcribing...")
        t0 = time.time()
        transcript, detected, tx_m = _transcriber.transcribe(clean, in_lang)
        t2 = time.time() - t0

        # Dept 3 - Translate
        progress(0.75, desc="Dept 3: Translating...")
        # Use the detected language only when the user asked for auto-detect.
        src = detected if in_lang == "auto" else in_lang
        t0 = time.time()
        translated, tr_m = _translator.translate(transcript, src, out_lang)
        t3 = time.time() - t0

        total = time.time() - t_total
        progress(1.0, desc=f"Done in {total:.1f}s!")

        timing = (
            f"| Step | Time | Method |\n|---|---|---|\n"
            f"| Denoise | {t1:.1f}s | noisereduce |\n"
            f"| Transcribe | {t2:.1f}s | {tx_m} |\n"
            f"| Translate | {t3:.1f}s | {tr_m} |\n"
            f"| **Total** | **{total:.1f}s** | |"
        )

        # Copy under a stable name so Gradio serves a predictable file.
        out_audio = os.path.join(tmp, "output.wav")
        shutil.copy(clean, out_audio)
        return out_audio, transcript, translated, timing, f"Done in {total:.1f}s"

    except Exception as e:
        import traceback
        # FIX: the temp working dir previously leaked on every failure; no
        # file from it is returned on this path, so it is safe to remove.
        shutil.rmtree(tmp, ignore_errors=True)
        return None, f"Error: {e}", "", traceback.format_exc(), "Failed"
|
| 90 |
|
| 91 |
+
|
| 92 |
+
# ββ UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
with gr.Blocks(title="ClearWave AI", theme=gr.themes.Soft()) as demo:
|
| 94 |
+
gr.Markdown("# π΅ ClearWave AI\n**Denoise Β· Transcribe Β· Translate**")
|
| 95 |
+
|
| 96 |
with gr.Row():
|
| 97 |
with gr.Column(scale=1):
|
| 98 |
+
audio_in = gr.Audio(
|
| 99 |
+
label="Upload Audio",
|
| 100 |
+
type="filepath",
|
| 101 |
+
sources=["upload", "microphone"],
|
| 102 |
+
)
|
| 103 |
+
in_lang = gr.Dropdown(INPUT_LANGS, value="Auto Detect", label="Input Language")
|
| 104 |
+
out_lang = gr.Dropdown(OUTPUT_LANGS, value="Telugu", label="Output Language")
|
| 105 |
+
run_btn = gr.Button("Process Audio", variant="primary", size="lg")
|
| 106 |
+
status = gr.Markdown("Upload audio and click Process.")
|
| 107 |
+
|
| 108 |
with gr.Column(scale=2):
|
| 109 |
with gr.Tabs():
|
| 110 |
with gr.Tab("Text"):
|
|
|
|
| 116 |
gr.Markdown("#### Translation")
|
| 117 |
translation_out = gr.Markdown("...")
|
| 118 |
with gr.Tab("Clean Audio"):
|
| 119 |
+
audio_out = gr.Audio(
|
| 120 |
+
label="Denoised",
|
| 121 |
+
type="filepath",
|
| 122 |
+
interactive=False,
|
| 123 |
+
)
|
| 124 |
with gr.Tab("Timings"):
|
| 125 |
timing_out = gr.Markdown("...")
|
| 126 |
+
|
| 127 |
run_btn.click(
|
| 128 |
fn=process,
|
| 129 |
inputs=[audio_in, in_lang, out_lang],
|
| 130 |
outputs=[audio_out, transcript_out, translation_out, timing_out, status],
|
| 131 |
show_progress=True,
|
| 132 |
+
api_name=False,
|
| 133 |
)
|
| 134 |
|
| 135 |
print("ClearWave AI ready!")
|