Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed 17 days ago

Commit

195eeb1

verified ·

1 Parent(s): 7c63ce0

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -7

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import json
 import base64
 import tempfile
 import logging
 import threading
 import time
 import requests
@@ -36,6 +37,40 @@ LANGUAGES_DISPLAY = {
 }
 OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
 # ══════════════════════════════════════════════════════════════════════
@@ -121,7 +156,7 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
 # ══════════════════════════════════════════════════════════════════════
-# GRADIO UI — internal port 7861
 # ══════════════════════════════════════════════════════════════════════
 def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
                           opt_fillers, opt_stutters, opt_silences,
@@ -129,6 +164,10 @@ def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
     if audio_path is None:
         yield ("❌ Please upload an audio file.", "", "", None, "", "")
         return
     src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
     tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
     for result in run_pipeline(audio_path, src_lang, tgt_lang,
@@ -159,8 +198,11 @@ with gr.Blocks(title="ClearWave AI") as demo:
     gr.Markdown("# 🎵 ClearWave AI\n### Professional Audio Enhancement")
     with gr.Row():
         with gr.Column(scale=1):
-            audio_in = gr.Audio(label="📁 Upload Audio", type="filepath",
-                                sources=["upload", "microphone"],format="mp3")
             with gr.Row():
                 in_lang  = gr.Dropdown(label="Input Language",
                                        choices=list(LANGUAGES_DISPLAY.keys()),
@@ -215,6 +257,9 @@ async def api_health():
 @demo.app.post("/api/process-url")
 async def api_process_url(request: _Request):
     data         = await request.json()
     audio_url    = data.get("audioUrl")
     audio_id     = data.get("audioId",     "")
     src_lang     = data.get("srcLang",     "auto")
@@ -240,7 +285,16 @@ async def api_process_url(request: _Request):
         try:
             resp = requests.get(audio_url, timeout=60, stream=True)
             resp.raise_for_status()
-            suffix = ".wav" if "wav" in audio_url.lower() else ".mp3"
             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
             downloaded = 0
             total = int(resp.headers.get("content-length", 0))
@@ -250,13 +304,17 @@ async def api_process_url(request: _Request):
                     downloaded += len(chunk)
                     if total:
                         pct = int(downloaded * 100 / total)
-                        yield sse({"status": "processing", "step": 0, "message": "Downloading... " + str(pct) + "%"})
             tmp.close()
         except Exception as e:
             yield sse({"status": "error", "message": "Download failed: " + str(e)})
             return
-        for result in run_pipeline(tmp.name, src_lang, tgt_lang,
                                    opt_fillers, opt_stutters, opt_silences,
                                    opt_breaths, opt_mouth):
             result["audioId"] = audio_id
@@ -264,6 +322,8 @@ async def api_process_url(request: _Request):
         try:
             os.unlink(tmp.name)
         except Exception:
             pass
@@ -276,7 +336,7 @@ async def api_process_url(request: _Request):
 logger.info("✅ /api/health and /api/process-url registered on demo.app")
 # ══════════════════════════════════════════════════════════════════════
-# LAUNCH — same pattern as working HF spaces
 # ══════════════════════════════════════════════════════════════════════
 if __name__ == "__main__":
     demo.launch()

 import base64
 import tempfile
 import logging
+import subprocess
 import threading
 import time
 import requests
 }
 OUT_LANGS = {k: v for k, v in LANGUAGES_DISPLAY.items() if k != "Auto Detect"}
+# ══════════════════════════════════════════════════════════════════════
+# AUDIO FORMAT CONVERTER — supports .mpeg, .mp4, .m4a etc.
+# ══════════════════════════════════════════════════════════════════════
+def convert_to_wav(audio_path: str) -> str:
+    """
+    Convert an audio file to 16 kHz, mono, 16-bit PCM .wav using ffmpeg.
+
+    Files whose extension is already .wav, .mp3, .flac, .ogg or .aac are
+    returned unchanged. Anything else (.mpeg, .mp4, .m4a, .wma, .amr, no
+    extension, ...) is transcoded to "<audio_path>_converted.wav".
+
+    Args:
+        audio_path: Path to the input file. None or "" is returned as-is.
+
+    Returns:
+        Path to the converted .wav file, or the original path when no
+        conversion was needed or the conversion failed. Failures are
+        logged as warnings and never raised, so callers can always pass
+        the result straight on.
+    """
+    if not audio_path:
+        return audio_path
+    ext = os.path.splitext(audio_path)[1].lower()
+    # Already a safe format — no conversion needed
+    if ext in (".wav", ".mp3", ".flac", ".ogg", ".aac"):
+        return audio_path
+    # Convert .mpeg / .mp4 / .m4a / .wma / .amr etc. → .wav
+    try:
+        converted = audio_path + "_converted.wav"
+        result = subprocess.run(
+            [
+                "ffmpeg", "-y", "-i", audio_path,
+                "-ar", "16000",
+                "-ac", "1",
+                "-acodec", "pcm_s16le",
+                converted,
+            ],
+            # Never let ffmpeg read (or block on) the server's stdin.
+            stdin=subprocess.DEVNULL,
+            capture_output=True,
+        )
+    except Exception as e:
+        # Best-effort boundary: ffmpeg missing, unusable path, etc.
+        logger.warning(f"Conversion error: {e}")
+        return audio_path
+    if result.returncode == 0 and os.path.exists(converted):
+        logger.info(f"Converted {ext} → .wav successfully")
+        return converted
+    # ffmpeg's stderr is not guaranteed to be valid UTF-8; decode leniently
+    # so the real diagnostic is logged instead of a UnicodeDecodeError.
+    stderr_text = result.stderr.decode(errors="replace")
+    logger.warning(f"Conversion failed: {stderr_text}")
+    # Callers only clean up the path we return, so remove any partial
+    # output here; a missing file is expected and safe to ignore.
+    try:
+        os.remove(converted)
+    except OSError:
+        pass
+    return audio_path
 # ══════════════════════════════════════════════════════════════════════
 # ══════════════════════════════════════════════════════════════════════
+# GRADIO UI
 # ══════════════════════════════════════════════════════════════════════
 def process_audio_gradio(audio_path, in_lang_name, out_lang_name,
                           opt_fillers, opt_stutters, opt_silences,
     if audio_path is None:
         yield ("❌ Please upload an audio file.", "", "", None, "", "")
         return
+    # ✅ Auto-convert .mpeg / .mp4 / .m4a and any unsupported format → .wav
+    audio_path = convert_to_wav(audio_path)
     src_lang = LANGUAGES_DISPLAY.get(in_lang_name, "auto")
     tgt_lang = LANGUAGES_DISPLAY.get(out_lang_name, "te")
     for result in run_pipeline(audio_path, src_lang, tgt_lang,
     gr.Markdown("# 🎵 ClearWave AI\n### Professional Audio Enhancement")
     with gr.Row():
         with gr.Column(scale=1):
+            audio_in = gr.Audio(
+                label="📁 Upload Audio (MP3, WAV, MPEG, MP4, AAC, OGG, FLAC, AMR...)",
+                type="filepath",
+                sources=["upload", "microphone"],
+            )
             with gr.Row():
                 in_lang  = gr.Dropdown(label="Input Language",
                                        choices=list(LANGUAGES_DISPLAY.keys()),
 @demo.app.post("/api/process-url")
 async def api_process_url(request: _Request):
     data         = await request.json()
+    # Handle both plain JSON and Gradio-wrapped {"data": {...}}
+    if "data" in data and isinstance(data["data"], dict):
+        data = data["data"]
     audio_url    = data.get("audioUrl")
     audio_id     = data.get("audioId",     "")
     src_lang     = data.get("srcLang",     "auto")
         try:
             resp = requests.get(audio_url, timeout=60, stream=True)
             resp.raise_for_status()
+            # ✅ Detect correct suffix from URL
+            url_lower = audio_url.lower()
+            if "wav" in url_lower:
+                suffix = ".wav"
+            elif "mpeg" in url_lower:
+                suffix = ".mpeg"
+            elif "mp4" in url_lower:
+                suffix = ".mp4"
+            else:
+                suffix = ".mp3"
             tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
             downloaded = 0
             total = int(resp.headers.get("content-length", 0))
                     downloaded += len(chunk)
                     if total:
                         pct = int(downloaded * 100 / total)
+                        yield sse({"status": "processing", "step": 0,
+                                   "message": "Downloading... " + str(pct) + "%"})
             tmp.close()
         except Exception as e:
             yield sse({"status": "error", "message": "Download failed: " + str(e)})
             return
+        # ✅ Convert to wav if needed
+        converted_path = convert_to_wav(tmp.name)
+        for result in run_pipeline(converted_path, src_lang, tgt_lang,
                                    opt_fillers, opt_stutters, opt_silences,
                                    opt_breaths, opt_mouth):
             result["audioId"] = audio_id
         try:
             os.unlink(tmp.name)
+            if converted_path != tmp.name:
+                os.unlink(converted_path)
         except Exception:
             pass
 logger.info("✅ /api/health and /api/process-url registered on demo.app")
 # ══════════════════════════════════════════════════════════════════════
+# LAUNCH
 # ══════════════════════════════════════════════════════════════════════
 if __name__ == "__main__":
     demo.launch()