Spaces:

Clearwave48
/

clearwave-api

Sleeping

App Files Files Community

Clearwave48 committed 9 days ago

Commit

2fcb053

verified ·

1 Parent(s): c42513e

Update main.py

Browse files

Files changed (1) hide show

main.py +64 -153

main.py CHANGED Viewed

@@ -1,11 +1,18 @@
 """
-ClearWave AI — API Space (FastAPI only)
-Handles /api/health and /api/process-url
-Audio enhancement  : Cleanvoice API (noise, fillers, stutters, silences, breaths)
-Transcription      : Groq Whisper large-v3 (primary) / faster-whisper (fallback)
-Translation        : NLLB-200-1.3B (primary) / Google Translate (fallback)
-Summary            : Extractive (position-scored)
 """
 import os
@@ -20,23 +27,22 @@ from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-# ── Cloudinary config ─────────────────────────────────────────────────────────
 cloudinary.config(
     cloud_name = os.environ.get("CLOUD_NAME"),
     api_key    = os.environ.get("API_KEY"),
     api_secret = os.environ.get("API_SECRET"),
 )
-# ── Cleanvoice config ─────────────────────────────────────────────────────────
-CLEANVOICE_API_KEY = os.environ.get("CLEANVOICE_API_KEY")
-CLEANVOICE_BASE    = "https://api.cleanvoice.ai/v2"
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-from transcriber import Transcriber
-from translator  import Translator
 transcriber = Transcriber()
 translator  = Translator()
@@ -50,126 +56,6 @@ app.add_middleware(
 )
-# ══════════════════════════════════════════════════════════════════════════════
-# CLEANVOICE HELPER
-# ══════════════════════════════════════════════════════════════════════════════
-def cleanvoice_enhance(audio_path: str, out_dir: str,
-                       opt_fillers: bool  = True,
-                       opt_stutters: bool = True,
-                       opt_silences: bool = True,
-                       opt_breaths: bool  = True,
-                       opt_mouth: bool    = True) -> dict:
-    """
-    Full Cleanvoice enhancement pipeline:
-      1. Upload audio file  → get signed URL
-      2. Submit edit job    → configure which features to enable
-      3. Poll until done    → max 36 attempts × 10s = 6 minutes
-      4. Download result    → save to out_dir
-    Returns: {"audio_path": str, "stats": dict}
-    Raises RuntimeError on failure so run_pipeline() can catch and report it.
-    """
-    if not CLEANVOICE_API_KEY:
-        raise RuntimeError("CLEANVOICE_API_KEY is not set in HF Space secrets.")
-    headers = {"X-API-Key": CLEANVOICE_API_KEY}
-    # ── Step 1: Upload ────────────────────────────────────────────────────────
-    logger.info("[Cleanvoice] Uploading audio...")
-    with open(audio_path, "rb") as f:
-        up_resp = requests.post(
-            f"{CLEANVOICE_BASE}/uploads",
-            headers=headers,
-            files={"file": (os.path.basename(audio_path), f)},
-            timeout=120,
-        )
-    up_resp.raise_for_status()
-    file_url = up_resp.json().get("url") or up_resp.json().get("signedUrl")
-    if not file_url:
-        raise RuntimeError(f"Cleanvoice upload gave no URL: {up_resp.json()}")
-    logger.info(f"[Cleanvoice] Upload done → {file_url[:60]}...")
-    # ── Step 2: Submit edit job ───────────────────────────────────────────────
-    # Cleanvoice config flags — map your pipeline options to Cleanvoice features
-    config = {
-        "enhance_speech":      True,           # always on — core noise removal
-        "remove_filler_words": opt_fillers,    # um, uh, like, basically...
-        "remove_stutters":     opt_stutters,   # word repetitions
-        "remove_silence":      opt_silences,   # long pauses
-        "remove_breathing":    opt_breaths,    # breath sounds
-        "remove_mouth_sounds": opt_mouth,      # clicks, pops, smacks
-    }
-    logger.info(f"[Cleanvoice] Submitting edit job with config: {config}")
-    edit_resp = requests.post(
-        f"{CLEANVOICE_BASE}/edits",
-        headers={**headers, "Content-Type": "application/json"},
-        json={"input": {"files": [file_url], "config": config}},
-        timeout=30,
-    )
-    edit_resp.raise_for_status()
-    edit_data = edit_resp.json()
-    edit_id   = edit_data.get("id") or edit_data.get("editId")
-    if not edit_id:
-        raise RuntimeError(f"Cleanvoice edit job gave no ID: {edit_data}")
-    logger.info(f"[Cleanvoice] Edit job submitted → id={edit_id}")
-    # ── Step 3: Poll until done ───────────────────────────────────────────────
-    max_attempts = 36   # 36 × 10s = 6 minutes max
-    for attempt in range(1, max_attempts + 1):
-        time.sleep(10)
-        status_resp = requests.get(
-            f"{CLEANVOICE_BASE}/edits/{edit_id}",
-            headers=headers,
-            timeout=15,
-        )
-        status_resp.raise_for_status()
-        status_data = status_resp.json()
-        status      = status_data.get("status", "unknown")
-        logger.info(f"[Cleanvoice] Poll {attempt}/{max_attempts} → status={status}")
-        if status == "completed":
-            # Grab the output URL — try common key names
-            output      = status_data.get("output") or {}
-            enhanced_dl = (
-                output.get("url")
-                or output.get("downloadUrl")
-                or status_data.get("downloadUrl")
-            )
-            if not enhanced_dl:
-                raise RuntimeError(f"Cleanvoice completed but no download URL: {status_data}")
-            # ── Step 4: Download enhanced audio ──────────────────────────────
-            logger.info(f"[Cleanvoice] Downloading result from {enhanced_dl[:60]}...")
-            dl = requests.get(enhanced_dl, timeout=120)
-            dl.raise_for_status()
-            # Preserve original extension if possible, default to .mp3
-            ext      = os.path.splitext(enhanced_dl.split("?")[0])[-1] or ".mp3"
-            out_path = os.path.join(out_dir, f"cleanvoice_enhanced{ext}")
-            with open(out_path, "wb") as f:
-                f.write(dl.content)
-            logger.info(f"[Cleanvoice] ✅ Enhanced audio saved → {out_path}")
-            return {
-                "audio_path": out_path,
-                "stats": {
-                    "noise_method":         "Cleanvoice API",
-                    "fillers_removed":      "yes" if opt_fillers  else "no",
-                    "stutters_removed":     "yes" if opt_stutters else "no",
-                    "silences_removed_sec": "yes" if opt_silences else "no",
-                    "breaths_reduced":      opt_breaths,
-                    "mouth_sounds_removed": "yes" if opt_mouth    else "no",
-                },
-            }
-        elif status in ("error", "failed"):
-            raise RuntimeError(f"Cleanvoice job failed: {status_data.get('message', status_data)}")
-        # still processing — keep polling
-    raise RuntimeError(f"Cleanvoice timed out after {max_attempts * 10}s (edit_id={edit_id})")
 # ══════════════════════════════════════════════════════════════════════════════
 # PIPELINE
 # ══════════════════════════════════════════════════════════════════════════════
@@ -177,9 +63,12 @@ def cleanvoice_enhance(audio_path: str, out_dir: str,
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
                  opt_breaths=True, opt_mouth=True):
-    out_dir  = tempfile.mkdtemp()
-    stats    = {}
     word_segs = []
     try:
@@ -187,16 +76,23 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
         yield {"status": "processing", "step": 1,
                "message": "Step 1/4 — Enhancing audio with Cleanvoice..."}
         try:
-            result = cleanvoice_enhance(
                 audio_path, out_dir,
-                opt_fillers=opt_fillers,
-                opt_stutters=opt_stutters,
-                opt_silences=opt_silences,
-                opt_breaths=opt_breaths,
-                opt_mouth=opt_mouth,
             )
             clean1 = result["audio_path"]
-            stats  = result["stats"]
             logger.info("[Pipeline] Cleanvoice enhancement complete")
         except Exception as e:
             # Cleanvoice failed — log it and continue with original audio
@@ -216,6 +112,11 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                "message": "Step 2/4 — Transcribing..."}
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         word_segs = transcriber._last_segments
         logger.info(f"[Pipeline] Transcription done: {len(transcript.split())} words, lang={detected_lang}")
         # ── Step 3: Translate ─────────────────────────────────────────────────
@@ -235,6 +136,7 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                "message": "Step 4/4 — Summarizing & uploading..."}
         summary = translator.summarize(transcript)
         try:
             upload_result = cloudinary.uploader.upload(
                 clean1,
@@ -245,7 +147,6 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
             logger.info(f"[Pipeline] Cloudinary upload done: {enhanced_url}")
         except Exception as e:
             logger.error(f"[Pipeline] Cloudinary upload failed: {e}")
-            enhanced_url = None
         # ── Done ──────────────────────────────────────────────────────────────
         yield {
@@ -266,14 +167,13 @@ def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                 "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                 "transcription_method": t_method,
                 "translation_method":   tl_method,
-                "processing_sec":       0,
                 "word_segments":        len(word_segs),
                 "transcript_words":     len(transcript.split()),
             },
         }
     except Exception as e:
-        logger.error(f"Pipeline failed: {e}", exc_info=True)
         yield {"status": "error", "message": f"Error: {str(e)}"}
@@ -311,13 +211,23 @@ async def process_url(request: Request):
         yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
         try:
             resp = requests.get(audio_url, timeout=60, stream=True)
             resp.raise_for_status()
-            suffix = ".wav" if "wav" in audio_url.lower() else ".mp3"
-            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
             downloaded = 0
-            total = int(resp.headers.get("content-length", 0))
             for chunk in resp.iter_content(chunk_size=65536):
                 if chunk:
                     tmp.write(chunk)
@@ -325,12 +235,13 @@ async def process_url(request: Request):
                     if total:
                         pct = int(downloaded * 100 / total)
                         yield sse({"status": "processing", "step": 0,
-                                   "message": "Downloading... " + str(pct) + "%"})
             tmp.close()
         except Exception as e:
-            yield sse({"status": "error", "message": "Download failed: " + str(e)})
             return
         for result in run_pipeline(tmp.name, src_lang, tgt_lang,
                                    opt_fillers, opt_stutters, opt_silences,
                                    opt_breaths, opt_mouth):

 """
+ClearWave AI — API Space (FastAPI)
+===================================
+Endpoints: /api/health  |  /api/process-url
+Pipeline:
+  1. Download audio from URL
+  2. Denoise / enhance  → Denoiser (Cleanvoice SDK)
+  3. Transcribe         → Groq Whisper large-v3 (primary) / faster-whisper (fallback)
+  4. Translate          → NLLB-200-1.3B (primary) / Google Translate (fallback)
+  5. Summarize          → Extractive (position-scored)
+  6. Upload result      → Cloudinary
+All secrets read from HF Space environment variables:
+  CLEANVOICE_API_KEY, CLOUD_NAME, API_KEY, API_SECRET, GROQ_API_KEY
 """
 import os
 from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+from denoiser    import Denoiser
+from transcriber import Transcriber
+from translator  import Translator
+# ── Cloudinary config ──────────────────────────────────────────────────────────
 cloudinary.config(
     cloud_name = os.environ.get("CLOUD_NAME"),
     api_key    = os.environ.get("API_KEY"),
     api_secret = os.environ.get("API_SECRET"),
 )
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# ── Singleton instances (loaded once at startup) ───────────────────────────────
+denoiser    = Denoiser()
 transcriber = Transcriber()
 translator  = Translator()
 )
 # ══════════════════════════════════════════════════════════════════════════════
 # PIPELINE
 # ══════════════════════════════════════════════════════════════════════════════
 def run_pipeline(audio_path, src_lang="auto", tgt_lang="te",
                  opt_fillers=True, opt_stutters=True, opt_silences=True,
                  opt_breaths=True, opt_mouth=True):
+    """
+    Generator — yields SSE-style dicts at each step.
+    Caller wraps each dict in  "data: <json>\n\n"
+    """
+    out_dir   = tempfile.mkdtemp()
+    stats     = {}
     word_segs = []
     try:
         yield {"status": "processing", "step": 1,
                "message": "Step 1/4 — Enhancing audio with Cleanvoice..."}
         try:
+            result = denoiser.process(
                 audio_path, out_dir,
+                fillers=opt_fillers,
+                stutters=opt_stutters,
+                long_silences=opt_silences,
+                breaths=opt_breaths,
+                mouth_sounds=opt_mouth,
             )
             clean1 = result["audio_path"]
+            stats  = {
+                "noise_method":         "Cleanvoice API",
+                "fillers_removed":      "yes" if opt_fillers  else "no",
+                "stutters_removed":     "yes" if opt_stutters else "no",
+                "silences_removed_sec": "yes" if opt_silences else "no",
+                "breaths_reduced":      opt_breaths,
+                "mouth_sounds_removed": "yes" if opt_mouth    else "no",
+            }
             logger.info("[Pipeline] Cleanvoice enhancement complete")
         except Exception as e:
             # Cleanvoice failed — log it and continue with original audio
                "message": "Step 2/4 — Transcribing..."}
         transcript, detected_lang, t_method = transcriber.transcribe(clean1, src_lang)
         word_segs = transcriber._last_segments
+        # Clean filler words from transcript text too
+        if opt_fillers:
+            transcript = denoiser.clean_transcript_fillers(transcript)
         logger.info(f"[Pipeline] Transcription done: {len(transcript.split())} words, lang={detected_lang}")
         # ── Step 3: Translate ─────────────────────────────────────────────────
                "message": "Step 4/4 — Summarizing & uploading..."}
         summary = translator.summarize(transcript)
+        enhanced_url = None
         try:
             upload_result = cloudinary.uploader.upload(
                 clean1,
             logger.info(f"[Pipeline] Cloudinary upload done: {enhanced_url}")
         except Exception as e:
             logger.error(f"[Pipeline] Cloudinary upload failed: {e}")
         # ── Done ──────────────────────────────────────────────────────────────
         yield {
                 "mouth_sounds_removed": stats.get("mouth_sounds_removed", 0),
                 "transcription_method": t_method,
                 "translation_method":   tl_method,
                 "word_segments":        len(word_segs),
                 "transcript_words":     len(transcript.split()),
             },
         }
     except Exception as e:
+        logger.error(f"[Pipeline] Fatal error: {e}", exc_info=True)
         yield {"status": "error", "message": f"Error: {str(e)}"}
         yield sse({"status": "processing", "step": 0, "message": "Downloading audio..."})
+        # ── Download audio from URL ───────────────────────────────────────────
         try:
             resp = requests.get(audio_url, timeout=60, stream=True)
             resp.raise_for_status()
+            # Detect extension — support WhatsApp .opus and common formats
+            lower_url = audio_url.lower().split("?")[0]
+            if   ".opus" in lower_url: suffix = ".opus"
+            elif ".ogg"  in lower_url: suffix = ".ogg"
+            elif ".aac"  in lower_url: suffix = ".aac"
+            elif ".m4a"  in lower_url: suffix = ".m4a"
+            elif ".wav"  in lower_url: suffix = ".wav"
+            else:                      suffix = ".mp3"
+            tmp      = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
             downloaded = 0
+            total    = int(resp.headers.get("content-length", 0))
             for chunk in resp.iter_content(chunk_size=65536):
                 if chunk:
                     tmp.write(chunk)
                     if total:
                         pct = int(downloaded * 100 / total)
                         yield sse({"status": "processing", "step": 0,
+                                   "message": f"Downloading... {pct}%"})
             tmp.close()
         except Exception as e:
+            yield sse({"status": "error", "message": f"Download failed: {e}"})
             return
+        # ── Run pipeline ──────────────────────────────────────────────────────
         for result in run_pipeline(tmp.name, src_lang, tgt_lang,
                                    opt_fillers, opt_stutters, opt_silences,
                                    opt_breaths, opt_mouth):