Spaces:

Clearwave48
/

clearwave-api

Sleeping

App Files Files Community

Clearwave48 committed 9 days ago

Commit

c42513e

verified ·

1 Parent(s): 5b73e2d

Create denoiser.py

Browse files

Files changed (1) hide show

denoiser.py +143 -0

denoiser.py ADDED Viewed

	@@ -0,0 +1,143 @@

+"""
+ClearWave — Denoiser  (Cleanvoice SDK edition)
+================================================
+✅ Pre-converts any format (.opus, .aac, .m4a, .mp3, .wav) to standard WAV via ffmpeg
+✅ Sends to Cleanvoice API (noise, fillers, stutters, silences, breaths, mouth sounds)
+✅ Downloads and returns the enhanced audio
+✅ clean_transcript_fillers() — removes filler words from transcript text
+API key is read from HF Space secret: CLEANVOICE_API_KEY
+"""
+import os
+import re
+import logging
+import subprocess
+import requests
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Lower-case filler tokens and two-word phrases that are stripped from
+# transcript text. Entries are compared against words after lower-casing and
+# removal of every character outside a-z, so they must stay plain ASCII.
+FILLER_WORDS = {
+    # English: hesitation sounds
+    "um", "umm", "ummm",
+    "uh", "uhh", "uhhh",
+    "hm", "hmm", "hmmm",
+    "er", "err", "errr",
+    "ah", "ahh", "eh",
+    # English: discourse markers
+    "like",
+    "basically",
+    "literally",
+    "so",
+    "right",
+    "okay",
+    "ok",
+    # English: two-word phrases
+    "you know",
+    "i mean",
+    # Telugu
+    "ante",
+    "ane",
+    "mane",
+    "arey",
+    "enti",
+    # Hindi
+    "matlab",
+    "yani",
+    "bas",
+    "acha",
+}
+class Denoiser:
+    """Enhance speech recordings through the Cleanvoice SDK and tidy transcripts.
+
+    The API key is read once, at construction time, from the
+    ``CLEANVOICE_API_KEY`` environment variable and kept on ``api_key``.
+    """
+
+    def __init__(self) -> None:
+        """Read the API key from the environment and report whether it is set."""
+        self.api_key = os.environ.get("CLEANVOICE_API_KEY", "")
+        if not self.api_key:
+            logger.warning("[Denoiser] ⚠️  CLEANVOICE_API_KEY not set — enhancement will fail")
+        else:
+            # Written to stdout with print(), unlike the warning above which
+            # goes through the logging module.
+            print("[Denoiser] ☁️  Cleanvoice SDK ready (WhatsApp/any-format support active)")
+
+    # ══════════════════════════════════════════════════════════════════
+    # MAIN ENTRY POINT
+    # ══════════════════════════════════════════════════════════════════
+    def process(self, audio_path: str, out_dir: str,
+                fillers: bool       = True,
+                stutters: bool      = True,
+                long_silences: bool = True,
+                breaths: bool       = True,
+                mouth_sounds: bool  = True,
+                **kwargs) -> dict:
+        """
+        Full Cleanvoice enhancement pipeline.
+
+        Steps:
+          A. Pre-convert input to 16kHz mono WAV with ffmpeg; if ffmpeg fails
+             or is not installed, the original file is submitted instead.
+          B. Send to Cleanvoice API via SDK (noise removal and studio sound
+             are always requested; the remaining options follow the flags).
+          C. Download processed audio to ``<out_dir>/clean_output.wav``.
+          D. Return {'audio_path': str}
+
+        Args:
+            audio_path: Path of the recording to enhance.
+            out_dir: Directory for intermediate and final files; created if
+                it does not exist.
+            fillers: Forwarded as ``remove_filler_words``.
+            stutters: Forwarded as ``remove_stutters``.
+            long_silences: Forwarded as ``remove_silence``.
+            breaths: Forwarded as ``remove_breathing``.
+            mouth_sounds: Forwarded as ``remove_mouth_sounds``.
+            **kwargs: Accepted for call-site compatibility and ignored.
+
+        Returns:
+            ``{"audio_path": <path to the downloaded file>}``.
+
+        Raises:
+            RuntimeError: If the API key is missing, the output directory
+                cannot be created, or any SDK/download step fails — so
+                run_pipeline() in main.py can catch and report.
+        """
+        if not self.api_key:
+            raise RuntimeError("CLEANVOICE_API_KEY is not set in HF Space secrets.")
+        # Both ffmpeg and the final write need the directory to exist; fail
+        # early, and with the exception type callers already handle.
+        try:
+            os.makedirs(out_dir, exist_ok=True)
+        except OSError as e:
+            raise RuntimeError(f"Cannot create output directory {out_dir!r}: {e}") from e
+        # ── Step A: Pre-convert to standard WAV ─────────────────────────────
+        standard_input = self._convert_to_wav(audio_path, out_dir)
+        # ── Step B: Process via Cleanvoice SDK ──────────────────────────────
+        try:
+            # Imported lazily so a missing SDK surfaces as the RuntimeError below.
+            from cleanvoice import Cleanvoice
+            cv = Cleanvoice({"api_key": self.api_key})
+            logger.info("[Denoiser] Submitting to Cleanvoice API...")
+            cv_result = cv.process(
+                standard_input,
+                remove_noise=True,
+                studio_sound=True,
+                remove_filler_words=fillers,
+                remove_stutters=stutters,
+                remove_silence=long_silences,
+                remove_breathing=breaths,
+                remove_mouth_sounds=mouth_sounds,
+            )
+            # ── Step C: Download processed audio ────────────────────────────
+            download_url = cv_result.audio.url
+            logger.info(f"[Denoiser] Downloading result from {download_url[:60]}...")
+            response = requests.get(download_url, timeout=120)
+            # Without this, an HTTP error page would be saved as the "audio".
+            response.raise_for_status()
+            audio_data = response.content
+            if not audio_data:
+                raise ValueError("downloaded audio is empty")
+            # NOTE(review): the file is always named .wav; confirm the API
+            # actually returns WAV data for this request.
+            final_wav = os.path.join(out_dir, "clean_output.wav")
+            with open(final_wav, "wb") as f:
+                f.write(audio_data)
+            logger.info(f"[Denoiser] ✅ Enhanced audio saved → {final_wav}")
+            return {"audio_path": final_wav}
+        except Exception as e:
+            logger.error(f"[Denoiser] Cleanvoice SDK error: {e}")
+            raise RuntimeError(f"Cleanvoice enhancement failed: {e}") from e
+
+    def _convert_to_wav(self, audio_path: str, out_dir: str) -> str:
+        """Return a 16kHz mono WAV copy of *audio_path*, or *audio_path* itself on failure."""
+        converted = os.path.join(out_dir, "input_converted.wav")
+        try:
+            result = subprocess.run([
+                "ffmpeg", "-y", "-i", audio_path,
+                "-ar", "16000", "-ac", "1", converted
+            ], capture_output=True)
+        except OSError as e:
+            # Raised when the ffmpeg executable is missing or cannot be run;
+            # treated like any other conversion failure.
+            reason = str(e)
+        else:
+            if result.returncode == 0:
+                return converted
+            # Only the tail of stderr is logged.
+            reason = result.stderr.decode(errors='replace')[-200:]
+        logger.warning(f"[Denoiser] ffmpeg pre-convert failed, using original file: {reason}")
+        return audio_path
+
+    # ══════════════════════════════════════════════════════════════════
+    # TRANSCRIPT FILLER CLEANER
+    # ══════════════════════════════════════════════════════════════════
+    def clean_transcript_fillers(self, transcript: str) -> str:
+        """
+        Remove filler words from transcript TEXT to match the cleaned audio.
+
+        Handles both single-word fillers ("um", "like") and
+        two-word fillers ("you know", "i mean"). Words are compared against
+        FILLER_WORDS after lower-casing and dropping every character outside
+        a-z, so attached punctuation is removed together with the filler.
+        A two-word filler is not matched across a sentence boundary
+        ("Thank you. Know this" is left intact).
+
+        Args:
+            transcript: Whitespace-separated transcript text.
+
+        Returns:
+            The remaining words joined by single spaces; "" for empty input.
+        """
+        if not transcript:
+            return ""
+        tokens = transcript.split()
+        # Normalise every token once instead of re-running the regex per lookup.
+        keys = [re.sub(r'[^a-z\s]', '', token.lower()).strip() for token in tokens]
+        sentence_end = (".", "!", "?")
+        closers = "\"')]}\u201d\u2019"
+        kept = []
+        i = 0
+        while i < len(tokens):
+            # Check two-word filler first, unless the first token closes a
+            # sentence (optionally followed by closing quotes/brackets).
+            if i + 1 < len(tokens) and not tokens[i].rstrip(closers).endswith(sentence_end):
+                if f"{keys[i]} {keys[i + 1]}" in FILLER_WORDS:
+                    i += 2
+                    continue
+            if keys[i] in FILLER_WORDS:
+                i += 1
+                continue
+            kept.append(tokens[i])
+            i += 1
+        return " ".join(kept)