Spaces:

testingfaces
/

clearwave-ai

Paused

App Files Files Community

testingfaces committed 28 days ago

Commit

de2fb00

verified ·

1 Parent(s): 19073e1

Update denoiser.py

Browse files

Files changed (1) hide show

denoiser.py +71 -28

denoiser.py CHANGED Viewed

@@ -1,6 +1,12 @@
 """
 Department 1 - Denoiser
-Uses noisereduce for noise removal (no Rust/C++ required).
 """
 import os
@@ -12,70 +18,107 @@ import logging
 logger = logging.getLogger(__name__)
-TARGET_SR = 16000
-TARGET_LOUDNESS = -23.0
 class Denoiser:
     def __init__(self):
-        print("[Denoiser] Ready (noisereduce)")
     def process(self, audio_path: str, out_dir: str) -> str:
         t0 = time.time()
-        # Step 1: Convert to WAV
         wav_path = os.path.join(out_dir, "input.wav")
         self._convert_to_wav(audio_path, wav_path)
-        # Step 2: Read
         audio, sr = sf.read(wav_path, always_2d=True)
-        # Step 3: Stereo to mono
-        if audio.shape[1] > 1:
-            audio = audio.mean(axis=1)
         else:
-            audio = audio.squeeze()
-        audio = audio.astype(np.float32)
-        # Step 4: Denoise with noisereduce
-        try:
-            import noisereduce as nr
-            audio = nr.reduce_noise(y=audio, sr=sr).astype(np.float32)
-        except Exception as e:
-            logger.warning(f"[Denoiser] noisereduce failed: {e}, using raw audio")
-        # Step 5: Normalise loudness
         audio = self._normalise(audio, sr)
-        # Step 6: Save
         out_path = os.path.join(out_dir, "denoised.wav")
-        sf.write(out_path, audio, sr, subtype="PCM_16")
-        logger.info(f"[Denoiser] Done in {time.time()-t0:.2f}s")
         return out_path
     def _convert_to_wav(self, src: str, dst: str):
-        cmd = ["ffmpeg", "-y", "-i", src, "-acodec", "pcm_s16le",
-               "-ar", str(TARGET_SR), "-ac", "1", dst]
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
             try:
                 data, sr = sf.read(src, always_2d=True)
-                sf.write(dst, data, sr, subtype="PCM_16")
             except Exception as e:
                 raise RuntimeError(f"Cannot read audio file: {e}")
     def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
         try:
             import pyloudnorm as pyln
             meter    = pyln.Meter(sr)
             loudness = meter.integrated_loudness(audio)
             if np.isfinite(loudness) and loudness < 0:
                 audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
         except Exception:
-            rms = np.sqrt(np.mean(audio ** 2))
             if rms > 1e-9:
                 target = 10 ** (TARGET_LOUDNESS / 20.0)
                 audio  = audio * (target / rms)
         return np.clip(audio, -1.0, 1.0).astype(np.float32)

 """
 Department 1 - Denoiser
+Uses noisereduce for noise removal.
+✅ IMPROVED:
+  - 44100 Hz sample rate (CD quality) instead of 16000 Hz (telephone)
+  - Stereo preserved if original is stereo
+  - Better loudness normalisation target (-18 LUFS instead of -23 LUFS)
+  - Stronger noise reduction using noisereduce's stationary mode (stationary=True, prop_decrease=0.85)
+  - High quality PCM_24 output instead of PCM_16
 """
 import os
 logger = logging.getLogger(__name__)
+# ✅ UPGRADED: 44100 = CD quality (was 16000 = telephone quality)
+TARGET_SR       = 44100
+# ✅ UPGRADED: -18 LUFS is louder/clearer (was -23 LUFS, which was too quiet)
+TARGET_LOUDNESS = -18.0
 class Denoiser:
     def __init__(self):
+        print("[Denoiser] Ready (noisereduce — 44100Hz CD quality)")
     def process(self, audio_path: str, out_dir: str) -> str:
         t0 = time.time()
+        # Step 1: Convert to high quality WAV (44100 Hz, stereo preserved)
         wav_path = os.path.join(out_dir, "input.wav")
         self._convert_to_wav(audio_path, wav_path)
+        # Step 2: Read audio
         audio, sr = sf.read(wav_path, always_2d=True)
+        original_channels = audio.shape[1]
+        # Step 3: Process each channel separately to preserve stereo
+        if original_channels > 1:
+            # Stereo — denoise each channel independently
+            denoised_channels = []
+            for ch in range(original_channels):
+                channel = audio[:, ch].astype(np.float32)
+                channel = self._denoise_channel(channel, sr)
+                denoised_channels.append(channel)
+            audio = np.stack(denoised_channels, axis=1)
         else:
+            # Mono
+            audio = audio.squeeze().astype(np.float32)
+            audio = self._denoise_channel(audio, sr)
+        # Step 4: Normalise loudness
         audio = self._normalise(audio, sr)
+        # Step 5: Save at high quality (PCM_24 = better than PCM_16)
         out_path = os.path.join(out_dir, "denoised.wav")
+        sf.write(out_path, audio, sr, subtype="PCM_24")
+        elapsed = time.time() - t0
+        logger.info(f"[Denoiser] Done in {elapsed:.2f}s — {sr}Hz, {original_channels}ch")
+        print(f"[Denoiser] ✅ Done in {elapsed:.2f}s")
         return out_path
+    def _denoise_channel(self, audio: np.ndarray, sr: int) -> np.ndarray:
+        """Denoise a single channel with noisereduce."""
+        try:
+            import noisereduce as nr
+            # ✅ stationary=True is better for consistent background noise
+            # (fans, AC, hum) — more aggressive but cleaner result
+            denoised = nr.reduce_noise(
+                y=audio,
+                sr=sr,
+                stationary=True,        # good for constant background noise
+                prop_decrease=0.85,     # 85% noise reduction (0-1, higher = more aggressive)
+            ).astype(np.float32)
+            return denoised
+        except Exception as e:
+            logger.warning(f"[Denoiser] noisereduce failed: {e}, using raw audio")
+            return audio
     def _convert_to_wav(self, src: str, dst: str):
+        """Convert any audio format to high quality WAV."""
+        cmd = [
+            "ffmpeg", "-y", "-i", src,
+            "-acodec", "pcm_s24le",     # 24-bit depth (better than 16-bit)
+            "-ar", str(TARGET_SR),      # 44100 Hz sample rate
+            # ✅ No -ac 1 here — preserve original channel count (stereo stays stereo)
+            dst
+        ]
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
+            # Fallback: try reading directly with soundfile
             try:
                 data, sr = sf.read(src, always_2d=True)
+                sf.write(dst, data, sr, subtype="PCM_24")
             except Exception as e:
                 raise RuntimeError(f"Cannot read audio file: {e}")
     def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
+        """Normalise to target loudness so output is clear and audible."""
         try:
             import pyloudnorm as pyln
+            # pyloudnorm needs mono or stereo, handle both
             meter    = pyln.Meter(sr)
             loudness = meter.integrated_loudness(audio)
             if np.isfinite(loudness) and loudness < 0:
                 audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
+                print(f"[Denoiser] Loudness: {loudness:.1f}dB → {TARGET_LOUDNESS}dB")
         except Exception:
+            # Simple RMS normalisation fallback
+            if audio.ndim > 1:
+                rms = np.sqrt(np.mean(audio ** 2))
+            else:
+                rms = np.sqrt(np.mean(audio ** 2))
             if rms > 1e-9:
                 target = 10 ** (TARGET_LOUDNESS / 20.0)
                 audio  = audio * (target / rms)
+                print(f"[Denoiser] RMS normalised to {TARGET_LOUDNESS}dB")
         return np.clip(audio, -1.0, 1.0).astype(np.float32)