Files changed (1) hide show
  1. denoiser.py +130 -48
denoiser.py CHANGED
@@ -1,12 +1,13 @@
1
  """
2
  Department 1 - Denoiser
3
- Uses noisereduce for noise removal.
4
- IMPROVED:
5
- - 44100 Hz sample rate (CD quality) instead of 16000 Hz (telephone)
 
6
  - Stereo preserved if original is stereo
7
- - Better loudness normalisation target (-18 dB instead of -23 dB)
8
- - Stronger noise reduction with stationary noise detection
9
- - High quality PCM_24 output instead of PCM_16
10
  """
11
 
12
  import os
@@ -18,20 +19,33 @@ import logging
18
 
19
  logger = logging.getLogger(__name__)
20
 
21
- # ✅ UPGRADED: 44100 = CD quality (was 16000 = telephone quality)
22
- TARGET_SR = 44100
23
- # ✅ UPGRADED: -18 dB is louder/clearer (was -23 dB which was too quiet)
24
  TARGET_LOUDNESS = -18.0
25
 
26
 
27
  class Denoiser:
28
  def __init__(self):
29
- print("[Denoiser] Ready (noisereduce — 44100Hz CD quality)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def process(self, audio_path: str, out_dir: str) -> str:
32
  t0 = time.time()
33
 
34
- # Step 1: Convert to high quality WAV (44100 Hz, stereo preserved)
35
  wav_path = os.path.join(out_dir, "input.wav")
36
  self._convert_to_wav(audio_path, wav_path)
37
 
@@ -39,24 +53,16 @@ class Denoiser:
39
  audio, sr = sf.read(wav_path, always_2d=True)
40
  original_channels = audio.shape[1]
41
 
42
- # Step 3: Process each channel separately to preserve stereo
43
- if original_channels > 1:
44
- # Stereo denoise each channel independently
45
- denoised_channels = []
46
- for ch in range(original_channels):
47
- channel = audio[:, ch].astype(np.float32)
48
- channel = self._denoise_channel(channel, sr)
49
- denoised_channels.append(channel)
50
- audio = np.stack(denoised_channels, axis=1)
51
  else:
52
- # Mono
53
- audio = audio.squeeze().astype(np.float32)
54
- audio = self._denoise_channel(audio, sr)
55
 
56
  # Step 4: Normalise loudness
57
  audio = self._normalise(audio, sr)
58
 
59
- # Step 5: Save at high quality (PCM_24 = better than PCM_16)
60
  out_path = os.path.join(out_dir, "denoised.wav")
61
  sf.write(out_path, audio, sr, subtype="PCM_24")
62
 
@@ -65,35 +71,116 @@ class Denoiser:
65
  print(f"[Denoiser] ✅ Done in {elapsed:.2f}s")
66
  return out_path
67
 
68
- def _denoise_channel(self, audio: np.ndarray, sr: int) -> np.ndarray:
69
- """Denoise a single channel with noisereduce."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  try:
71
  import noisereduce as nr
72
- # stationary=True is better for consistent background noise
73
- # (fans, AC, hum) — more aggressive but cleaner result
74
- denoised = nr.reduce_noise(
75
- y=audio,
76
- sr=sr,
77
- stationary=True, # good for constant background noise
78
- prop_decrease=0.85, # 85% noise reduction (0-1, higher = more aggressive)
79
- ).astype(np.float32)
80
- return denoised
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  except Exception as e:
82
- logger.warning(f"[Denoiser] noisereduce failed: {e}, using raw audio")
83
  return audio
84
 
85
  def _convert_to_wav(self, src: str, dst: str):
86
- """Convert any audio format to high quality WAV."""
87
  cmd = [
88
  "ffmpeg", "-y", "-i", src,
89
- "-acodec", "pcm_s24le", # 24-bit depth (better than 16-bit)
90
- "-ar", str(TARGET_SR), # 44100 Hz sample rate
91
- # ✅ No -ac 1 here — preserve original channel count (stereo stays stereo)
92
  dst
93
  ]
94
  result = subprocess.run(cmd, capture_output=True, text=True)
95
  if result.returncode != 0:
96
- # Fallback: try reading directly with soundfile
97
  try:
98
  data, sr = sf.read(src, always_2d=True)
99
  sf.write(dst, data, sr, subtype="PCM_24")
@@ -101,21 +188,16 @@ class Denoiser:
101
  raise RuntimeError(f"Cannot read audio file: {e}")
102
 
103
  def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
104
- """Normalise to target loudness so output is clear and audible."""
105
  try:
106
  import pyloudnorm as pyln
107
- # pyloudnorm needs mono or stereo, handle both
108
  meter = pyln.Meter(sr)
109
  loudness = meter.integrated_loudness(audio)
110
  if np.isfinite(loudness) and loudness < 0:
111
  audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
112
  print(f"[Denoiser] Loudness: {loudness:.1f}dB → {TARGET_LOUDNESS}dB")
113
  except Exception:
114
- # Simple RMS normalisation fallback
115
- if audio.ndim > 1:
116
- rms = np.sqrt(np.mean(audio ** 2))
117
- else:
118
- rms = np.sqrt(np.mean(audio ** 2))
119
  if rms > 1e-9:
120
  target = 10 ** (TARGET_LOUDNESS / 20.0)
121
  audio = audio * (target / rms)
 
1
  """
2
  Department 1 - Denoiser
3
+ Uses DeepFilterNet3 (deep learning) for noise removal.
4
+ UPGRADED from noisereduce → DeepFilterNet3:
5
+ - AI-based speech enhancement (not just signal processing)
6
+ - 48000 Hz full-band audio support
7
  - Stereo preserved if original is stereo
8
+ - Loudness normalisation target (-18 dB)
9
+ - PCM_24 high quality output
10
+ - Fallback to noisereduce if DeepFilterNet3 unavailable
11
  """
12
 
13
  import os
 
19
 
20
  logger = logging.getLogger(__name__)
21
 
22
+ # ✅ 48000 Hz = DeepFilterNet3 native sample rate (full-band)
23
+ TARGET_SR = 48000
 
24
  TARGET_LOUDNESS = -18.0
25
 
26
 
27
  class Denoiser:
28
  def __init__(self):
29
+ self.df_model = None
30
+ self.df_state = None
31
+ self._load_deepfilter()
32
+
33
+ def _load_deepfilter(self):
34
+ """Try to load DeepFilterNet3. Falls back to noisereduce if unavailable."""
35
+ try:
36
+ from df import enhance, init_df
37
+ self.df_model, self.df_state, _ = init_df()
38
+ print("[Denoiser] ✅ DeepFilterNet3 loaded — AI-powered denoising active")
39
+ except ImportError:
40
+ print("[Denoiser] ⚠️ DeepFilterNet3 not installed.")
41
+ print("[Denoiser] Run: pip install deepfilterlib")
42
+ print("[Denoiser] ↩️ Falling back to noisereduce")
43
+ self.df_model = None
44
 
45
  def process(self, audio_path: str, out_dir: str) -> str:
46
  t0 = time.time()
47
 
48
+ # Step 1: Convert to high quality WAV
49
  wav_path = os.path.join(out_dir, "input.wav")
50
  self._convert_to_wav(audio_path, wav_path)
51
 
 
53
  audio, sr = sf.read(wav_path, always_2d=True)
54
  original_channels = audio.shape[1]
55
 
56
+ # Step 3: Denoise DeepFilterNet3 or fallback
57
+ if self.df_model is not None:
58
+ audio = self._denoise_deepfilter(audio, sr, original_channels)
 
 
 
 
 
 
59
  else:
60
+ audio = self._denoise_noisereduce(audio, sr, original_channels)
 
 
61
 
62
  # Step 4: Normalise loudness
63
  audio = self._normalise(audio, sr)
64
 
65
+ # Step 5: Save at high quality (PCM_24)
66
  out_path = os.path.join(out_dir, "denoised.wav")
67
  sf.write(out_path, audio, sr, subtype="PCM_24")
68
 
 
71
  print(f"[Denoiser] ✅ Done in {elapsed:.2f}s")
72
  return out_path
73
 
74
+ # =========================================================
75
+ # PRIMARY: DeepFilterNet3 (AI-based, best quality)
76
+ # =========================================================
77
+ def _denoise_deepfilter(self, audio: np.ndarray, sr: int, channels: int) -> np.ndarray:
78
+ """
79
+ Denoise using DeepFilterNet3.
80
+ DeepFilterNet3 works at 48kHz natively.
81
+ For stereo: process each channel separately, then recombine.
82
+ """
83
+ try:
84
+ from df import enhance
85
+ import torch
86
+
87
+ # Resample to 48kHz if needed (DeepFilterNet3 native rate)
88
+ if sr != TARGET_SR:
89
+ audio = self._resample(audio, sr, TARGET_SR)
90
+ sr = TARGET_SR
91
+
92
+ if channels > 1:
93
+ # Stereo — process each channel independently
94
+ denoised_channels = []
95
+ for ch in range(channels):
96
+ channel = audio[:, ch].astype(np.float32)
97
+ # DeepFilterNet expects (1, samples) tensor
98
+ tensor = torch.from_numpy(channel).unsqueeze(0)
99
+ enhanced = enhance(self.df_model, self.df_state, tensor)
100
+ denoised_channels.append(enhanced.squeeze().numpy())
101
+ audio = np.stack(denoised_channels, axis=1)
102
+ else:
103
+ # Mono
104
+ channel = audio.squeeze().astype(np.float32)
105
+ tensor = torch.from_numpy(channel).unsqueeze(0)
106
+ enhanced = enhance(self.df_model, self.df_state, tensor)
107
+ audio = enhanced.squeeze().numpy()
108
+
109
+ print("[Denoiser] 🤖 DeepFilterNet3 enhancement complete")
110
+ return audio
111
+
112
+ except Exception as e:
113
+ logger.warning(f"[Denoiser] DeepFilterNet3 failed: {e}, falling back to noisereduce")
114
+ return self._denoise_noisereduce(audio, sr, channels)
115
+
116
+ # =========================================================
117
+ # ↩️ FALLBACK: noisereduce (signal processing)
118
+ # =========================================================
119
+ def _denoise_noisereduce(self, audio: np.ndarray, sr: int, channels: int) -> np.ndarray:
120
+ """Fallback denoiser using noisereduce library."""
121
  try:
122
  import noisereduce as nr
123
+ print("[Denoiser] ↩️ Using noisereduce fallback")
124
+
125
+ if channels > 1:
126
+ denoised_channels = []
127
+ for ch in range(channels):
128
+ channel = audio[:, ch].astype(np.float32)
129
+ denoised = nr.reduce_noise(
130
+ y=channel,
131
+ sr=sr,
132
+ stationary=True,
133
+ prop_decrease=0.75, # less aggressive to preserve voice
134
+ ).astype(np.float32)
135
+ denoised_channels.append(denoised)
136
+ audio = np.stack(denoised_channels, axis=1)
137
+ else:
138
+ audio = audio.squeeze().astype(np.float32)
139
+ audio = nr.reduce_noise(
140
+ y=audio,
141
+ sr=sr,
142
+ stationary=True,
143
+ prop_decrease=0.75,
144
+ ).astype(np.float32)
145
+
146
+ return audio
147
+
148
+ except Exception as e:
149
+ logger.warning(f"[Denoiser] noisereduce also failed: {e}, returning raw audio")
150
+ return audio
151
+
152
+ # =========================================================
153
+ # 🔧 HELPERS
154
+ # =========================================================
155
+ def _resample(self, audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
156
+ """Resample audio to target sample rate using scipy."""
157
+ try:
158
+ from scipy.signal import resample_poly
159
+ from math import gcd
160
+ g = gcd(orig_sr, target_sr)
161
+ up, down = target_sr // g, orig_sr // g
162
+ if audio.ndim > 1:
163
+ resampled = np.stack(
164
+ [resample_poly(audio[:, ch], up, down) for ch in range(audio.shape[1])],
165
+ axis=1
166
+ )
167
+ else:
168
+ resampled = resample_poly(audio, up, down)
169
+ return resampled.astype(np.float32)
170
  except Exception as e:
171
+ logger.warning(f"[Denoiser] Resample failed: {e}")
172
  return audio
173
 
174
  def _convert_to_wav(self, src: str, dst: str):
175
+ """Convert any audio format to high quality WAV at 48kHz."""
176
  cmd = [
177
  "ffmpeg", "-y", "-i", src,
178
+ "-acodec", "pcm_s24le",
179
+ "-ar", str(TARGET_SR), # 48kHz for DeepFilterNet3
 
180
  dst
181
  ]
182
  result = subprocess.run(cmd, capture_output=True, text=True)
183
  if result.returncode != 0:
 
184
  try:
185
  data, sr = sf.read(src, always_2d=True)
186
  sf.write(dst, data, sr, subtype="PCM_24")
 
188
  raise RuntimeError(f"Cannot read audio file: {e}")
189
 
190
  def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
191
+ """Normalise to target loudness."""
192
  try:
193
  import pyloudnorm as pyln
 
194
  meter = pyln.Meter(sr)
195
  loudness = meter.integrated_loudness(audio)
196
  if np.isfinite(loudness) and loudness < 0:
197
  audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
198
  print(f"[Denoiser] Loudness: {loudness:.1f}dB → {TARGET_LOUDNESS}dB")
199
  except Exception:
200
+ rms = np.sqrt(np.mean(audio ** 2))
 
 
 
 
201
  if rms > 1e-9:
202
  target = 10 ** (TARGET_LOUDNESS / 20.0)
203
  audio = audio * (target / rms)