testingfaces committed on
Commit
de2fb00
·
verified ·
1 Parent(s): 19073e1

Update denoiser.py

Browse files
Files changed (1) hide show
  1. denoiser.py +71 -28
denoiser.py CHANGED
@@ -1,6 +1,12 @@
1
  """
2
  Department 1 - Denoiser
3
- Uses noisereduce for noise removal (no Rust/C++ required).
 
 
 
 
 
 
4
  """
5
 
6
  import os
@@ -12,70 +18,107 @@ import logging
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
- TARGET_SR = 16000
16
- TARGET_LOUDNESS = -23.0
 
 
17
 
18
 
19
  class Denoiser:
20
  def __init__(self):
21
- print("[Denoiser] Ready (noisereduce)")
22
 
23
  def process(self, audio_path: str, out_dir: str) -> str:
24
  t0 = time.time()
25
 
26
- # Step 1: Convert to WAV
27
  wav_path = os.path.join(out_dir, "input.wav")
28
  self._convert_to_wav(audio_path, wav_path)
29
 
30
- # Step 2: Read
31
  audio, sr = sf.read(wav_path, always_2d=True)
32
-
33
- # Step 3: Stereo to mono
34
- if audio.shape[1] > 1:
35
- audio = audio.mean(axis=1)
 
 
 
 
 
 
 
36
  else:
37
- audio = audio.squeeze()
38
-
39
- audio = audio.astype(np.float32)
40
-
41
- # Step 4: Denoise with noisereduce
42
- try:
43
- import noisereduce as nr
44
- audio = nr.reduce_noise(y=audio, sr=sr).astype(np.float32)
45
- except Exception as e:
46
- logger.warning(f"[Denoiser] noisereduce failed: {e}, using raw audio")
47
 
48
- # Step 5: Normalise loudness
49
  audio = self._normalise(audio, sr)
50
 
51
- # Step 6: Save
52
  out_path = os.path.join(out_dir, "denoised.wav")
53
- sf.write(out_path, audio, sr, subtype="PCM_16")
54
 
55
- logger.info(f"[Denoiser] Done in {time.time()-t0:.2f}s")
 
 
56
  return out_path
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def _convert_to_wav(self, src: str, dst: str):
59
- cmd = ["ffmpeg", "-y", "-i", src, "-acodec", "pcm_s16le",
60
- "-ar", str(TARGET_SR), "-ac", "1", dst]
 
 
 
 
 
 
61
  result = subprocess.run(cmd, capture_output=True, text=True)
62
  if result.returncode != 0:
 
63
  try:
64
  data, sr = sf.read(src, always_2d=True)
65
- sf.write(dst, data, sr, subtype="PCM_16")
66
  except Exception as e:
67
  raise RuntimeError(f"Cannot read audio file: {e}")
68
 
69
  def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
 
70
  try:
71
  import pyloudnorm as pyln
 
72
  meter = pyln.Meter(sr)
73
  loudness = meter.integrated_loudness(audio)
74
  if np.isfinite(loudness) and loudness < 0:
75
  audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
 
76
  except Exception:
77
- rms = np.sqrt(np.mean(audio ** 2))
 
 
 
 
78
  if rms > 1e-9:
79
  target = 10 ** (TARGET_LOUDNESS / 20.0)
80
  audio = audio * (target / rms)
 
 
81
  return np.clip(audio, -1.0, 1.0).astype(np.float32)
 
1
  """
2
  Department 1 - Denoiser
3
+ Uses noisereduce for noise removal.
4
+ ✅ IMPROVED:
5
+ - 44100 Hz sample rate (CD quality) instead of 16000 Hz (telephone)
6
+ - Stereo preserved if original is stereo
7
+ - Better loudness normalisation target (-18 dB instead of -23 dB)
8
+ - Stronger noise reduction with stationary noise detection
9
+ - High quality PCM_24 output instead of PCM_16
10
  """
11
 
12
  import os
 
18
 
19
  logger = logging.getLogger(__name__)
20
 
21
+ # ✅ UPGRADED: 44100 = CD quality (was 16000 = telephone quality)
22
+ TARGET_SR = 44100
23
+ # ✅ UPGRADED: -18 dB is louder/clearer (was -23 dB which was too quiet)
24
+ TARGET_LOUDNESS = -18.0
25
 
26
 
27
  class Denoiser:
28
  def __init__(self):
29
+ print("[Denoiser] Ready (noisereduce — 44100Hz CD quality)")
30
 
31
  def process(self, audio_path: str, out_dir: str) -> str:
32
  t0 = time.time()
33
 
34
+ # Step 1: Convert to high quality WAV (44100 Hz, stereo preserved)
35
  wav_path = os.path.join(out_dir, "input.wav")
36
  self._convert_to_wav(audio_path, wav_path)
37
 
38
+ # Step 2: Read audio
39
  audio, sr = sf.read(wav_path, always_2d=True)
40
+ original_channels = audio.shape[1]
41
+
42
+ # Step 3: Process each channel separately to preserve stereo
43
+ if original_channels > 1:
44
+ # Stereo — denoise each channel independently
45
+ denoised_channels = []
46
+ for ch in range(original_channels):
47
+ channel = audio[:, ch].astype(np.float32)
48
+ channel = self._denoise_channel(channel, sr)
49
+ denoised_channels.append(channel)
50
+ audio = np.stack(denoised_channels, axis=1)
51
  else:
52
+ # Mono
53
+ audio = audio.squeeze().astype(np.float32)
54
+ audio = self._denoise_channel(audio, sr)
 
 
 
 
 
 
 
55
 
56
+ # Step 4: Normalise loudness
57
  audio = self._normalise(audio, sr)
58
 
59
+ # Step 5: Save at high quality (PCM_24 = better than PCM_16)
60
  out_path = os.path.join(out_dir, "denoised.wav")
61
+ sf.write(out_path, audio, sr, subtype="PCM_24")
62
 
63
+ elapsed = time.time() - t0
64
+ logger.info(f"[Denoiser] Done in {elapsed:.2f}s — {sr}Hz, {original_channels}ch")
65
+ print(f"[Denoiser] ✅ Done in {elapsed:.2f}s")
66
  return out_path
67
 
68
+ def _denoise_channel(self, audio: np.ndarray, sr: int) -> np.ndarray:
69
+ """Denoise a single channel with noisereduce."""
70
+ try:
71
+ import noisereduce as nr
72
+ # ✅ stationary=True is better for consistent background noise
73
+ # (fans, AC, hum) — more aggressive but cleaner result
74
+ denoised = nr.reduce_noise(
75
+ y=audio,
76
+ sr=sr,
77
+ stationary=True, # good for constant background noise
78
+ prop_decrease=0.85, # 85% noise reduction (0-1, higher = more aggressive)
79
+ ).astype(np.float32)
80
+ return denoised
81
+ except Exception as e:
82
+ logger.warning(f"[Denoiser] noisereduce failed: {e}, using raw audio")
83
+ return audio
84
+
85
  def _convert_to_wav(self, src: str, dst: str):
86
+ """Convert any audio format to high quality WAV."""
87
+ cmd = [
88
+ "ffmpeg", "-y", "-i", src,
89
+ "-acodec", "pcm_s24le", # 24-bit depth (better than 16-bit)
90
+ "-ar", str(TARGET_SR), # 44100 Hz sample rate
91
+ # ✅ No -ac 1 here — preserve original channel count (stereo stays stereo)
92
+ dst
93
+ ]
94
  result = subprocess.run(cmd, capture_output=True, text=True)
95
  if result.returncode != 0:
96
+ # Fallback: try reading directly with soundfile
97
  try:
98
  data, sr = sf.read(src, always_2d=True)
99
+ sf.write(dst, data, sr, subtype="PCM_24")
100
  except Exception as e:
101
  raise RuntimeError(f"Cannot read audio file: {e}")
102
 
103
  def _normalise(self, audio: np.ndarray, sr: int) -> np.ndarray:
104
+ """Normalise to target loudness so output is clear and audible."""
105
  try:
106
  import pyloudnorm as pyln
107
+ # pyloudnorm needs mono or stereo, handle both
108
  meter = pyln.Meter(sr)
109
  loudness = meter.integrated_loudness(audio)
110
  if np.isfinite(loudness) and loudness < 0:
111
  audio = pyln.normalize.loudness(audio, loudness, TARGET_LOUDNESS)
112
+ print(f"[Denoiser] Loudness: {loudness:.1f}dB → {TARGET_LOUDNESS}dB")
113
  except Exception:
114
+ # Simple RMS normalisation fallback
115
+ if audio.ndim > 1:
116
+ rms = np.sqrt(np.mean(audio ** 2))
117
+ else:
118
+ rms = np.sqrt(np.mean(audio ** 2))
119
  if rms > 1e-9:
120
  target = 10 ** (TARGET_LOUDNESS / 20.0)
121
  audio = audio * (target / rms)
122
+ print(f"[Denoiser] RMS normalised to {TARGET_LOUDNESS}dB")
123
+
124
  return np.clip(audio, -1.0, 1.0).astype(np.float32)