ongudidan committed on
Commit
16d8efc
·
verified ·
1 Parent(s): abeff4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -6
app.py CHANGED
@@ -103,16 +103,31 @@ def load_audio_gradio(
103
  return audio, meta
104
 
105
 
 
 
 
 
 
 
 
 
 
106
  def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str] = None):
107
  if mic_input:
108
  speech_upl = mic_input
 
109
  sr = config("sr", 48000, int, section="df")
110
  logger.info(f"Got parameters speech_upl: {speech_upl}, noise: {noise_type}, snr: {snr}")
111
  snr = int(snr)
112
  noise_fn = NOISES[noise_type]
113
  meta = AudioMetaData(-1, -1, -1, -1, "")
114
- max_s = 10 # limit to 10 seconds
 
 
115
  if speech_upl is not None:
 
 
 
116
  sample, meta = load_audio(speech_upl, sr)
117
  max_len = max_s * sr
118
  if sample.shape[-1] > max_len:
@@ -121,42 +136,49 @@ def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str]
121
  else:
122
  sample, meta = load_audio("samples/p232_013_clean.wav", sr)
123
  sample = sample[..., : max_s * sr]
 
124
  if sample.dim() > 1 and sample.shape[0] > 1:
125
- assert (
126
- sample.shape[1] > sample.shape[0]
127
- ), f"Expecting channels first, but got {sample.shape}"
128
  sample = sample.mean(dim=0, keepdim=True)
 
129
  logger.info(f"Loaded sample with shape {sample.shape}")
 
130
  if noise_fn is not None:
131
  noise, _ = load_audio(noise_fn, sr) # type: ignore
132
  logger.info(f"Loaded noise with shape {noise.shape}")
133
  _, _, sample = mix_at_snr(sample, noise, snr)
 
134
  logger.info("Start denoising audio")
135
  enhanced = enhance(model, df, sample)
136
  logger.info("Denoising finished")
 
137
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
138
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
139
  enhanced = enhanced * lim
 
140
  if meta.sample_rate != sr:
141
  enhanced = resample(enhanced, sr, meta.sample_rate)
142
  sample = resample(sample, sr, meta.sample_rate)
143
  sr = meta.sample_rate
 
144
  noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
145
  save_audio(noisy_wav, sample, sr)
146
  enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
147
  save_audio(enhanced_wav, enhanced, sr)
 
148
  logger.info(f"saved audios: {noisy_wav}, {enhanced_wav}")
 
149
  ax_noisy.clear()
150
  ax_enh.clear()
151
  noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
152
  enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
 
153
  filter = [speech_upl, noisy_wav, enhanced_wav]
154
  if mic_input is not None and mic_input != "":
155
  filter.append(mic_input)
156
  cleanup_tmp(filter)
157
- return noisy_wav, noisy_im, enhanced_wav, enh_im
158
-
159
 
 
160
 
161
  def specshow(
162
  spec,
 
103
  return audio, meta
104
 
105
 
106
def ensure_wav(filepath: str, extensions: tuple = (".mp3",)) -> str:
    """Return a WAV path for *filepath*, converting it with ffmpeg if needed.

    Files whose name ends (case-insensitively) with one of *extensions* are
    transcoded by the ``ffmpeg`` executable into a sibling file with the same
    stem and a ``.wav`` suffix; an existing file at that location is
    overwritten (``-y``). Any other path is returned unchanged without
    touching the filesystem.

    Args:
        filepath: Path of the audio file to inspect.
        extensions: Dotted suffixes (e.g. ``".mp3"``) that should be
            converted. Defaults to MP3 only, matching the previous behavior.

    Returns:
        The path of the converted WAV file, or *filepath* itself when no
        conversion was necessary.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Imported locally so the helper does not depend on a module-level
    # ``import subprocess`` that the surrounding file may not provide.
    import subprocess

    lowered = filepath.lower()
    # ".wav" is skipped on purpose: converting a WAV onto itself would make
    # ffmpeg read and overwrite the same file.
    matched = next(
        (
            ext
            for ext in (str(e).lower() for e in extensions)
            if ext and ext != ".wav" and lowered.endswith(ext)
        ),
        None,
    )
    if matched is None:
        return filepath

    # Strip the matched suffix from the original (case-preserved) path.
    wav_path = filepath[: -len(matched)] + ".wav"
    subprocess.run(
        ["ffmpeg", "-y", "-i", filepath, wav_path],
        check=True,
        # ffmpeg reads stdin for interactive commands by default; detach it so
        # a server process never blocks on, or loses data from, its own stdin.
        stdin=subprocess.DEVNULL,
    )
    return wav_path
113
+
114
+
115
  def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str] = None):
116
  if mic_input:
117
  speech_upl = mic_input
118
+
119
  sr = config("sr", 48000, int, section="df")
120
  logger.info(f"Got parameters speech_upl: {speech_upl}, noise: {noise_type}, snr: {snr}")
121
  snr = int(snr)
122
  noise_fn = NOISES[noise_type]
123
  meta = AudioMetaData(-1, -1, -1, -1, "")
124
+
125
+ max_s = 3600 # allow up to 1 hour (3600 seconds)
126
+
127
  if speech_upl is not None:
128
+ # ✅ Ensure compatible WAV input
129
+ speech_upl = ensure_wav(speech_upl)
130
+
131
  sample, meta = load_audio(speech_upl, sr)
132
  max_len = max_s * sr
133
  if sample.shape[-1] > max_len:
 
136
  else:
137
  sample, meta = load_audio("samples/p232_013_clean.wav", sr)
138
  sample = sample[..., : max_s * sr]
139
+
140
  if sample.dim() > 1 and sample.shape[0] > 1:
141
+ assert sample.shape[1] > sample.shape[0], f"Expecting channels first, but got {sample.shape}"
 
 
142
  sample = sample.mean(dim=0, keepdim=True)
143
+
144
  logger.info(f"Loaded sample with shape {sample.shape}")
145
+
146
  if noise_fn is not None:
147
  noise, _ = load_audio(noise_fn, sr) # type: ignore
148
  logger.info(f"Loaded noise with shape {noise.shape}")
149
  _, _, sample = mix_at_snr(sample, noise, snr)
150
+
151
  logger.info("Start denoising audio")
152
  enhanced = enhance(model, df, sample)
153
  logger.info("Denoising finished")
154
+
155
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
156
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
157
  enhanced = enhanced * lim
158
+
159
  if meta.sample_rate != sr:
160
  enhanced = resample(enhanced, sr, meta.sample_rate)
161
  sample = resample(sample, sr, meta.sample_rate)
162
  sr = meta.sample_rate
163
+
164
  noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
165
  save_audio(noisy_wav, sample, sr)
166
  enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
167
  save_audio(enhanced_wav, enhanced, sr)
168
+
169
  logger.info(f"saved audios: {noisy_wav}, {enhanced_wav}")
170
+
171
  ax_noisy.clear()
172
  ax_enh.clear()
173
  noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
174
  enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
175
+
176
  filter = [speech_upl, noisy_wav, enhanced_wav]
177
  if mic_input is not None and mic_input != "":
178
  filter.append(mic_input)
179
  cleanup_tmp(filter)
 
 
180
 
181
+ return noisy_wav, noisy_im, enhanced_wav, enh_im
182
 
183
  def specshow(
184
  spec,