ongudidan committed on
Commit
abeff4e
·
verified ·
1 Parent(s): 974a4a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -15
app.py CHANGED
@@ -111,9 +111,7 @@ def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str]
111
  snr = int(snr)
112
  noise_fn = NOISES[noise_type]
113
  meta = AudioMetaData(-1, -1, -1, -1, "")
114
-
115
- max_s = 3600 # allow up to 1 hour
116
-
117
  if speech_upl is not None:
118
  sample, meta = load_audio(speech_upl, sr)
119
  max_len = max_s * sr
@@ -123,51 +121,43 @@ def demo_fn(speech_upl: str, noise_type: str, snr: int, mic_input: Optional[str]
123
  else:
124
  sample, meta = load_audio("samples/p232_013_clean.wav", sr)
125
  sample = sample[..., : max_s * sr]
126
-
127
  if sample.dim() > 1 and sample.shape[0] > 1:
128
- assert sample.shape[1] > sample.shape[0], f"Expecting channels first, but got {sample.shape}"
 
 
129
  sample = sample.mean(dim=0, keepdim=True)
130
-
131
  logger.info(f"Loaded sample with shape {sample.shape}")
132
-
133
  if noise_fn is not None:
134
  noise, _ = load_audio(noise_fn, sr) # type: ignore
135
  logger.info(f"Loaded noise with shape {noise.shape}")
136
  _, _, sample = mix_at_snr(sample, noise, snr)
137
-
138
  logger.info("Start denoising audio")
139
  enhanced = enhance(model, df, sample)
140
  logger.info("Denoising finished")
141
-
142
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
143
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
144
  enhanced = enhanced * lim
145
-
146
  if meta.sample_rate != sr:
147
  enhanced = resample(enhanced, sr, meta.sample_rate)
148
  sample = resample(sample, sr, meta.sample_rate)
149
  sr = meta.sample_rate
150
-
151
  noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
152
  save_audio(noisy_wav, sample, sr)
153
  enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
154
  save_audio(enhanced_wav, enhanced, sr)
155
-
156
  logger.info(f"saved audios: {noisy_wav}, {enhanced_wav}")
157
-
158
  ax_noisy.clear()
159
  ax_enh.clear()
160
  noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
161
  enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
162
-
163
  filter = [speech_upl, noisy_wav, enhanced_wav]
164
  if mic_input is not None and mic_input != "":
165
  filter.append(mic_input)
166
  cleanup_tmp(filter)
167
-
168
  return noisy_wav, noisy_im, enhanced_wav, enh_im
169
 
170
 
 
171
  def specshow(
172
  spec,
173
  ax=None,
 
111
  snr = int(snr)
112
  noise_fn = NOISES[noise_type]
113
  meta = AudioMetaData(-1, -1, -1, -1, "")
114
+ max_s = 10 # limit to 10 seconds
 
 
115
  if speech_upl is not None:
116
  sample, meta = load_audio(speech_upl, sr)
117
  max_len = max_s * sr
 
121
  else:
122
  sample, meta = load_audio("samples/p232_013_clean.wav", sr)
123
  sample = sample[..., : max_s * sr]
 
124
  if sample.dim() > 1 and sample.shape[0] > 1:
125
+ assert (
126
+ sample.shape[1] > sample.shape[0]
127
+ ), f"Expecting channels first, but got {sample.shape}"
128
  sample = sample.mean(dim=0, keepdim=True)
 
129
  logger.info(f"Loaded sample with shape {sample.shape}")
 
130
  if noise_fn is not None:
131
  noise, _ = load_audio(noise_fn, sr) # type: ignore
132
  logger.info(f"Loaded noise with shape {noise.shape}")
133
  _, _, sample = mix_at_snr(sample, noise, snr)
 
134
  logger.info("Start denoising audio")
135
  enhanced = enhance(model, df, sample)
136
  logger.info("Denoising finished")
 
137
  lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
138
  lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
139
  enhanced = enhanced * lim
 
140
  if meta.sample_rate != sr:
141
  enhanced = resample(enhanced, sr, meta.sample_rate)
142
  sample = resample(sample, sr, meta.sample_rate)
143
  sr = meta.sample_rate
 
144
  noisy_wav = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
145
  save_audio(noisy_wav, sample, sr)
146
  enhanced_wav = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
147
  save_audio(enhanced_wav, enhanced, sr)
 
148
  logger.info(f"saved audios: {noisy_wav}, {enhanced_wav}")
 
149
  ax_noisy.clear()
150
  ax_enh.clear()
151
  noisy_im = spec_im(sample, sr=sr, figure=fig_noisy, ax=ax_noisy)
152
  enh_im = spec_im(enhanced, sr=sr, figure=fig_enh, ax=ax_enh)
 
153
  filter = [speech_upl, noisy_wav, enhanced_wav]
154
  if mic_input is not None and mic_input != "":
155
  filter.append(mic_input)
156
  cleanup_tmp(filter)
 
157
  return noisy_wav, noisy_im, enhanced_wav, enh_im
158
 
159
 
160
+
161
  def specshow(
162
  spec,
163
  ax=None,