Spaces:
Runtime error
Runtime error
Hendrik Schroeter
committed on
Resample enhanced audio to input sample rate
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
import math
|
| 2 |
import tempfile
|
|
|
|
| 3 |
|
| 4 |
import gradio
|
| 5 |
import gradio.inputs
|
|
@@ -9,6 +9,7 @@ import markdown
|
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
| 11 |
from df import config
|
|
|
|
| 12 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 13 |
|
| 14 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -61,14 +62,17 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
|
| 61 |
if speech_rec is None or "none" in speech_rec:
|
| 62 |
speech = "samples/p232_013_clean.wav"
|
| 63 |
if speech_upl is not None and "none" not in speech_upl:
|
| 64 |
-
print("using
|
| 65 |
speech = speech_upl
|
| 66 |
else:
|
| 67 |
speech = speech_rec
|
| 68 |
sp_kwargs = {"frame_offset": 4800}
|
| 69 |
-
speech, _ = load_audio(speech, sr, **sp_kwargs)
|
| 70 |
print(f"Loaded speech with shape {speech.shape}")
|
| 71 |
noise, _ = load_audio(noise, sr)
|
|
|
|
|
|
|
|
|
|
| 72 |
print(f"Loaded noise with shape {noise.shape}")
|
| 73 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
| 74 |
print("Start denoising audio")
|
|
@@ -77,6 +81,9 @@ def mix_and_denoise(speech_rec, speech_upl, noise, snr):
|
|
| 77 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 78 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 79 |
enhanced = enhanced * lim
|
|
|
|
|
|
|
|
|
|
| 80 |
noisy_fn = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
|
| 81 |
save_audio(noisy_fn, noisy, sr)
|
| 82 |
enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|
|
|
|
|
|
|
| 1 |
import tempfile
|
| 2 |
+
from df.utils import resample
|
| 3 |
|
| 4 |
import gradio
|
| 5 |
import gradio.inputs
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
import torch
|
| 11 |
from df import config
|
| 12 |
+
import math
|
| 13 |
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 14 |
|
| 15 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 62 |
if speech_rec is None or "none" in speech_rec:
|
| 63 |
speech = "samples/p232_013_clean.wav"
|
| 64 |
if speech_upl is not None and "none" not in speech_upl:
|
| 65 |
+
print("using speech_upl")
|
| 66 |
speech = speech_upl
|
| 67 |
else:
|
| 68 |
speech = speech_rec
|
| 69 |
sp_kwargs = {"frame_offset": 4800}
|
| 70 |
+
speech, meta = load_audio(speech, sr, **sp_kwargs)
|
| 71 |
print(f"Loaded speech with shape {speech.shape}")
|
| 72 |
noise, _ = load_audio(noise, sr)
|
| 73 |
+
if meta.sample_rate != sr:
|
| 74 |
+
# Low pass filter by resampling
|
| 75 |
+
noise = resample(resample(noise, sr, meta.sample_rate), meta.sample_rate, sr)
|
| 76 |
print(f"Loaded noise with shape {noise.shape}")
|
| 77 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
| 78 |
print("Start denoising audio")
|
|
|
|
| 81 |
lim = torch.linspace(0.0, 1.0, int(sr * 0.15)).unsqueeze(0)
|
| 82 |
lim = torch.cat((lim, torch.ones(1, enhanced.shape[1] - lim.shape[1])), dim=1)
|
| 83 |
enhanced = enhanced * lim
|
| 84 |
+
if meta.sample_rate != sr:
|
| 85 |
+
enhanced = resample(enhanced, sr, meta.sample_rate)
|
| 86 |
+
noisy = resample(noisy, sr, meta.sample_rate)
|
| 87 |
noisy_fn = tempfile.NamedTemporaryFile(suffix="noisy.wav", delete=False).name
|
| 88 |
save_audio(noisy_fn, noisy, sr)
|
| 89 |
enhanced_fn = tempfile.NamedTemporaryFile(suffix="enhanced.wav", delete=False).name
|