Spaces:
Runtime error
Runtime error
Hendrik Schroeter
committed on
Initial working space
Browse files- app.py +26 -9
- samples/noise_freesound_573577.wav → clean.wav +2 -2
- enhanced.wav +3 -0
- noisy.wav +3 -0
- samples/p232_013_clean.wav +3 -0
app.py
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio
|
| 2 |
import gradio.inputs
|
| 3 |
import gradio.outputs
|
| 4 |
import torch
|
| 5 |
-
from df
|
|
|
|
| 6 |
|
| 7 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
@@ -21,22 +27,34 @@ def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
| 21 |
mix: 1D Tensor with added clean and noise signals.
|
| 22 |
|
| 23 |
"""
|
| 24 |
-
clean = torch.as_tensor(clean)
|
| 25 |
-
noise = torch.as_tensor(noise)
|
|
|
|
|
|
|
|
|
|
| 26 |
E_speech = torch.mean(clean.pow(2)) + eps
|
| 27 |
E_noise = torch.mean(noise.pow(2))
|
| 28 |
K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
|
| 29 |
noise = noise / K
|
| 30 |
mixture = clean + noise
|
| 31 |
-
assert torch.isfinite(mixture)
|
| 32 |
return clean, noise, mixture
|
| 33 |
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def mix_and_denoise(speech, noise, snr):
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
| 37 |
speech, noise, noisy = mix_at_snr(speech, noise, snr)
|
| 38 |
-
enhanced = enhance(model
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
inputs = [
|
|
@@ -49,8 +67,7 @@ inputs = [
|
|
| 49 |
gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
|
| 50 |
]
|
| 51 |
examples = [
|
| 52 |
-
[],
|
| 53 |
-
["samples/noise_freesound_2530.wav", "samples/noise_freesound_573577.wav"],
|
| 54 |
]
|
| 55 |
outputs = [
|
| 56 |
gradio.outputs.Audio(label="Clean"),
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
import gradio
|
| 5 |
import gradio.inputs
|
| 6 |
import gradio.outputs
|
| 7 |
import torch
|
| 8 |
+
from df import config
|
| 9 |
+
from df.enhance import enhance, init_df, load_audio, save_audio
|
| 10 |
|
| 11 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 12 |
+
model, df, _ = init_df()
|
| 13 |
+
model = model.to(device=device).eval()
|
| 14 |
|
| 15 |
|
| 16 |
def mix_at_snr(clean, noise, snr, eps=1e-10):
|
|
|
|
| 27 |
mix: 1D Tensor with added clean and noise signals.
|
| 28 |
|
| 29 |
"""
|
| 30 |
+
clean = torch.as_tensor(clean).mean(0, keepdim=True)
|
| 31 |
+
noise = torch.as_tensor(noise).mean(0, keepdim=True)
|
| 32 |
+
if noise.shape[1] < clean.shape[1]:
|
| 33 |
+
noise = noise.repeat((1, int(math.ceil(clean.shape[1] / noise.shape[1]))))
|
| 34 |
+
noise = noise[:, : clean.shape[1]]
|
| 35 |
E_speech = torch.mean(clean.pow(2)) + eps
|
| 36 |
E_noise = torch.mean(noise.pow(2))
|
| 37 |
K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
|
| 38 |
noise = noise / K
|
| 39 |
mixture = clean + noise
|
| 40 |
+
assert torch.isfinite(mixture).all()
|
| 41 |
return clean, noise, mixture
|
| 42 |
|
| 43 |
+
def as_gradio_audio(x):
    """Convert a waveform tensor into a ``(sample_rate, samples)`` audio tuple.

    Args:
        x: Audio tensor. Assumed to hold floating-point samples normalised
            to [-1, 1] -- TODO confirm against the tensors produced by
            ``load_audio`` / ``enhance``.

    Returns:
        A tuple ``(sr, data)`` where ``sr`` is the sample rate read from the
        ``df`` section of the config and ``data`` is an int16 NumPy array.
    """
    sr = config.get("sr", "df", int)
    # Scale *up* into the int16 range. Dividing by 0x7FFF instead would shrink
    # normalised samples towards zero, and the integer cast would then
    # truncate everything to silence.
    pcm = (x * 0x7FFF).clamp(-0x8000, 0x7FFF).to(torch.int16)
    return sr, pcm.cpu().numpy()
|
| 46 |
|
| 47 |
def mix_and_denoise(speech, noise, snr):
    """Mix a speech file with a noise file at a given SNR and enhance the mix.

    Args:
        speech: Argument forwarded to ``load_audio`` for the speech signal
            (presumably a file path -- verify against the caller).
        noise: Argument forwarded to ``load_audio`` for the noise signal.
        snr: Target signal-to-noise ratio passed on to ``mix_at_snr``.

    Returns:
        The tuple ``("clean.wav", "noisy.wav", "enhanced.wav")`` naming the
        files written by this call, in that order.
    """
    # Log the raw request arguments.
    print(speech, noise, snr)

    sample_rate = config.get("sr", "df", int)
    # ``load_audio`` returns a pair; only the first element (the audio) is used.
    clean_sig, _ = load_audio(speech, sample_rate)
    noise_sig, _ = load_audio(noise, sample_rate)

    clean_sig, noise_sig, noisy_sig = mix_at_snr(clean_sig, noise_sig, snr)
    enhanced_sig = enhance(model, df, noisy_sig)

    # Write the three signals in a fixed order and report the paths in the
    # same order.
    targets = (
        ("clean.wav", clean_sig),
        ("noisy.wav", noisy_sig),
        ("enhanced.wav", enhanced_sig),
    )
    written = []
    for path, signal in targets:
        save_audio(path, signal, sample_rate)
        written.append(path)
    return tuple(written)
|
| 58 |
|
| 59 |
|
| 60 |
inputs = [
|
|
|
|
| 67 |
gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10),
|
| 68 |
]
|
| 69 |
examples = [
|
| 70 |
+
["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
|
|
|
|
| 71 |
]
|
| 72 |
outputs = [
|
| 73 |
gradio.outputs.Audio(label="Clean"),
|
samples/noise_freesound_573577.wav → clean.wav
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
|
| 3 |
+
size 378612
|
enhanced.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97ff9dc5db07e3a2410f0dd416d9bccdcdc9bd173ed46f415e405208a4105d04
|
| 3 |
+
size 378284
|
noisy.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b658209be05042ce017aa2b3db444e56e84c3cc6f58535599ff8887c9ee5f7
|
| 3 |
+
size 378612
|
samples/p232_013_clean.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7a51b4fdfb02657cf9410dbd34b4ea165acbec48581a8a074e1d45fdd3b3334
|
| 3 |
+
size 378612
|