Spaces:
Runtime error
Runtime error
| import math | |
| import gradio | |
| import gradio.inputs | |
| import gradio.outputs | |
| import torch | |
| from df import config | |
| from df.enhance import enhance, init_df, load_audio, save_audio | |
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# init_df() yields three values; only the first two are used in this script.
# `df` is handed to enhance() together with the model further below.
model, df, _ = init_df()

# Move the model to the selected device, then switch it to evaluation mode.
model = model.to(device=device)
model = model.eval()
def mix_at_snr(clean, noise, snr, eps=1e-10):
    """Mix a clean and a noise signal at a given SNR.

    Both signals are averaged over their first (channel) dimension. The noise
    is tiled and truncated to the length of the clean signal and then rescaled
    so that the clean-to-noise energy ratio matches ``snr``. The gain of the
    clean signal is not changed.

    Args:
        clean: Tensor (or array-like) of shape [C, T] or [T] with the clean
            signal.
        noise: Tensor (or array-like) of shape [C, T'] or [T'] with the noise
            signal.
        snr: Target signal-to-noise ratio in dB.
        eps: Small constant that keeps the energy ratio and the scaling
            factor away from zero for silent input.

    Returns:
        clean: Tensor of shape [1, T] with the channel-averaged clean signal.
        noise: Tensor of shape [1, T] with the channel-averaged noise, scaled
            to reach the requested SNR.
        mixture: Tensor of shape [1, T] with the sum of clean and noise.

    Raises:
        ValueError: If the noise is empty while the clean signal is not, or
            if the resulting mixture contains non-finite values.
    """
    clean = torch.as_tensor(clean)
    noise = torch.as_tensor(noise)
    # Treat plain 1D signals as single-channel input instead of failing on
    # the shape[1] accesses below.
    if clean.dim() == 1:
        clean = clean.unsqueeze(0)
    if noise.dim() == 1:
        noise = noise.unsqueeze(0)
    clean = clean.mean(0, keepdim=True)
    noise = noise.mean(0, keepdim=True)

    n_clean = clean.shape[1]
    n_noise = noise.shape[1]
    if n_noise < n_clean:
        if n_noise == 0:
            # Nothing to tile; fail clearly instead of dividing by zero.
            raise ValueError("noise signal is empty")
        # Tile the noise until it is at least as long as the clean signal.
        noise = noise.repeat((1, math.ceil(n_clean / n_noise)))
    noise = noise[:, :n_clean]

    E_speech = torch.mean(clean.pow(2)) + eps
    E_noise = torch.mean(noise.pow(2))
    # Dividing the noise by K yields E_speech / E_noise_scaled == 10^(snr/10).
    K = torch.sqrt((E_noise / E_speech) * 10 ** (snr / 10) + eps)
    noise = noise / K
    mixture = clean + noise
    # Explicit check (not `assert`) so it also runs under `python -O`.
    if not torch.isfinite(mixture).all():
        raise ValueError("mixture contains non-finite values")
    return clean, noise, mixture
def as_gradio_audio(x, sr=None):
    """Convert a float audio tensor to a ``(sample_rate, int16 array)`` tuple.

    Args:
        x: Float tensor of audio samples. Assumed to be normalized to
            [-1, 1] (TODO confirm against the loader); values outside that
            range are clipped.
        sr: Sampling rate to report. When omitted, the ``sr`` entry of the
            ``df`` config section is used.

    Returns:
        Tuple ``(sr, samples)`` where ``samples`` is an int16 numpy array
        with the same shape as ``x``.
    """
    if sr is None:
        sr = config.get("sr", "df", int)
    # Scale up to the int16 range. Dividing by 0x7FFF here would truncate
    # every sample to zero. Clamp first so out-of-range input cannot overflow
    # the integer cast.
    pcm = (x.detach().clamp(-1.0, 1.0) * 0x7FFF).round().to(torch.int16)
    return sr, pcm.cpu().numpy()
def mix_and_denoise(speech, noise, snr):
    """Mix speech with noise at the given SNR and enhance the mixture.

    Args:
        speech: Path to the speech audio file.
        noise: Path to the noise audio file.
        snr: Target signal-to-noise ratio passed on to ``mix_at_snr``.

    Returns:
        Tuple with the paths of the written clean, noisy and enhanced wav
        files.

    Raises:
        ValueError: If ``speech`` or ``noise`` is ``None``.
    """
    print(speech, noise, snr)
    # Both audio inputs are declared optional in the interface, so either one
    # may arrive as None. Fail with a clear message instead of an opaque
    # loader error.
    if speech is None or noise is None:
        raise ValueError("Both a speech and a noise recording are required.")
    sr = config.get("sr", "df", int)
    speech, _ = load_audio(speech, sr)
    noise, _ = load_audio(noise, sr)
    speech, noise, noisy = mix_at_snr(speech, noise, snr)
    enhanced = enhance(model, df, noisy)
    # NOTE(review): fixed file names in the working directory are shared by
    # all requests; concurrent calls overwrite each other's results.
    save_audio("clean.wav", speech, sr)
    save_audio("noisy.wav", noisy, sr)
    save_audio("enhanced.wav", enhanced, sr)
    return "clean.wav", "noisy.wav", "enhanced.wav"
# Two microphone recordings (speech and noise) followed by the SNR slider.
inputs = [
    gradio.inputs.Audio(
        source="microphone", type="filepath", optional=True, label=label
    )
    for label in ("Speech", "Noise")
]
inputs.append(gradio.inputs.Slider(minimum=-10, maximum=40, step=5, default=10))

# Example rows, one value per input: speech file, noise file, SNR.
examples = [
    ["samples/p232_013_clean.wav", "samples/noise_freesound_2530.wav", 10],
    ["samples/p232_019_clean.wav", "samples/DLIVING_combined.wav", 10],
]

# One audio output per value returned by mix_and_denoise, in the same order.
outputs = [
    gradio.outputs.Audio(label=label) for label in ("Clean", "Noisy", "Enhanced")
]

iface = gradio.Interface(
    fn=mix_and_denoise,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
)
iface.launch()