|
|
import gradio as gr |
|
|
import torch |
|
|
import torchaudio |
|
|
from speechbrain.inference.enhancement import SpectralMaskEnhancement |
|
|
|
|
|
|
|
|
# Build the enhancement model once, at import time, so every request handled
# by the UI reuses the same instance.  ``source`` names the
# "speechbrain/metricgan-plus-voicebank" checkpoint and ``savedir`` is the
# local directory handed to ``from_hparams`` for its files.
_ENHANCER_HPARAMS = {
    "source": "speechbrain/metricgan-plus-voicebank",
    "savedir": "pretrained_models/metricgan-plus-voicebank",
}
enhance_model = SpectralMaskEnhancement.from_hparams(**_ENHANCER_HPARAMS)
|
|
|
|
|
def enhance_audio(file_path):
    """Enhance an uploaded speech recording with the module-level ``enhance_model``.

    Args:
        file_path: Path of the uploaded file, as delivered by the
            ``gr.File(type="filepath")`` input.  ``None`` (or an empty
            string) means nothing was uploaded.

    Returns:
        A ``(sample_rate, samples)`` tuple, which is the order a
        ``gr.Audio(type="numpy")`` output expects.  ``samples`` is the
        enhanced signal as a NumPy array and ``sample_rate`` is 16000.

    Raises:
        gr.Error: If no file was provided, so the UI shows a readable
            message instead of an opaque loader failure.
    """
    if not file_path:
        raise gr.Error("Please upload an audio file to enhance.")

    # NOTE(review): 16000 is hard-coded exactly as in the original; it
    # presumably matches the rate ``load_audio`` normalises to -- confirm.
    sample_rate = 16000

    # ``enhance_batch`` works on batches, so add a leading batch dimension
    # to the single loaded waveform.
    noisy = enhance_model.load_audio(file_path).unsqueeze(0)

    # One entry per batch item; 1.0 is taken to mean "the whole signal is
    # valid" (relative length, no padding) -- confirm against SpeechBrain.
    lengths = torch.tensor([1.0])

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths=lengths)

    # Drop the batch dimension and hand Gradio a plain NumPy array.
    samples = enhanced.squeeze(0).detach().cpu().numpy()

    # Gradio's numpy audio output is (sample_rate, data); the original
    # returned these in the opposite order.
    return sample_rate, samples
|
|
|
|
|
|
|
|
# Web UI: a single file-upload input feeding ``enhance_audio`` and an audio
# player for the result.
_noisy_upload = gr.File(type="filepath")
_enhanced_player = gr.Audio(type="numpy")

demo = gr.Interface(
    fn=enhance_audio,
    inputs=_noisy_upload,
    outputs=_enhanced_player,
    title="Speech Enhancement",
    description="Upload a noisy speech file and get the enhanced output.",
)

# Start the app; ``share=True`` asks Gradio for a public share link in
# addition to the local server.
demo.launch(share=True)
|
|
|