import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement

# Sample rate reported to Gradio for the enhanced audio.
# NOTE(review): assumes the pretrained model works at 16 kHz and that
# `load_audio` resamples the input accordingly -- confirm against the
# model's hyperparameters.
SAMPLE_RATE = 16000

# Load the pretrained enhancement model once at import time so every request
# reuses the same instance.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
)


def enhance_audio(file_path):
    """Enhance a noisy speech recording with the pretrained model.

    Args:
        file_path: Path of the uploaded audio file, as supplied by the
            ``gr.File(type="filepath")`` input component.

    Returns:
        A ``(sample_rate, samples)`` tuple, which is the order the
        ``gr.Audio(type="numpy")`` output component expects.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if not file_path:
        # Submitting the form without a file hands the function None.
        raise gr.Error("Please upload an audio file first.")

    # Load the audio file and add a leading batch dimension.
    noisy = enhance_model.load_audio(file_path).unsqueeze(0)

    # Relative length of 1.0: the single item in the batch is used in full.
    lengths = torch.tensor([1.0])

    # Inference only, so skip autograd bookkeeping; this also guarantees the
    # result can be converted to NumPy without an explicit detach.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths=lengths)

    # Drop the batch dimension and return (sample_rate, data) -- the original
    # returned the two values in the opposite order.
    return SAMPLE_RATE, enhanced.squeeze(0).cpu().numpy()


# Build the Gradio interface.
demo = gr.Interface(
    fn=enhance_audio,
    inputs=gr.File(type="filepath"),  # noisy speech, provided via file upload
    outputs=gr.Audio(type="numpy"),  # enhanced speech returned to the browser
    title="Speech Enhancement",
    description="Upload a noisy speech file and get the enhanced output.",
)

# Launch the Gradio app.
demo.launch(share=True)