q / app.py
yz1129's picture
Update app.py
5a0fc91 verified
import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement
# Load the pretrained speech-enhancement model once at import time so every
# request reuses the same instance. `source` names the checkpoint to fetch and
# `savedir` is the local directory handed to SpeechBrain for its files.
MODEL_SOURCE = "speechbrain/metricgan-plus-voicebank"
MODEL_SAVEDIR = "pretrained_models/metricgan-plus-voicebank"
enhance_model = SpectralMaskEnhancement.from_hparams(
    source=MODEL_SOURCE, savedir=MODEL_SAVEDIR
)
def enhance_audio(file_path):
    """Enhance a noisy speech recording and return it for Gradio playback.

    Args:
        file_path: Path of the uploaded audio file, as delivered by the
            ``gr.File(type="filepath")`` input. Gradio passes ``None`` when
            nothing was uploaded.

    Returns:
        A ``(sample_rate, samples)`` tuple -- the order that
        ``gr.Audio(type="numpy")`` expects: the integer sample rate first,
        then the enhanced waveform as a NumPy array.

    Raises:
        gr.Error: If no file was uploaded.
    """
    # NOTE(review): 16 kHz is assumed to be the rate the
    # metricgan-plus-voicebank model works at -- confirm against its hparams.
    sample_rate = 16000

    if not file_path:
        # Surface a readable message in the UI instead of an opaque loader error.
        raise gr.Error("Please upload an audio file first.")

    # Inference only: no need to record operations for autograd.
    with torch.no_grad():
        # Load through the model's own loader, then add a leading batch dimension.
        noisy = enhance_model.load_audio(file_path).unsqueeze(0)
        # Relative length 1.0: the single item spans the full batch tensor.
        lengths = torch.tensor([1.0])
        enhanced = enhance_model.enhance_batch(noisy, lengths=lengths)

    # Drop the batch dimension and hand Gradio (sample_rate, data); the
    # original returned these in the reverse order.
    return sample_rate, enhanced.squeeze(0).cpu().numpy()
# Build the Gradio interface.
# Input: a file upload whose path is passed to the handler.
upload_input = gr.File(type="filepath")
# Output: an audio player fed with NumPy data returned by the handler.
enhanced_output = gr.Audio(type="numpy")

demo = gr.Interface(
    fn=enhance_audio,
    inputs=upload_input,
    outputs=enhanced_output,
    title="Speech Enhancement",
    description="Upload a noisy speech file and get the enhanced output.",
)

# Start the Gradio app, requesting a public share link.
demo.launch(share=True)