|
|
import tempfile

import gradio as gr
import requests
import torch
import torchaudio
|
|
|
|
|
|
|
|
# Pick the compute device once. Calling .cuda() unconditionally makes the
# script abort at import time on machines without a CUDA-capable GPU.
# NOTE(review): the CPU fallback assumes the hub entry points can deserialize
# their checkpoints without CUDA - confirm on a CPU-only machine.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# First pipeline stage: its output ("units") is fed to the acoustic model.
hubert = torch.hub.load(
    "bshall/hubert:main", "hubert_soft", trust_repo=True
).to(DEVICE)

# Second pipeline stage: generates a mel spectrogram from the units.
acoustic_model = torch.hub.load(
    "bshall/acoustic-model:main", "hubert_soft", trust_repo=True
).to(DEVICE)

# Final pipeline stage: turns the mel spectrogram into a waveform.
# NOTE(review): the upstream soft-vc usage example pairs the "hubert_soft"
# acoustic model with the "hifigan_hubert_soft" vocoder entry point; confirm
# that the plain "hifigan" entry point is really the one intended here.
vocoder = torch.hub.load(
    "bshall/hifigan:main", "hifigan", trust_repo=True
).to(DEVICE)
|
|
|
|
|
def voice_conversion(input_audio):
    """Convert the voice in an audio file and return the path of the result.

    The audio is loaded, mixed down to mono, resampled to the rate the models
    operate at, and pushed through the three module-level models:
    ``hubert`` (speech units) -> ``acoustic_model`` (mel spectrogram) ->
    ``vocoder`` (waveform).

    Args:
        input_audio: Path of the audio file to convert, as handed over by the
            Gradio audio input.

    Returns:
        Path of a newly created WAV file containing the converted audio.

    Raises:
        ValueError: If no audio file was provided.
    """
    if input_audio is None:
        # Gradio passes None when the form is submitted without an upload.
        raise ValueError("No audio file was provided.")

    # The soft-vc models work on 16 kHz mono audio and produce 16 kHz output.
    model_sample_rate = 16000

    waveform, sample_rate = torchaudio.load(input_audio)

    # torchaudio returns (channels, time); average the channels so that
    # stereo uploads become a single mono channel.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != model_sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, model_sample_rate
        )

    # Follow whatever device the models were loaded on instead of assuming
    # CUDA, and add the batch dimension: (1, time) -> (1, 1, time).
    device = next(hubert.parameters()).device
    source = waveform.unsqueeze(0).to(device)

    with torch.no_grad():
        # `units()` is the inference entry point; calling the module directly
        # runs the training forward pass, which does not return units.
        units = hubert.units(source)
        # The acoustic model emits (batch, frames, mels) while the vocoder
        # consumes (batch, mels, frames), hence the transpose.
        mel_spec = acoustic_model.generate(units).transpose(1, 2)
        audio_out = vocoder(mel_spec)

    # A unique file per call keeps concurrent requests from overwriting each
    # other's result. The handle is closed before torchaudio writes to the
    # path so this also works on platforms that forbid a second open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    # torchaudio.save expects a 2-D (channels, time) CPU tensor, so drop the
    # batch dimension of the (1, 1, time) vocoder output.
    torchaudio.save(output_path, audio_out.squeeze(0).cpu(), model_sample_rate)

    return output_path
|
|
|
|
|
|
|
|
# Web UI: one audio file in, the converted audio file out.
# The components come from the top-level `gr` namespace because the legacy
# `gr.inputs` / `gr.outputs` modules were deprecated in Gradio 3 and removed
# in Gradio 4. `type="filepath"` makes Gradio hand `voice_conversion` a path
# on disk, which is what it loads, and lets the function return a path too.
# No `source` argument is passed: its spelling differs between Gradio 3
# (`source="upload"`, the default there) and Gradio 4+ (`sources=[...]`).
iface = gr.Interface(
    fn=voice_conversion,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Voice Conversion Demo",
    description="Upload an audio file to convert its voice using HuBERT and other models.",
)

# Start the web server; this blocks until the server is stopped.
iface.launch()