# Soft-VC voice conversion demo: HuBERT-Soft content encoder -> acoustic model
# -> HiFi-GAN vocoder, served through a Gradio web interface.
import torch
import torchaudio
import gradio as gr
import requests
# Pick the device once so the demo also runs on CPU-only machines.
# (The original unconditionally called .cuda(), which crashes without a GPU.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# HuBERT-Soft content encoder: raw waveform -> speaker-independent "soft" speech units.
hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True).to(device)

# Acoustic model: soft units -> mel spectrogram in the target speaker's voice.
acoustic_model = torch.hub.load("bshall/acoustic-model:main", "hubert_soft", trust_repo=True).to(device)

# HiFi-GAN vocoder: mel spectrogram -> audible waveform.
vocoder = torch.hub.load("bshall/hifigan:main", "hifigan", trust_repo=True).to(device)
def voice_conversion(input_audio):
    """Convert the voice in an uploaded audio file using the soft-VC pipeline.

    Parameters
    ----------
    input_audio : str
        Path to the uploaded audio file (Gradio supplies a filepath).

    Returns
    -------
    str
        Path to the converted WAV file.
    """
    # The soft-VC models are trained on 16 kHz mono audio
    # (per the bshall/soft-vc project documentation).
    target_sr = 16000

    waveform, sample_rate = torchaudio.load(input_audio)

    # Downmix multi-channel input to mono; the encoder expects one channel.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to the rate the models were trained on.
    if sample_rate != target_sr:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_sr)

    # Run on whatever device the models live on (the original hard-coded .cuda()).
    device = next(hubert.parameters()).device

    # hubert_soft expects input shaped (batch, 1, time) — add the batch dim.
    source = waveform.unsqueeze(0).to(device)

    with torch.no_grad():
        # Official soft-vc usage: .units() extracts soft units, the acoustic
        # model's mel output is transposed to (batch, mels, frames) for HiFi-GAN.
        units = hubert.units(source)
        mel_spec = acoustic_model.generate(units).transpose(1, 2)
        audio_out = vocoder(mel_spec)

    output_path = "output.wav"
    # Drop the batch dim -> (channels, time) for torchaudio.save.  Save at the
    # vocoder's 16 kHz rate — the original wrongly saved at the *input* rate,
    # which pitch-shifted the result whenever the upload was not 16 kHz.
    torchaudio.save(output_path, audio_out.squeeze(0).cpu(), target_sr)
    return output_path
# Gradio UI.  The gr.inputs / gr.outputs namespaces (and the Audio
# `source=` / `type="file"` arguments) were removed in Gradio 3+;
# the modern gr.Audio component with type="filepath" hands
# voice_conversion a path string and serves the returned path back.
iface = gr.Interface(
    fn=voice_conversion,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Voice Conversion Demo",
    description="Upload an audio file to convert its voice using HuBERT and other models.",
)

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    iface.launch()