# ca-cpu / app.py — "Update app.py" (commit d6c34b5, verified)
# (Hugging Face Hub page header, preserved as a comment so the file parses.)
import torch
import torchaudio
import gradio as gr
import requests
# Pick the compute device once: fall back to CPU when CUDA is unavailable.
# (The original unconditionally called .cuda(), which raises on CPU-only
# hosts — this Space is named "ca-cpu", so that path matters.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# HuBERT-Soft content encoder (bshall/soft-vc): extracts speaker-independent
# speech units from 16 kHz waveforms.
hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True).to(device)

# Acoustic model: maps HuBERT-Soft units to a mel-spectrogram in the target voice.
acoustic_model = torch.hub.load("bshall/acoustic-model:main", "hubert_soft", trust_repo=True).to(device)

# HiFi-GAN vocoder: synthesizes the output waveform from the mel-spectrogram.
vocoder = torch.hub.load("bshall/hifigan:main", "hifigan", trust_repo=True).to(device)
def voice_conversion(input_audio):
    """Convert the voice in an uploaded audio file using the soft-VC pipeline.

    Pipeline: waveform -> HuBERT-Soft units -> acoustic model mel-spectrogram
    -> HiFi-GAN waveform.

    Parameters
    ----------
    input_audio : str
        Filesystem path to the input audio file (Gradio ``type="filepath"``).

    Returns
    -------
    str
        Path to the converted audio file ("output.wav").
    """
    # Run on whichever device the models live on, instead of hard-coding
    # .cuda() (which crashes on CPU-only hosts).
    device = next(hubert.parameters()).device

    waveform, sample_rate = torchaudio.load(input_audio)

    # Downmix to mono — the content encoder consumes a single channel.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # HuBERT-Soft is trained on 16 kHz audio; the original fed arbitrary-rate
    # audio straight in, which silently degrades the units.
    target_sr = 16000
    if sample_rate != target_sr:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_sr)

    with torch.no_grad():
        # Upstream soft-vc usage: hubert.units(wav) with wav of shape
        # (batch, 1, time); the original called hubert(waveform) on a
        # 2-D tensor with no batch dimension.
        units = hubert.units(waveform.unsqueeze(0).to(device))
        # acoustic.generate(units) yields (batch, time, mels); HiFi-GAN
        # expects (batch, mels, time), hence the transpose (per upstream demo).
        mel_spec = acoustic_model.generate(units).transpose(1, 2)
        audio_out = vocoder(mel_spec)

    # torchaudio.save wants (channels, time): drop the batch dimension.
    # Save at the vocoder's 16 kHz output rate — the original reused the
    # input file's rate, pitch-shifting the result for non-16 kHz inputs.
    output_path = "output.wav"
    torchaudio.save(output_path, audio_out.squeeze(0).cpu(), target_sr)
    return output_path
# Gradio UI: a single audio-in / audio-out demo around voice_conversion.
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and
# removed in Gradio 4 — component classes now live directly on `gr`
# (and `source=` became the plural `sources=`).
iface = gr.Interface(
    fn=voice_conversion,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Voice Conversion Demo",
    description="Upload an audio file to convert its voice using HuBERT and other models.",
)

# Launch only when executed as a script (Spaces runs app.py as __main__),
# so importing this module for tests doesn't start a server.
if __name__ == "__main__":
    iface.launch()