# RVC voice conversion Gradio demo (Hugging Face Space by Athagi, commit 286820a).
import gradio as gr
import torch
import os
import tempfile
import numpy as np
from scipy.io.wavfile import write
from vc_infer_pipeline import VC # Assuming vc_infer_pipeline.py has class VC
from utils import load_checkpoint
import json
# Prefer GPU when CUDA is available; otherwise run inference on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Paths to the bundled model assets (adjust if needed).
MODEL_DIR = "model"
CONFIG_PATH = os.path.join(MODEL_DIR, "config.json")
RVC_MODEL_PATH = os.path.join(MODEL_DIR, "G_020.pth")  # RVC generator checkpoint
HUBERT_MODEL_PATH = os.path.join(MODEL_DIR, "hubert_base.pt")  # HuBERT feature extractor
# Load model configuration from JSON at import time.
with open(CONFIG_PATH, "r") as f:
    config = json.load(f)
# Build the voice-conversion pipeline once at startup; reused by every request.
# NOTE(review): `config` is loaded above but VC also receives CONFIG_PATH and
# presumably re-reads it — confirm against vc_infer_pipeline whether the
# module-level `config` is actually needed.
vc_model = VC(
    hubert_model_path=HUBERT_MODEL_PATH,
    rvc_model_path=RVC_MODEL_PATH,
    config_path=CONFIG_PATH,
    device=device,
)
def voice_convert(input_audio, sid=0):
    """Convert an uploaded recording to the target voice via the RVC pipeline.

    Args:
        input_audio: ``(sample_rate, numpy_array)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was uploaded.
        sid: Target speaker ID forwarded to the RVC pipeline (default 0).

    Returns:
        Path to a temporary 16 kHz 16-bit WAV file with the converted audio,
        or ``None`` when no input was provided.
    """
    if input_audio is None:
        return None
    sr, audio = input_audio
    # Persist the input to a temp WAV. mkstemp + os.close avoids leaking the
    # open file descriptor that NamedTemporaryFile(delete=False).name leaves
    # behind (which also prevents re-opening the file on Windows).
    fd_in, temp_wav_in = tempfile.mkstemp(suffix=".wav")
    os.close(fd_in)
    try:
        write(temp_wav_in, sr, audio)
        # Run conversion (a UI control for f0_method could be added later).
        converted_audio = vc_model.pipeline(
            temp_wav_in,
            sid=int(sid),
            f0_method="pm",
        )
    finally:
        # The input temp file is only needed during conversion; remove it so
        # repeated requests do not accumulate files in the temp directory.
        try:
            os.remove(temp_wav_in)
        except OSError:
            pass
    # Write the result as 16-bit PCM at 16 kHz. Clip before the int16 cast so
    # samples outside [-1, 1] saturate instead of wrapping around.
    # NOTE(review): assumes the pipeline returns float audio roughly in
    # [-1, 1] — confirm against vc_infer_pipeline.
    fd_out, temp_wav_out = tempfile.mkstemp(suffix=".wav")
    os.close(fd_out)
    pcm = np.clip(converted_audio * 32767, -32768, 32767).astype(np.int16)
    write(temp_wav_out, 16000, pcm)
    return temp_wav_out
# Assemble the Gradio interface: upload widget, speaker selector, output
# player, and a button wired to voice_convert.
with gr.Blocks() as demo:
    gr.Markdown("# RVC Voice Conversion Demo")
    uploaded_audio = gr.Audio(source="upload", type="numpy", label="Upload audio")
    speaker_slider = gr.Slider(0, 0, 0, step=1, label="Speaker ID (default 0)")
    converted_output = gr.Audio(label="Converted Audio")
    convert_button = gr.Button("Convert")
    convert_button.click(
        voice_convert,
        inputs=[uploaded_audio, speaker_slider],
        outputs=converted_output,
    )

if __name__ == "__main__":
    demo.launch()