"""Gradio demo app for RVC (Retrieval-based Voice Conversion).

Loads a HuBERT feature extractor and an RVC generator checkpoint, then exposes
a simple upload -> convert -> download UI. Expects the model files under
``model/`` (see path constants below).
"""

import json
import os
import tempfile

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write

from vc_infer_pipeline import VC  # project-local: VC wraps the conversion pipeline
from utils import load_checkpoint  # NOTE(review): imported but unused here — confirm needed

# Use GPU when available; the pipeline runs on CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Paths (adjust if needed)
MODEL_DIR = "model"
CONFIG_PATH = os.path.join(MODEL_DIR, "config.json")
RVC_MODEL_PATH = os.path.join(MODEL_DIR, "G_020.pth")
HUBERT_MODEL_PATH = os.path.join(MODEL_DIR, "hubert_base.pt")

# Load config (kept at module level so a bad path fails fast at startup).
with open(CONFIG_PATH, "r") as f:
    config = json.load(f)

# Load voice conversion pipeline once; reused across requests.
vc_model = VC(
    hubert_model_path=HUBERT_MODEL_PATH,
    rvc_model_path=RVC_MODEL_PATH,
    config_path=CONFIG_PATH,
    device=device,
)


def voice_convert(input_audio, sid=0):
    """Convert uploaded audio to the target speaker's voice.

    Parameters
    ----------
    input_audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` as produced by ``gr.Audio(type="numpy")``,
        or ``None`` when nothing was uploaded.
    sid : int
        Speaker ID passed to the RVC pipeline (default 0).

    Returns
    -------
    str | None
        Path to a temporary 16 kHz WAV file with the converted audio, or
        ``None`` when no input was provided.
    """
    if input_audio is None:
        return None

    sr, audio = input_audio

    # Write the upload to a temp WAV because the pipeline takes a file path.
    temp_wav_in = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    try:
        write(temp_wav_in, sr, audio)

        # Run conversion. NOTE(review): pipeline assumed to return float
        # samples in roughly [-1, 1] at 16 kHz — confirm against VC.pipeline.
        converted_audio = vc_model.pipeline(
            temp_wav_in,
            sid=int(sid),
            f0_method="pm",  # you can add UI for this if needed
        )
    finally:
        # Fix: the input temp file previously leaked on every call.
        try:
            os.remove(temp_wav_in)
        except OSError:
            pass  # best-effort cleanup; never mask a pipeline error

    # Clip before the int16 cast so out-of-range samples saturate instead of
    # integer-wrapping into loud artifacts.
    pcm = (np.clip(converted_audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Output temp file is returned to Gradio, which serves it to the client;
    # it must outlive this function, so delete=False is intentional here.
    temp_wav_out = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    write(temp_wav_out, 16000, pcm)
    return temp_wav_out


with gr.Blocks() as demo:
    gr.Markdown("# RVC Voice Conversion Demo")
    audio_input = gr.Audio(source="upload", type="numpy", label="Upload audio")
    # Single-speaker checkpoint: slider is pinned to 0 on purpose.
    speaker_id = gr.Slider(0, 0, 0, step=1, label="Speaker ID (default 0)")
    output_audio = gr.Audio(label="Converted Audio")
    btn = gr.Button("Convert")
    btn.click(voice_convert, inputs=[audio_input, speaker_id], outputs=output_audio)

if __name__ == "__main__":
    demo.launch()