"""Gradio demo that clones a reference voice with the Coqui XTTS v2 model."""

import os
import tempfile

import gradio as gr
from TTS.api import TTS

# Loaded once at import time so every request reuses the same model instance.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)


def clone(text, speaker_wav, language="en"):
    """Synthesize *text* in the voice of the reference recording.

    Args:
        text: The text to speak.
        speaker_wav: Filesystem path to the reference audio file. The UI
            below uses ``gr.Audio(type="filepath")`` so that Gradio supplies
            a path rather than raw samples.
        language: Language code forwarded to ``tts.tts_to_file``.
            Defaults to ``"en"``.

    Returns:
        Path to the generated WAV file.

    Raises:
        gr.Error: If the text is empty or the reference audio is missing.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not isinstance(speaker_wav, (str, os.PathLike)) or not os.path.isfile(
        speaker_wav
    ):
        raise gr.Error("Please provide a reference voice recording.")

    # Reserve a unique output path; close the descriptor right away because
    # the TTS call writes to the file by name.
    fd, out = tempfile.mkstemp(suffix="_out.wav")
    os.close(fd)

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=os.fspath(speaker_wav),
            language=language,
            file_path=out,
        )
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        os.unlink(out)
        raise

    # The file must outlive this call so the output component can read it.
    return out


ui = gr.Interface(
    fn=clone,
    inputs=[
        gr.Textbox(label="Text"),
        # "filepath" passes a path to clone(); "numpy" would pass a
        # (sample_rate, ndarray) tuple, which cannot be written out as a WAV
        # file with a plain file.write().
        gr.Audio(type="filepath", label="Reference Voice"),
    ],
    outputs=gr.Audio(label="Cloned Voice"),
    title="XTTS Voice Clone",
)

if __name__ == "__main__":
    ui.launch()