"""Streamlit app that clones a voice from a reference WAV using the XTTS v2 model.

The user enters text and uploads a speaker WAV; the app synthesizes the text
in that speaker's voice, plays it back, and offers the result for download.
"""

import os
import tempfile

import streamlit as st
import torch
from TTS.api import TTS


@st.cache_resource
def load_tts_model():
    """Load the XTTS v2 model once and reuse it across Streamlit reruns.

    Returns:
        The ``TTS`` instance for ``tts_models/multilingual/multi-dataset/xtts_v2``,
        placed on the GPU when CUDA is available and on the CPU otherwise.
    """
    # Hard-coding gpu=True fails on machines without CUDA; detect it instead.
    return TTS(
        "tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=torch.cuda.is_available(),
    )


def _remove_quietly(path):
    """Delete *path* if it is set, ignoring filesystem errors (best-effort cleanup)."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # A leftover temp file must not mask the real result or error.
        pass


def _reserve_temp_wav():
    """Create an empty, uniquely named ``.wav`` temp file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        return handle.name


tts = load_tts_model()

# App title
st.title("🔊 Voice Cloning with XTTS v2")

# Text input
text_input = st.text_area("Enter the text you want to synthesize", height=150)

# Speaker file uploader
speaker_file = st.file_uploader("Upload a speaker WAV file", type=["wav"])

# Button to generate
if st.button("Generate Speech"):
    # Whitespace-only text would otherwise be sent to the model.
    if not text_input or not text_input.strip():
        st.error("Please enter text.")
    elif not speaker_file:
        st.error("Please upload a speaker WAV file.")
    else:
        speaker_path = None
        output_path = None
        try:
            with st.spinner("Generating voice..."):
                # Save the uploaded speaker audio to disk because the
                # synthesis call takes a file path for the reference voice.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".wav"
                ) as speaker_temp:
                    # getvalue() returns the whole upload regardless of the
                    # current read position of the buffer.
                    speaker_temp.write(speaker_file.getvalue())
                    speaker_path = speaker_temp.name

                # A unique output path per request: a fixed name in the shared
                # temp directory would let concurrent sessions overwrite each
                # other's audio.
                output_path = _reserve_temp_wav()

                # Generate speech
                tts.tts_to_file(
                    text=text_input,
                    file_path=output_path,
                    speaker_wav=speaker_path,
                    language="en",
                )

                # Read the result into memory so both temp files can be
                # removed before the script run ends.
                with open(output_path, "rb") as audio_file:
                    audio_bytes = audio_file.read()

            # Playback
            st.audio(audio_bytes, format="audio/wav")

            # Download link
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name="cloned_voice.wav",
                mime="audio/wav",
            )
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user instead
            # of crashing the script run.
            st.error(f"An error occurred: {e}")
        finally:
            # Clean up on success and on failure alike.
            _remove_quietly(speaker_path)
            _remove_quietly(output_path)