Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from TTS.api import TTS | |
| import tempfile | |
| import os | |
| from pydub import AudioSegment | |
# Streamlit re-executes this whole script on every widget interaction, so the
# model is created through a cached factory instead of directly at module
# level; otherwise XTTS v2 would be reloaded on every rerun.
@st.cache_resource
def load_tts_model():
    """Load the XTTS v2 model once and reuse it across reruns."""
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2")


# Initialize the TTS model (cached after the first run)
tts = load_tts_model()

# Streamlit UI
st.title("XTTS v2 Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar for reference voice
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader(
    "Upload a reference audio (any format)",
    type=["wav", "mp3", "ogg", "flac", "m4a"],
)
def convert_to_wav(audio_file):
    """Convert an audio file to a temporary WAV file and return its path.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` accepts; in this
            script it is the file object returned by the sidebar uploader.

    Returns:
        The path of a newly created ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``AudioSegment.from_file`` or ``export`` raises; the
        temporary file is removed before the error propagates.
    """
    # Only the unique file name is needed. Close the handle immediately so
    # it is not leaked and the exporter can reopen the path by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name

    try:
        audio = AudioSegment.from_file(audio_file)
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave an orphaned temp file behind when conversion fails.
        try:
            os.remove(wav_path)
        except OSError:
            pass
        raise
    return wav_path
text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

# Temp-file paths are only created once the button is pressed; converting the
# upload on every script rerun would leave a new temp file behind each time.
ref_audio_path = None
output_path = None

if st.button("Generate Speech"):
    if not text_input.strip():
        st.warning("Please enter some text to convert to speech.")
    elif reference_audio is None:
        # The model is multi-speaker: without a reference voice there is
        # nothing to pass as speaker_wav and synthesis cannot proceed.
        st.warning("Please upload a reference audio in the sidebar first.")
    else:
        try:
            with st.spinner("Generating audio..."):
                ref_audio_path = convert_to_wav(reference_audio)

                # Per-request output file: a fixed "output.wav" would be
                # shared (and overwritten) by every concurrent session.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_file:
                    output_path = out_file.name

                # Generate speech using XTTS v2
                tts.tts_to_file(
                    text=text_input,
                    file_path=output_path,
                    speaker_wav=ref_audio_path,
                    language="en",
                )

                # Read the result into memory so the file can be deleted
                # below without affecting playback.
                with open(output_path, "rb") as wav_file:
                    audio_bytes = wav_file.read()

            # Play the audio in the Streamlit app
            st.audio(audio_bytes, format="audio/wav")
            st.success("Speech generated successfully!")
        finally:
            # Clean up temporary files even when conversion or synthesis fails.
            for temp_path in (ref_audio_path, output_path):
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)