Spaces:
Build error
Build error
import io
import tempfile
from pathlib import Path

import soundfile as sf
import streamlit as st
import torch
# NOTE(review): the PyPI package `streamlit-audiorecorder` documents its import
# as `from audiorecorder import audiorecorder` — confirm that the module name
# used here actually resolves in the deployed environment.
from streamlit_audiorecorder import audiorecorder
from TTS.api import TTS
# Page chrome. set_page_config is issued before any other Streamlit call.
st.set_page_config(page_title="Voice Clone TTS", layout="centered")
st.title("🎙️ Voice-Cloning Text-to-Speech")

# Numbered usage steps rendered as Markdown directly under the title.
_USAGE_STEPS = """
1. **Record** your voice or **upload** an existing audio file (WAV/MP3).
2. Enter the **text** you want spoken in _your_ voice.
3. (Optional) Paste an **API key** if required by your model/service.
4. Click **Generate** to hear the cloned speech.
"""
st.markdown(_USAGE_STEPS)
# 1) AUDIO INPUT: record or upload
st.header("1. Provide your voice sample")


def _as_wav_bytes(recording):
    """Normalise a recorder result to ``bytes``; return ``None`` if it is empty.

    Raw ``bytes``/``bytearray`` are passed through. Any other object that
    exposes a callable ``export`` and a non-zero length is exported to an
    in-memory WAV.
    NOTE(review): the second branch targets the pydub ``AudioSegment`` that the
    streamlit-audiorecorder README describes as the component's return value —
    confirm against the installed version.
    """
    if isinstance(recording, (bytes, bytearray)):
        return bytes(recording) if recording else None
    export = getattr(recording, "export", None)
    if callable(export) and len(recording) > 0:
        wav_buffer = io.BytesIO()
        export(wav_buffer, format="wav")
        return wav_buffer.getvalue()
    return None


col1, col2 = st.columns(2)
with col1:
    st.write("**Record in-page**")
    audio_bytes = _as_wav_bytes(audiorecorder("Click to record", "Recording…"))
    if audio_bytes is not None:
        st.audio(audio_bytes, format="audio/wav")
with col2:
    st.write("**Or upload file**")
    upload = st.file_uploader("Upload WAV/MP3", type=["wav", "mp3"])
    if upload is not None:
        # An uploaded file takes precedence over an in-page recording.
        audio_bytes = upload.read()
        st.audio(audio_bytes, format=upload.type)

# Halt this script run until a non-empty sample is available; everything
# below relies on `audio_bytes` holding encoded audio.
if not audio_bytes:
    st.warning("Please record or upload a valid audio sample before proceeding.")
    st.stop()
# 2) USER TEXT & (optional) KEY
st.header("2. Text & API key")

# Multi-line box pre-filled with a sample sentence to synthesize.
text_input = st.text_area(
    "Enter text to speak in your voice",
    value="Hello, this is my cloned voice!",
    height=120,
)

# Masked single-line field. NOTE(review): `api_key` is not referenced by any
# later line in the visible part of this file.
api_key = st.text_input(
    "API Key (if your model needs one)",
    type="password",
)
# 3) LOAD & CACHE THE TTS PIPELINE
@st.cache_resource
def load_tts_model():
    """Build the TTS pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without ``st.cache_resource`` the model would be constructed again on
    each rerun.

    Returns:
        The ``TTS`` instance, with the GPU flag set when CUDA is available.
    """
    # replace with your chosen multispeaker/cloning model
    # NOTE(review): this looks like a Hugging Face repo id rather than a Coqui
    # model name (e.g. "tts_models/<lang>/<dataset>/<model>") — confirm that
    # TTS() can resolve it.
    model_name = "IndexTeam/IndexTTS-1.5"
    # Coqui TTS uses its own GPU flag
    return TTS(model_name=model_name, progress_bar=False, gpu=torch.cuda.is_available())


tts = load_tts_model()
# 4) GENERATE
if st.button("▶️ Generate Speech"):
    if not text_input.strip():
        st.error("Please enter some text to synthesize.")
        st.stop()

    with st.spinner("Cloning your voice…"):
        # Decode the reference sample once, yielding samples and sample rate.
        try:
            ref_samples, ref_rate = sf.read(io.BytesIO(audio_bytes))
        except RuntimeError as err:
            # soundfile reports undecodable or unsupported input via
            # RuntimeError subclasses; surface it instead of a raw traceback.
            st.error(f"Could not decode the voice sample: {err}")
            st.stop()

        # The reference WAV only needs to exist while tts.tts() runs, so keep
        # it in a temporary directory that is removed afterwards.
        with tempfile.TemporaryDirectory() as work_dir:
            ref_path = str(Path(work_dir) / "reference.wav")
            sf.write(ref_path, ref_samples, samplerate=ref_rate)
            # do the TTS with your voice as reference
            wav = tts.tts(text=text_input, speaker_wav=ref_path)

        # Encode the result as WAV in memory so no output file is left on disk.
        out_buffer = io.BytesIO()
        sf.write(
            out_buffer,
            wav,
            samplerate=tts.synthesizer.output_sample_rate,
            format="WAV",
        )

    st.success("✅ Done!")
    st.audio(out_buffer.getvalue(), format="audio/wav")