Spaces:

VictorKola
/

Index_tts_streamlit

Build error

App Files Files Community

Index_tts_streamlit / src /streamlit_app.py

VictorKola

Update src/streamlit_app.py

eff35ed verified 8 months ago

raw

history blame contribute delete

2.74 kB

	import io
	import os
	import tempfile

	import soundfile as sf
	import streamlit as st
	import torch
	from streamlit_audiorecorder import audiorecorder
	from TTS.api import TTS

	# Usage steps rendered under the page title.
	INTRO_STEPS = """
	1. Record your voice or upload an existing audio file (WAV/MP3).
	2. Enter the text you want spoken in _your_ voice.
	3. (Optional) Paste an API key if required by your model/service.
	4. Click Generate to hear the cloned speech.
	"""

	# Page-level configuration, then the title and the intro text.
	st.set_page_config(page_title="Voice Clone TTS", layout="centered")
	st.title("🎙️ Voice-Cloning Text-to-Speech")
	st.markdown(INTRO_STEPS)

	# 1) AUDIO INPUT: record or upload
	st.header("1. Provide your voice sample")
	col1, col2 = st.columns(2)

	with col1:
	    st.write("Record in-page")
	    # NOTE(review): the checks below only accept raw bytes; confirm that the
	    # recorder component returns bytes rather than some other audio type.
	    audio_bytes = audiorecorder("Click to record", "Recording…")
	    # Preview uses the same accepted types as the validation further down.
	    if isinstance(audio_bytes, (bytes, bytearray)) and audio_bytes:
	        st.audio(bytes(audio_bytes), format="audio/wav")

	with col2:
	    st.write("Or upload file")
	    upload = st.file_uploader("Upload WAV/MP3", type=["wav", "mp3"])
	    if upload is not None:
	        # An uploaded file replaces any in-page recording. getvalue() returns
	        # the whole buffer regardless of the current read position.
	        audio_bytes = upload.getvalue()
	        st.audio(audio_bytes, format=upload.type)

	# audio_bytes is always bound by the recorder call above, so only its type
	# and size need checking. An empty sample is rejected here instead of
	# failing later when the audio is decoded.
	if not isinstance(audio_bytes, (bytes, bytearray)) or not audio_bytes:
	    st.warning("Please record or upload a valid audio sample before proceeding.")
	    st.stop()

	# 2) USER TEXT & (optional) KEY
	st.header("2. Text & API key")

	# Text to synthesize, pre-filled with a sample sentence.
	text_input = st.text_area(
	    label="Enter text to speak in your voice",
	    value="Hello, this is my cloned voice!",
	    height=120,
	)

	# Masked input. The value is collected here but is not referenced anywhere
	# else in the visible script.
	api_key = st.text_input(
	    label="API Key (if your model needs one)",
	    type="password",
	)

	# 3) LOAD & CACHE THE TTS PIPELINE
	@st.cache_resource(show_spinner=False)
	def load_tts_model(model_name: str = "IndexTeam/IndexTTS-1.5") -> "TTS":
	    """Build the TTS pipeline used for voice cloning.

	    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
	    returned object across reruns for the same ``model_name`` instead of
	    constructing it again.

	    Args:
	        model_name: Identifier handed to ``TTS``. Defaults to the model the
	            app was written for; pass another multispeaker/cloning model to
	            swap it without editing this function.
	            NOTE(review): confirm the default identifier can be resolved by
	            the installed TTS package.

	    Returns:
	        The constructed ``TTS`` object, placed on the GPU when
	        ``torch.cuda.is_available()`` reports one.
	    """
	    # Coqui TTS uses its own GPU flag
	    use_gpu = torch.cuda.is_available()
	    return TTS(model_name=model_name, progress_bar=False, gpu=use_gpu)

	tts = load_tts_model()

	# 4) GENERATE
	if st.button("▶️ Generate Speech"):
	    if not text_input.strip():
	        st.error("Please enter some text to synthesize.")
	        st.stop()

	    with st.spinner("Cloning your voice…"):
	        # Decode the sample a single time; both the samples and the sample
	        # rate come from the same read.
	        try:
	            ref_samples, ref_rate = sf.read(io.BytesIO(audio_bytes))
	        except RuntimeError as err:
	            # Report an undecodable sample in the page instead of a traceback.
	            st.error(f"Could not decode the voice sample: {err}")
	            st.stop()

	        # The synthesis call takes the reference voice as a file path, so the
	        # sample is written as WAV into a temporary directory that is removed
	        # as soon as synthesis finishes (nothing is left behind on disk).
	        with tempfile.TemporaryDirectory() as workdir:
	            ref_path = os.path.join(workdir, "reference.wav")
	            sf.write(ref_path, ref_samples, samplerate=ref_rate)

	            # do the TTS with your voice as reference
	            wav = tts.tts(text=text_input, speaker_wav=ref_path)

	        # Encode the result as WAV in memory; a file-like target has no
	        # extension, so the container format is given explicitly.
	        out_buffer = io.BytesIO()
	        sf.write(
	            out_buffer,
	            wav,
	            samplerate=tts.synthesizer.output_sample_rate,
	            format="WAV",
	        )

	    st.success("✅ Done!")
	    st.audio(out_buffer.getvalue(), format="audio/wav")