"""Streamlit front end for Coqui TTS: type text, listen to it, download the WAV."""

from io import BytesIO

import numpy as np
import streamlit as st
import torch
from scipy.io.wavfile import write
from TTS.api import TTS

MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
# Used only when the loaded model does not expose its own output sample rate.
FALLBACK_SAMPLE_RATE = 22050
DEFAULT_TEXT = (
    "Imagine being able to offer your clients curated travel insights "
    "at their fingertips."
)
# Session-state key under which the most recently generated WAV bytes are kept.
AUDIO_STATE_KEY = "generated_wav_bytes"


@st.cache_resource
def load_tts_model(device_name):
    """Build the TTS model once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be re-initialised each time.

    Args:
        device_name: "cuda" to run on the GPU, anything else for the CPU.

    Returns:
        The initialised ``TTS`` instance.
    """
    return TTS(
        model_name=MODEL_NAME,
        progress_bar=True,
        gpu=device_name == "cuda",
    )


def synthesize_wav_bytes(model, text_to_speak):
    """Synthesize ``text_to_speak`` with ``model`` and return it as WAV bytes.

    Args:
        model: A ``TTS`` instance whose ``tts(text=...)`` call returns the
            waveform as a sequence of amplitude values.
        text_to_speak: The text to convert to speech.

    Returns:
        The complete WAV file (32-bit float samples) as ``bytes``.
    """
    samples = np.asarray(model.tts(text=text_to_speak), dtype=np.float32)

    # Prefer the rate reported by the model so a different model does not
    # produce audio that plays back at the wrong speed.
    synthesizer = getattr(model, "synthesizer", None)
    sample_rate = (
        getattr(synthesizer, "output_sample_rate", None) or FALLBACK_SAMPLE_RATE
    )

    wav_buffer = BytesIO()
    write(wav_buffer, int(sample_rate), samples)
    return wav_buffer.getvalue()


# Initialize the TTS model (cached, so this is cheap on reruns).
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = load_tts_model(device)

# App title
st.title("🐸 Coqui TTS - Text to Speech App")

# Text input
text = st.text_area("Enter the text to convert to speech", DEFAULT_TEXT)

# Submit button
if st.button("Generate Speech"):
    # Drop any previous result so stale audio is never shown next to a
    # warning or an error for the current request.
    st.session_state.pop(AUDIO_STATE_KEY, None)
    if not text.strip():
        st.warning("Please enter some text.")
    else:
        try:
            with st.spinner("Generating speech..."):
                st.session_state[AUDIO_STATE_KEY] = synthesize_wav_bytes(tts, text)
        except Exception as e:  # UI boundary: report the failure to the user.
            st.error(f"An error occurred: {e}")

# Render from session state rather than inside the button branch: clicking the
# download button triggers a rerun in which st.button() returns False, and the
# player would otherwise disappear.
wav_bytes = st.session_state.get(AUDIO_STATE_KEY)
if wav_bytes is not None:
    # Display audio player
    st.audio(wav_bytes, format="audio/wav")

    # Option to download the audio file
    st.download_button(
        "Download Audio",
        wav_bytes,
        file_name="output.wav",
        mime="audio/wav",
    )