Spaces:
Paused
Paused
| import os | |
| import streamlit as st | |
| import time | |
| from kokoro import KPipeline | |
| import soundfile as sf | |
| import io | |
# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title("Text-to-Speech with Kokoro Pipeline")
st.markdown("Enter your text and configure options to generate audio segments.")

# ---------------------------------------------------------------------------
# User inputs
# ---------------------------------------------------------------------------
# Free-form text to synthesize, pre-filled with a sample sentence.
text = st.text_area(
    label="Enter text",
    value="The sky above the port was the color of television, tuned to a dead channel.",
    height=150,
)

# Maps the label shown in the dropdown to the voice id handed to the pipeline.
# Extend this mapping with further "label": "voice_id" entries to offer more
# voices.
voice_options = {
    "American English (af_heart)": "af_heart",
}
voice_choice = st.selectbox(label="Select Voice", options=list(voice_options))
voice = voice_options[voice_choice]

# Playback speed multiplier; 1.0 is the default position of the slider.
speed = st.slider(
    label="Speech Speed",
    min_value=0.5,
    max_value=2.0,
    value=1.0,
)
# Sample rate (Hz) passed to soundfile when encoding each generated segment.
SAMPLE_RATE = 24000


@st.cache_resource(show_spinner=False)
def _load_pipeline(lang_code):
    """Build the Kokoro pipeline for ``lang_code`` once and reuse it.

    Streamlit re-executes this script on every interaction, so without
    caching a fresh ``KPipeline`` would be constructed on every button click.

    Args:
        lang_code: Language code forwarded to ``KPipeline``.

    Returns:
        The ``KPipeline`` instance for that language code.
    """
    # NOTE(review): cached resources are shared across sessions; confirm that
    # KPipeline tolerates concurrent use if this app serves several users.
    return KPipeline(lang_code=lang_code)


if st.button("Generate Audio"):
    if not text.strip():
        st.error("Please enter some text!")
    else:
        try:
            # The pipeline language must match the chosen voice. Kokoro voice
            # ids appear to start with their language letter ("af_heart" ->
            # "a"), so derive it from the voice instead of hard-coding it.
            # TODO confirm this convention for any newly added voice ids.
            with st.spinner("Initializing TTS pipeline..."):
                pipeline = _load_pipeline(voice[0])

            # The call below is consumed as a generator, so the synthesis work
            # presumably happens while iterating. Keep the loop inside the
            # spinner so it stays visible for the whole generation.
            with st.spinner("Generating audio..."):
                generator = pipeline(
                    text,
                    voice=voice,
                    speed=speed,
                    split_pattern=r'\n+',
                )

                # Each item is (graphemes, phonemes, audio samples).
                for segment_index, (gs, ps, audio) in enumerate(generator):
                    st.markdown(f"**Segment {segment_index}**")
                    st.write("**Graphemes/Text:**", gs)
                    st.write("**Phonemes:**", ps)

                    # Encode the samples as an in-memory WAV file so that
                    # Streamlit's audio player can play them back.
                    audio_buffer = io.BytesIO()
                    sf.write(audio_buffer, audio, SAMPLE_RATE, format='WAV')
                    audio_buffer.seek(0)
                    st.audio(audio_buffer, format="audio/wav")

            st.success("Audio generation complete!")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user rather
            # than crashing the app.
            st.error("An error occurred during audio generation.")
            st.exception(e)