import tempfile
import time

import gradio as gr
import nltk
import numpy as np
import soundfile as sf
from kokoro import KPipeline
from nltk.tokenize import sent_tokenize

# Make sure the NLTK sentence-tokenizer data is available; download it on
# first run only.
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')
    nltk.download('punkt')

# Single shared Kokoro pipeline, created once at import time.
pipeline = KPipeline(lang_code="a")

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Sample rate (Hz) used when writing the WAV file and returning audio.
SR = 24000


def tts_stream(text, voice):
    """Synthesize *text* sentence by sentence, streaming progress updates.

    This is a generator. Every item it yields is a 4-tuple
    ``(audio, wav_path, progress, status)``:

    * ``audio`` -- ``None`` while working, ``(SR, samples)`` on completion.
    * ``wav_path`` -- ``None`` while working, path of the written WAV file
      on completion.
    * ``progress`` -- integer percentage in ``0..100``.
    * ``status`` -- human-readable status message.

    Args:
        text: Input text; ``None`` or whitespace-only input yields a single
            "Please enter text." update and stops.
        voice: Voice identifier passed through to the Kokoro pipeline.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # NLTK splits on real sentence boundaries, so abbreviations such as
    # "Dr." / "Mr." and punctuation inside quotes do not cause bad splits.
    sentences = sent_tokenize(text)
    total = len(sentences)
    audio_chunks = []

    print(f"Split into {total} sentences.")

    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            continue

        # The pipeline returns a generator of (graphemes, phonemes, audio)
        # items; only the audio is needed here.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            if audio is None:
                # np.asarray(None) would give a 0-d NaN array, which
                # np.concatenate cannot join; skip chunks without audio.
                continue
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        # Stream progress to the UI after each sentence.
        progress = int((i + 1) / total * 100)
        yield None, None, progress, f"Processing sentence {i+1}/{total}..."

        # Anti-timeout heartbeat
        time.sleep(0.05)

    if not audio_chunks:
        # Nothing was synthesized: report it instead of writing an empty WAV
        # and handing an empty array to the audio player.
        yield None, None, 100, "No audio was generated."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a temp path for the download button. Close the handle before
    # writing so no descriptor is leaked and the path can be reopened by
    # soundfile on platforms that forbid opening an already-open file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    # Final update: audio for the player and the file path for download.
    yield (SR, final_audio), wav_path, 100, "Completed!"
# Build the Gradio interface: inputs in the left column, results in the right.
with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
    gr.Markdown(value="## ⚡ Kokoro TTS – Smart Sentence Splitting")

    with gr.Row():
        # Left column: text input, voice selection and the trigger button.
        with gr.Column():
            text = gr.Textbox(
                lines=12,
                label="Input text",
                placeholder="Paste long text here...",
            )
            voice = gr.Dropdown(
                choices=VOICES,
                value="af_heart",
                label="Voice",
            )
            run_btn = gr.Button(value="Generate", variant="primary")

        # Right column: playback, download, and progress/status read-outs.
        with gr.Column():
            audio_output = gr.Audio(label="Audio Output", interactive=False)
            file_download = gr.File(label="Download WAV")
            progress = gr.Slider(
                minimum=0,
                maximum=100,
                step=1,
                label="Progress",
                interactive=False,
            )
            status = gr.Textbox(label="Status", interactive=False)

    # Each tuple yielded by tts_stream maps onto these four outputs in order.
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Enable the request queue, then start the server.
demo.queue()
demo.launch()