# Spaces: Sleeping
| import gradio as gr | |
| import tempfile | |
| import soundfile as sf | |
| import numpy as np | |
| from kokoro import KPipeline | |
| import time | |
| import nltk | |
# Sentence splitting below relies on NLTK's Punkt tokenizer data. Look for it
# locally first so the downloads only run when the data is missing.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")
    nltk.download("punkt")

from nltk.tokenize import sent_tokenize  # noqa: E402  (kept after the data check)
# One Kokoro pipeline, built once at import time and reused by every request.
# NOTE(review): lang_code="a" is presumably American English per the Kokoro
# docs - confirm, since VOICES also lists bf_/bm_ voices.
pipeline = KPipeline(lang_code="a")
# Voice identifiers offered in the UI dropdown and passed on to the pipeline.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Sample rate in Hz used for the written WAV file and the audio handed to the
# player.
SR = 24000
def tts_stream(text, voice):
    """Synthesize ``text`` sentence by sentence, streaming progress to the UI.

    This is a generator. Every item it yields is a 4-tuple
    ``(audio, wav_path, progress, status)``:

    * ``audio`` - ``None`` while working; ``(SR, samples)`` on the final item.
    * ``wav_path`` - ``None`` while working; path of the written WAV file on
      the final item.
    * ``progress`` - integer percentage between 0 and 100.
    * ``status`` - human-readable status message.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only input yields a
            single "Please enter text." item and stops.
        voice: Voice identifier forwarded to the Kokoro pipeline.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # NLTK splits on real sentence boundaries, so abbreviations such as "Dr."
    # or "Mr.", as well as "?", "!" and quotes, do not cut sentences in half.
    sentences = sent_tokenize(text)
    total = len(sentences)
    audio_chunks = []
    print(f"Split into {total} sentences.")

    for i, sentence in enumerate(sentences):
        if not sentence.strip():
            continue

        # The pipeline yields 3-tuples; only the third element (the audio) is
        # needed here.
        for _, _, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        # Report progress once the whole sentence has been synthesized.
        progress = int((i + 1) / total * 100)
        yield None, None, progress, f"Processing sentence {i+1}/{total}..."

        # Anti-timeout heartbeat
        time.sleep(0.05)

    if not audio_chunks:
        # The pipeline produced no audio for any sentence; report that instead
        # of writing and returning an empty clip.
        yield None, None, 100, "No audio was generated."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a path for the download button. delete=False keeps the file on
    # disk after the handle is closed; closing the handle before writing avoids
    # leaking a descriptor per request and lets soundfile open the path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    # Final item: audio for the player plus the file for download.
    yield (SR, final_audio), wav_path, 100, "Completed!"
# UI layout: inputs in the first column, outputs and progress in the second.
with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
    gr.Markdown("## ⚡ Kokoro TTS – Smart Sentence Splitting")

    with gr.Row():
        with gr.Column():
            text = gr.Textbox(
                lines=12,
                label="Input text",
                placeholder="Paste long text here...",
            )
            voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
            run_btn = gr.Button("Generate", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Audio Output", interactive=False)
            file_download = gr.File(label="Download WAV")
            # Read-only slider and textbox display the progress percentage and
            # status message yielded by tts_stream.
            progress = gr.Slider(
                0, 100, step=1, label="Progress", interactive=False
            )
            status = gr.Textbox(label="Status", interactive=False)

    # tts_stream is a generator; each tuple it yields is mapped onto the four
    # output components in the order listed here.
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Enable the request queue, then start the app.
demo.queue().launch()