""" 🎙️ Kokoro-TTS-only demo – Zero-GPU edition Routes every synthesis to an idle A100. """ import os, tempfile, subprocess, numpy as np import gradio as gr import spaces # Zero-GPU decorator import soundfile as sf # ------------------------------------------------------------------ # 1. Lazy Kokoro loader (runs once per GPU worker) # ------------------------------------------------------------------ kokoro_pipe = None def load_kokoro(): global kokoro_pipe if kokoro_pipe is None: from kokoro import KPipeline kokoro_pipe = KPipeline(lang_code='a') return kokoro_pipe # ------------------------------------------------------------------ # 2. Generation helper # ------------------------------------------------------------------ @spaces.GPU def tts_kokoro(text, voice, speed): pipe = load_kokoro() generator = pipe(text, voice=voice, speed=speed) for gs, ps, audio in generator: return audio raise RuntimeError("Kokoro generation failed") # ------------------------------------------------------------------ # 3. Zero-GPU entry point # ------------------------------------------------------------------ @spaces.GPU def synthesise(text, voice, speed): if not text.strip(): raise gr.Error("Please enter some text.") wav = tts_kokoro(text, voice=voice, speed=speed) fd, tmp = tempfile.mkstemp(suffix='.wav') os.close(fd) sf.write(tmp, wav, 24000) return tmp # ------------------------------------------------------------------ # 4. Gradio UI # ------------------------------------------------------------------ css = """footer {visibility: hidden}""" with gr.Blocks(css=css, title="Kokoro TTS – Zero-GPU") as demo: gr.Markdown("## 🎙️ Kokoro TTS – Zero-GPU Demo") with gr.Row(): with gr.Column(): voice = gr.Dropdown( label="Voice", choices=['af_heart', 'af_sky', 'af_mist', 'af_dusk'], value='af_heart' ) speed = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Speed") with gr.Column(scale=3): text = gr.Textbox( label="Text to speak", placeholder="Type or paste text here …", lines=6, max_lines=12 ) btn = gr.Button("🎧 Synthesise", variant="primary") audio_out = gr.Audio(label="Generated speech", type="filepath") btn.click(synthesise, inputs=[text, voice, speed], outputs=audio_out) gr.Markdown("### Tips \n" "- **Kokoro** – fast, high-quality English TTS \n" "Audio is returned as 24 kHz WAV.") demo.launch()