import os

# Must be set BEFORE `kokoro` is imported, otherwise the espeak check runs anyway.
os.environ["KOKORO_NO_ESPEAK_CHECK"] = "1"

import numpy as np
import torch
import gradio as gr

from kokoro import KPipeline

# Kokoro synthesizes at a fixed 24 kHz sample rate.
SAMPLE_RATE = 24000

device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline = KPipeline(lang_code="a", device=device)


def _to_numpy(audio):
    """Convert one pipeline audio chunk (torch tensor or array-like) to a numpy array.

    `.detach().cpu()` is required when the model runs on CUDA — `np.asarray`
    on a CUDA tensor raises.
    """
    if isinstance(audio, torch.Tensor):
        return audio.detach().cpu().numpy()
    return np.asarray(audio)


def synthesize(text, speed, voice):
    """Synthesize `text` to speech with the Kokoro pipeline.

    Parameters
    ----------
    text : str
        Input text; long input is split by the pipeline into multiple segments.
    speed : float
        Speaking-rate multiplier (0.5–2.0 from the UI slider).
    voice : str
        Kokoro voice identifier (e.g. "af_heart").

    Returns
    -------
    tuple[int, np.ndarray]
        (sample_rate, mono waveform) in the format `gr.Audio(type="numpy")` expects.

    Raises
    ------
    gr.Error
        If the text is empty/whitespace or the model yields no audio.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    # BUG FIX: the original used next(pipeline(...)) and returned only the FIRST
    # segment, silently truncating long inputs. Collect and join every segment.
    chunks = [
        _to_numpy(audio)
        for _gs, _ps, audio in pipeline(text, voice=voice, speed=speed)
    ]
    if not chunks:
        raise gr.Error("The model produced no audio for this input.")
    return (SAMPLE_RATE, np.concatenate(chunks))


demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(lines=3, label="Text"),
        gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Speed"),
        gr.Dropdown(
            ["af_heart", "af_bella", "af_sarah", "bf_emma", "bm_george"],
            value="af_heart",
            label="Voice",
        ),
    ],
    outputs=gr.Audio(type="numpy", label="Speech Output"),
)

demo.queue().launch(server_name="0.0.0.0", server_port=7860)