"""Gradio front end for the Kokoro TTS engine.

Builds a single-page UI (script text box, voice/speed/language controls and an
audio player) and wires the "Generate" button to :func:`tts_process`.
"""

import os
import tempfile

import gradio as gr
import numpy as np

from kokoro_engine import KokoroEngine
from processor import ScriptProcessor

# Initialize components (shared by every request handled by this process).
engine = KokoroEngine()
processor = ScriptProcessor(engine)


def tts_process(text, voice, speed, lang, long_script_mode):
    """Synthesize *text* and return the path of the generated ``.wav`` file.

    Args:
        text: Script to synthesize.
        voice: Voice identifier chosen in the dropdown.
        speed: Speed value from the slider.
        lang: Language code chosen in the dropdown (e.g. ``"en-us"``).
        long_script_mode: When truthy, route through
            ``processor.process_long_script`` instead of ``engine.generate``.

    Returns:
        Path to a temporary ``.wav`` file. The file is created with
        ``delete=False`` so it outlives this call and can be served to the
        browser.

    Raises:
        gr.Error: If the text is empty or synthesis/saving fails. The output
            component expects a file path, so an error *string* must not be
            returned as the value; raising ``gr.Error`` surfaces the message
            in the UI instead.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    out_path = None
    try:
        if long_script_mode:
            audio, sr = processor.process_long_script(text, voice, speed, lang)
        else:
            audio, sr = engine.generate(text, voice, speed, lang)

        # Reserve a unique path, close the handle, then let the processor
        # write to it, so the file is never written through two open handles.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            out_path = tmp.name
        processor.save_audio(audio, sr, out_path)
    except Exception as e:
        # Don't leave a half-written temp file behind on failure.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError:
                pass
        raise gr.Error(str(e)) from e
    return out_path


def clone_process(audio_ref):
    """Register *audio_ref* as a cloning reference and return a status message.

    Returns a prompt asking for an upload when *audio_ref* is ``None``;
    otherwise delegates to ``engine.clone_voice_placeholder`` and reports the
    ID it returns.
    """
    if audio_ref is None:
        return "Please upload an audio file for cloning."

    # Placeholder for actual cloning logic
    voice_id = engine.clone_voice_placeholder(audio_ref)
    return f"Voice cloned successfully! Reference ID: {voice_id}. You can now use this voice (currently defaults to {voice_id})."


# Flatten the per-category voice lists into one list for the dropdown.
all_voices = [v for voices in engine.voices.values() for v in voices]

# Premium CSS for high-end look
# NOTE(review): `.footer` is a class selector; confirm it matches the footer
# markup of the Gradio version in use.
custom_css = """
.container {
    max-width: 900px !important;
    margin: auto !important;
    padding-top: 2rem !important;
}
.header {
    text-align: center;
    margin-bottom: 2rem;
}
.header h1 {
    font-size: 3rem !important;
    font-weight: 800 !important;
    background: linear-gradient(90deg, #ff00cc, #3333ff);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem !important;
}
.header p {
    font-size: 1.1rem !important;
    color: #888;
}
.input-group {
    border-radius: 12px !important;
    border: 1px solid #333 !important;
    background: #111 !important;
    padding: 1rem !important;
    margin-bottom: 1.5rem !important;
}
.footer {
    visibility: hidden;
}
button.primary {
    background: linear-gradient(90deg, #ff00cc, #3333ff) !important;
    border: none !important;
    font-weight: bold !important;
    border-radius: 8px !important;
}
button.primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 15px rgba(255, 0, 204, 0.4);
}
"""

# NOTE(review): the nesting of the layout contexts below was reconstructed
# from statement order; confirm it matches the intended layout.
with gr.Blocks(title="Kokoro TTS Premium") as demo:
    with gr.Column(elem_classes="container"):
        with gr.Column(elem_classes="header"):
            gr.Markdown("# 🌸 Kokoro TTS")
            gr.Markdown("High-fidelity neural speech synthesis powered by Kokoro-82M")

        with gr.Column(elem_classes="input-group"):
            text_input = gr.Textbox(
                label="Script Content",
                placeholder="Paste your story, script, or text here...",
                lines=10,
                elem_id="text-input",
            )

            with gr.Row():
                voice_select = gr.Dropdown(
                    choices=all_voices,
                    value="af_heart",
                    label="Voice Archetype",
                    scale=2,
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Pacing (Speed)",
                    scale=1,
                )

            with gr.Accordion("⚙️ Engine Configurations", open=False):
                with gr.Row():
                    lang_select = gr.Dropdown(
                        choices=[
                            ("🇺🇸 English (US)", "en-us"),
                            ("🇬🇧 English (UK)", "en-gb"),
                            ("🇨🇳 Chinese", "zh"),
                            ("🇮🇳 Hindi", "hi"),
                            ("🇯🇵 Japanese", "ja"),
                            ("🇪🇸 Spanish", "es"),
                            ("🇫🇷 French", "fr"),
                            ("🇮🇹 Italian", "it"),
                            ("🇵🇹 Portuguese", "pt"),
                        ],
                        value="en-us",
                        label="Linguistic Context",
                    )
                    long_script_toggle = gr.Checkbox(
                        label="Optimize for Long Duration (Safe Chunking)",
                        value=False,
                    )

        generate_btn = gr.Button("⚡ Generate Neural Audio", variant="primary", size="lg")

        with gr.Column(variant="compact"):
            audio_output = gr.Audio(
                label="Master Audio Output",
                type="filepath",
            )

    # Event listeners must be registered inside the Blocks context.
    generate_btn.click(
        tts_process,
        inputs=[text_input, voice_select, speed_slider, lang_select, long_script_toggle],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch(ssr_mode=False, css=custom_css)