Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| import tempfile | |
| from kokoro_engine import KokoroEngine | |
| from processor import ScriptProcessor | |
| import numpy as np | |
# Module-level singletons: one synthesis engine, and the script processor
# that is constructed around that same engine instance.
engine = KokoroEngine()
processor = ScriptProcessor(engine)
def tts_process(text, voice, speed, lang, long_script_mode):
    """Synthesize *text* and return the path of the generated WAV file.

    Args:
        text: Script to synthesize.
        voice: Voice identifier forwarded to the engine/processor.
        speed: Speed value forwarded to the engine/processor.
        lang: Language code forwarded to the engine/processor.
        long_script_mode: When truthy, synthesize through
            ``processor.process_long_script``; otherwise call
            ``engine.generate`` directly.

    Returns:
        Path of a temporary ``.wav`` file containing the audio.

    Raises:
        gr.Error: If *text* is empty/whitespace, or if synthesis or saving
            fails. The result feeds an audio output that expects a file
            path, so failures are raised for the UI to display instead of
            being returned as a message string.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    out_path = None
    try:
        if long_script_mode:
            audio, sr = processor.process_long_script(text, voice, speed, lang)
        else:
            audio, sr = engine.generate(text, voice, speed, lang)

        # Reserve a unique path, then close the handle before writing so the
        # file can be reopened by name on every platform (an open
        # NamedTemporaryFile cannot be opened a second time on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            out_path = tmp.name
        processor.save_audio(audio, sr, out_path)
    except Exception as e:
        # Best-effort cleanup: delete=False means nothing else removes a
        # file that was reserved but never successfully written.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError:
                pass
        raise gr.Error(f"Speech generation failed: {e}") from e
    return out_path
def clone_process(audio_ref):
    """Pass a reference clip to the engine's placeholder cloning hook.

    Args:
        audio_ref: Reference audio to clone from, or ``None`` when nothing
            was supplied.

    Returns:
        A status message: a prompt to upload audio when *audio_ref* is
        ``None``, otherwise a confirmation containing the identifier
        returned by ``engine.clone_voice_placeholder``.
    """
    if audio_ref is not None:
        # Placeholder for actual cloning logic
        voice_id = engine.clone_voice_placeholder(audio_ref)
        return f"Voice cloned successfully! Reference ID: {voice_id}. You can now use this voice (currently defaults to {voice_id})."

    return "Please upload an audio file for cloning."
# Flatten the per-category voice lists into a single list for the voice
# dropdown. Only the lists themselves are needed, so iterate the values
# rather than (category, voices) pairs.
all_voices = []
for voices in engine.voices.values():
    all_voices.extend(voices)
| # Premium CSS for high-end look | |
# Stylesheet string handed to demo.launch(css=...) in the __main__ guard.
# The .container, .header and .input-group selectors correspond to the
# elem_classes values given to the gr.Column containers in the layout.
# NOTE(review): button.primary presumably targets the variant="primary"
# generate button, and .footer presumably aims to hide Gradio's page footer;
# confirm both selectors against the rendered DOM.
| custom_css = """ | |
| .container { | |
| max-width: 900px !important; | |
| margin: auto !important; | |
| padding-top: 2rem !important; | |
| } | |
| .header { | |
| text-align: center; | |
| margin-bottom: 2rem; | |
| } | |
| .header h1 { | |
| font-size: 3rem !important; | |
| font-weight: 800 !important; | |
| background: linear-gradient(90deg, #ff00cc, #3333ff); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| margin-bottom: 0.5rem !important; | |
| } | |
| .header p { | |
| font-size: 1.1rem !important; | |
| color: #888; | |
| } | |
| .input-group { | |
| border-radius: 12px !important; | |
| border: 1px solid #333 !important; | |
| background: #111 !important; | |
| padding: 1rem !important; | |
| margin-bottom: 1.5rem !important; | |
| } | |
| .footer { | |
| visibility: hidden; | |
| } | |
| button.primary { | |
| background: linear-gradient(90deg, #ff00cc, #3333ff) !important; | |
| border: none !important; | |
| font-weight: bold !important; | |
| border-radius: 8px !important; | |
| } | |
| button.primary:hover { | |
| transform: translateY(-2px); | |
| box-shadow: 0 4px 15px rgba(255, 0, 204, 0.4); | |
| } | |
| """ | |
# Declarative Gradio layout for the app plus its single event binding.
# NOTE(review): indentation was lost in this copy, so how the `with`
# containers nest (e.g. which column holds the generate button) cannot be
# read from here; restore it from the original file before running.
| with gr.Blocks(title="Kokoro TTS Premium") as demo: | |
| with gr.Column(elem_classes="container"): | |
| with gr.Column(elem_classes="header"): | |
| gr.Markdown("# 🌸 Kokoro TTS") | |
| gr.Markdown("High-fidelity neural speech synthesis powered by Kokoro-82M") | |
| with gr.Column(elem_classes="input-group"): | |
# Multi-line script input; passed to tts_process as `text`.
| text_input = gr.Textbox( | |
| label="Script Content", | |
| placeholder="Paste your story, script, or text here...", | |
| lines=10, | |
| elem_id="text-input" | |
| ) | |
| with gr.Row(): | |
# Voice picker populated from all_voices.
# NOTE(review): assumes "af_heart" is present in all_voices; confirm
# against engine.voices.
| voice_select = gr.Dropdown( | |
| choices=all_voices, | |
| value="af_heart", | |
| label="Voice Archetype", | |
| scale=2 | |
| ) | |
# Speed from 0.5 to 2.0 in steps of 0.1, starting at 1.0.
| speed_slider = gr.Slider( | |
| minimum=0.5, | |
| maximum=2.0, | |
| value=1.0, | |
| step=0.1, | |
| label="Pacing (Speed)", | |
| scale=1 | |
| ) | |
| with gr.Accordion("⚙️ Engine Configurations", open=False): | |
| with gr.Row(): | |
# Choices are (display label, value) pairs; the default "en-us" is one
# of the values, and the selected value is passed to tts_process as `lang`.
| lang_select = gr.Dropdown( | |
| choices=[ | |
| ("🇺🇸 English (US)", "en-us"), | |
| ("🇬🇧 English (UK)", "en-gb"), | |
| ("🇨🇳 Chinese", "zh"), | |
| ("🇮🇳 Hindi", "hi"), | |
| ("🇯🇵 Japanese", "ja"), | |
| ("🇪🇸 Spanish", "es"), | |
| ("🇫🇷 French", "fr"), | |
| ("🇮🇹 Italian", "it"), | |
| ("🇵🇹 Portuguese", "pt") | |
| ], | |
| value="en-us", | |
| label="Linguistic Context" | |
| ) | |
# Passed to tts_process as `long_script_mode`; off by default.
| long_script_toggle = gr.Checkbox( | |
| label="Optimize for Long Duration (Safe Chunking)", | |
| value=False | |
| ) | |
| generate_btn = gr.Button("⚡ Generate Neural Audio", variant="primary", size="lg") | |
| with gr.Column(variant="compact"): | |
# type="filepath": tts_process hands back the path of the WAV it wrote.
| audio_output = gr.Audio( | |
| label="Master Audio Output", | |
| type="filepath" | |
| ) | |
# Inputs are listed in the same order as tts_process's parameters
# (text, voice, speed, lang, long_script_mode).
| generate_btn.click( | |
| tts_process, | |
| inputs=[text_input, voice_select, speed_slider, lang_select, long_script_toggle], | |
| outputs=audio_output | |
| ) | |
if __name__ == "__main__":
    # Start the app only when run as a script; the custom stylesheet is
    # supplied at launch time and server-side rendering is switched off.
    demo.launch(
        ssr_mode=False,
        css=custom_css,
    )