"""Gradio front end for the Kokoro text-to-speech engine.

Builds a Blocks UI with a text box, a voice drop-down, a speed slider and a
pronunciation-guide toggle, and wires them to ``KokoroTTS.generate_speech``.
"""

import os
import time

import gradio as gr
import numpy as np

from tts_core import KokoroTTS

# Initialize the TTS engine once at import time; every request reuses it.
tts_engine = KokoroTTS()

# CSS for styling the interface
css = """
.container {
    max-width: 900px;
    margin: auto;
    padding-top: 1.5rem;
}
.title {
    text-align: center;
    color: #2C3E50;
}
.subtitle {
    text-align: center;
    color: #7F8C8D;
    margin-bottom: 2rem;
}
.footer {
    text-align: center;
    margin-top: 2rem;
    color: #7F8C8D;
    font-size: 0.9rem;
}
.settings-block {
    padding: 1rem;
    border-radius: 8px;
    background-color: #f8f9fa;
    margin-bottom: 1rem;
}
.voice-selector {
    margin-bottom: 1rem;
}
.advanced-settings {
    margin-top: 1rem;
}
.output-block {
    margin-top: 1.5rem;
}
"""

# Drop-down choices as (display name, voice id) pairs. The engine's mapping is
# keyed by voice id, so each item is flipped for Gradio's (label, value) form.
voice_options = [
    (name, voice_id) for voice_id, name in tts_engine.us_english_voices.items()
]


def text_to_speech(text, voice, speed, add_pronunciation_guide):
    """Convert text to speech using the selected voice and settings.

    Args:
        text: Text to synthesize. ``None``, empty or whitespace-only input is
            rejected with a hint message instead of being sent to the engine.
        voice: Voice id, i.e. a key of ``tts_engine.us_english_voices``.
        speed: Speech-rate value; converted with ``float()`` before use.
        add_pronunciation_guide: When true, every occurrence of "Kokoro" is
            replaced by an inline pronunciation annotation before synthesis.

    Returns:
        A 2-tuple ``(audio, info)``. On success ``audio`` is
        ``(sample_rate, audio_data)`` and ``info`` is a status line; on
        empty input or failure ``audio`` is ``None`` and ``info`` explains
        why.
    """
    # Guard against None as well as blank input so the handler never crashes
    # on an unset textbox value.
    if not text or not text.strip():
        return None, "Please enter some text to convert to speech."

    if add_pronunciation_guide:
        # Simple pronunciation guide for demonstration purposes.
        text = text.replace("Kokoro", "[Kokoro](/kˈOkəɹO/)")

    # This is the UI boundary: any engine failure is reported in the info
    # message rather than raised.
    try:
        start_time = time.time()
        _output_file, sample_rate, audio_data = tts_engine.generate_speech(
            text=text,
            voice=voice,
            speed=float(speed),
        )
        generation_time = time.time() - start_time

        # Fall back to the raw id so a missing display name cannot turn a
        # successful generation into an error message.
        voice_name = tts_engine.us_english_voices.get(voice, voice)
        # Duration assumes len(audio_data) is the sample count — TODO confirm
        # against KokoroTTS.generate_speech.
        duration = len(audio_data) / sample_rate
        info = (
            f"✅ Generated audio ({duration:.2f}s) in {generation_time:.2f}s "
            f"using voice: {voice_name}"
        )
        return (sample_rate, audio_data), info
    except Exception as e:
        return None, f"❌ Error generating speech: {str(e)}"


def create_demo():
    """Create the Gradio interface.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    # gr.Box is not available in Gradio 4, which this file otherwise targets
    # (tuple drop-down choices); use it where it exists, else gr.Group.
    settings_container = getattr(gr, "Box", None) or gr.Group

    with gr.Blocks(css=css) as demo:
        # Header. The markup applies the .title / .subtitle rules from `css`.
        gr.HTML("""
        <h1 class="title">Kokoro82m Text-to-Speech</h1>
        <p class="subtitle">A CPU-optimized TTS application with all US English voices</p>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Text input area
                text_input = gr.Textbox(
                    label="Text to convert to speech",
                    placeholder="Enter text here...",
                    lines=10,
                    value="Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient.",
                )

                # Settings
                with settings_container(elem_classes=["settings-block"]):
                    gr.Markdown("### Voice Settings")

                    # Voice selection
                    voice_selector = gr.Dropdown(
                        choices=voice_options,
                        value="af_heart",  # Default voice
                        label="Select Voice",
                        elem_classes=["voice-selector"],
                    )

                    with gr.Accordion(
                        "Advanced Settings",
                        open=False,
                        elem_classes=["advanced-settings"],
                    ):
                        speed_slider = gr.Slider(
                            minimum=0.5,
                            maximum=2.0,
                            value=1.0,
                            step=0.05,
                            label="Speech Speed",
                        )
                        pronunciation_checkbox = gr.Checkbox(
                            label="Add pronunciation guides for better quality",
                            value=False,
                        )

                # Generate button
                generate_btn = gr.Button("Generate Speech", variant="primary")

            with gr.Column(scale=1):
                # Output audio
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="numpy",
                    elem_classes=["output-block"],
                )
                # Info message
                info_message = gr.Markdown("")

        # Set up event handlers
        generate_btn.click(
            fn=text_to_speech,
            inputs=[text_input, voice_selector, speed_slider, pronunciation_checkbox],
            outputs=[audio_output, info_message],
        )

        # Examples: [text, voice id, speed, add_pronunciation_guide]
        examples = [
            ["Hello, my name is Kokoro. I am a text-to-speech model with 82 million parameters.", "af_heart", 1.0, True],
            # The line break is written as an escape; a raw newline inside a
            # single-quoted literal does not parse.
            ["The quick brown fox jumps over the lazy dog. \nThis is a sample of my voice.", "af_bella", 1.0, False],
            ["Welcome to the world of artificial intelligence and text-to-speech technology.", "am_fenrir", 1.0, False],
            ["This is an example of a slower speaking rate for more deliberate speech.", "af_nicole", 0.8, False],
            ["This is an example of a faster speaking rate for more energetic speech.", "am_michael", 1.3, False],
        ]

        gr.Examples(
            examples=examples,
            inputs=[text_input, voice_selector, speed_slider, pronunciation_checkbox],
            outputs=[audio_output, info_message],
            fn=text_to_speech,
            cache_examples=True,
        )

        # NOTE(review): the footer markup is empty in this file; the .footer
        # CSS class is defined but nothing uses it — confirm intended content.
        gr.HTML(""" """)

    return demo


# Create the demo at import time so hosting platforms can pick up `demo`.
demo = create_demo()

# For Hugging Face Spaces
if __name__ == "__main__":
    demo.launch()