# Spaces: Running (page-status residue from the hosted listing; not part of the program)
import os

import gradio as gr
import numpy as np
from kittentts import KittenTTS
# Sample rate (Hz) returned to Gradio alongside every generated waveform.
SAMPLE_RATE: int = 24000

# Dropdown label -> model identifier handed to ``KittenTTS(...)``.
MODELS: dict[str, str] = {
    "Nano (15M - Fastest)": "KittenML/kitten-tts-nano-0.8-fp32",
    "Micro (40M - Balanced)": "KittenML/kitten-tts-micro-0.8",
    "Mini (80M - Best Quality)": "KittenML/kitten-tts-mini-0.8",
}

# Voice names offered in the UI and forwarded to ``generate(voice=...)``.
VOICES: list[str] = [
    "Bella",
    "Jasper",
    "Luna",
    "Bruno",
    "Rosie",
    "Hugo",
    "Kiki",
    "Leo",
]
# Initialize all models at startup: every entry of MODELS is loaded eagerly
# at import time and stored under its dropdown label.
print("Loading models...")
_model_cache: dict[str, KittenTTS] = {}
for model_name, model_id in MODELS.items():
    print(f"Loading {model_name}...")
    _model_cache[model_name] = KittenTTS(model_id)
print("All models loaded!")
def get_model(model_name: str) -> KittenTTS:
    """Return the preloaded model stored under *model_name*.

    Args:
        model_name: Key into the module-level ``_model_cache``.

    Returns:
        The ``KittenTTS`` instance cached for that key.

    Raises:
        KeyError: If nothing is cached under ``model_name``.
    """
    return _model_cache[model_name]
def synthesize(text: str, model_name: str, voice: str, speed: float) -> tuple[int, np.ndarray]:
    """Generate speech for *text* and return it as a Gradio numpy-audio tuple.

    Args:
        text: Text to speak; surrounding whitespace is stripped before use.
        model_name: Label of a preloaded model, resolved through ``get_model``.
        voice: Voice name forwarded to ``generate``.
        speed: Speed factor forwarded to ``generate`` when it is accepted.

    Returns:
        ``(SAMPLE_RATE, samples)`` where ``samples`` has had its singleton
        dimensions removed with ``np.squeeze``.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Strip once and reuse the result for both validation and synthesis.
    cleaned = text.strip() if text else ""
    if not cleaned:
        raise gr.Error("Please enter some text.")

    tts = get_model(model_name)

    try:
        audio = tts.generate(cleaned, voice=voice, speed=speed)
    except TypeError:
        # Fallback for a ``generate`` that does not accept ``speed``:
        # retry without it rather than failing the request.
        audio = tts.generate(cleaned, voice=voice)
        if speed != 1.0:
            # Do not drop the user's setting silently; surface it as a toast.
            gr.Warning(
                "Speed control is not supported by this model; "
                "audio was generated at the default speed."
            )

    # audio shape is (1, samples) or (samples,) — normalize to 1-D
    audio = np.squeeze(audio)
    return (SAMPLE_RATE, audio)
# Monochrome theme: every hue is neutral, and the dark-mode variants are
# pinned to the same values as light mode so both render identically.
theme = gr.themes.Base(
    primary_hue="neutral",
    secondary_hue="neutral",
    neutral_hue="neutral",
    font=gr.themes.GoogleFont("Inter"),
).set(
    # Page and block surfaces
    body_background_fill="white",
    body_background_fill_dark="white",
    block_background_fill="white",
    block_background_fill_dark="white",
    block_border_color="#e5e5e5",
    block_border_color_dark="#e5e5e5",
    block_shadow="none",
    block_shadow_dark="none",
    # Primary button
    button_primary_background_fill="#111111",
    button_primary_background_fill_hover="#333333",
    button_primary_text_color="white",
    button_primary_border_color="#111111",
    # Inputs and slider
    input_background_fill="white",
    input_background_fill_dark="white",
    input_border_color="#e5e5e5",
    slider_color="#111111",
    # Tables (used by the examples list)
    table_border_color="#e5e5e5",
    table_even_background_fill="white",
    table_odd_background_fill="white",
    table_row_focus="white",
)
# Custom stylesheet layered on top of the theme. Most rules use !important
# so they win over the framework's own component styles.
css = """
/* Force light mode — prevents OS dark mode from affecting the page */
:root, html, body { color-scheme: light !important; }
body, .gradio-container, .main { background: white !important; }
.gradio-container { max-width: 860px !important; margin: 40px auto !important; }
footer { display: none !important; }

/* Force all text to black — no accent colors */
*, *::before, *::after {
    color: #111 !important;
    --body-text-color: #111 !important;
    --block-label-text-color: #111 !important;
    --block-title-text-color: #111 !important;
    --color-accent: #111 !important;
    --link-text-color: #111 !important;
    --link-text-color-hover: #111 !important;
    --link-text-color-visited: #111 !important;
    --link-text-color-active: #111 !important;
}

/* Exceptions — keep button text white */
button.primary, button[variant="primary"] { color: white !important; }

/* Error toast notification */
.toast-wrap, .toast-body, [class*="toast"] {
    background: white !important;
    border: 1px solid #e5e5e5 !important;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08) !important;
}
[class*="toast"] .toast-title, [class*="toast"] .error,
.toast-wrap .error, span.error {
    color: #b91c1c !important;
    font-weight: 600 !important;
}
[class*="toast"] p, [class*="toast"] .toast-text {
    color: #555 !important;
}

/* Error badge inside output block */
.error-wrap, .error {
    background: #fef2f2 !important;
    border-color: #fca5a5 !important;
    color: #b91c1c !important;
}

/* Placeholder text */
::placeholder { color: #aaa !important; }

/* Backgrounds */
.block, .form, .wrap, .panel, .gap, .tabs { background: white !important; }

/* Block label tabs (e.g. "Output" on the audio component) */
[data-testid="block-label"] {
    background: white !important;
    color: #111 !important;
    border-color: #e5e5e5 !important;
}
[data-testid="block-label"] * { color: #111 !important; }

/* Dropdown closed state — gray on the full inner wrapper with its natural padding */
input[role="listbox"] {
    background: transparent !important;
}
.wrap-inner {
    background: #f7f7f7 !important;
    border-radius: 4px !important;
}

/* Dropdown popup list */
ul.options {
    background: #f7f7f7 !important;
    border: 1px solid #e5e5e5 !important;
    box-shadow: 0 4px 12px rgba(0,0,0,0.06) !important;
}
ul.options li {
    background: #f7f7f7 !important;
    color: #111 !important;
}
ul.options li:hover, ul.options li.selected {
    background: #eeeeee !important;
}

/* Examples table — force all borders to match */
.examples-holder, .table-wrap, table, thead, tbody, tr, td, th {
    background: white !important;
    border-color: #e5e5e5 !important;
}
.tr-head { box-shadow: none !important; }
tr:hover td { background: #f9f9f9 !important; }

/* Speed number input container and divider */
.tab-like-container, .tab-like-container *, input[type=number] {
    border-color: #e5e5e5 !important;
}
.reset-button {
    -webkit-appearance: none !important;
    appearance: none !important;
    border: none !important;
    background: white !important;
}

/* Slider track */
input[type=range]::-webkit-slider-runnable-track { background: #e5e5e5 !important; }
input[type=range]::-webkit-slider-thumb { background: #111 !important; }
"""
# UI definition: header, a two-column input/output layout, the click wiring,
# and a list of ready-made examples.
# NOTE(review): the nesting below was rebuilt from a flattened listing. In
# particular, placing the speed slider and the button in the left column
# (outside the model/voice row) is an assumption — confirm against the app.
with gr.Blocks(title="KittenTTS Demo") as demo:
    gr.Markdown("# KittenTTS Demo")
    gr.Markdown('<img width="607" height="255" alt="KittenTTS Banner" src="https://github.com/user-attachments/assets/f4646722-ba78-4b25-8a65-81bacee0d4f6" />')
    gr.Markdown("Text-to-speech synthesis with multiple models and voices.")

    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text to synthesize…",
                lines=5,
            )
            with gr.Row():
                model_select = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Micro (40M - Balanced)",
                    label="Model",
                )
                voice_select = gr.Dropdown(
                    choices=VOICES,
                    value="Jasper",
                    label="Voice",
                )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.05,
                label="Speed",
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        # Right column: the generated audio.
        with gr.Column(scale=1):
            audio_output = gr.Audio(label="Output", type="numpy")

    # The input order here must match the parameter order of ``synthesize``.
    generate_btn.click(
        fn=synthesize,
        inputs=[text_input, model_select, voice_select, speed_slider],
        outputs=audio_output,
    )

    # Each example row is [text, model label, voice, speed], in the same
    # order as ``inputs``.
    gr.Examples(
        examples=[
            [
                "Space is a three-dimensional continuum containing positions and directions.",
                "Micro (40M - Balanced)",
                "Jasper",
                1.0,
            ],
            [
                "It begins with an 'Ugh!' Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.",
                "Mini (80M - Best Quality)",
                "Luna",
                1.0,
            ],
            [
                "Hello! Welcome to the KittenTTS demo. You can choose different voices and models to find the combination you like best.",
                "Nano (15M - Fastest)",
                "Bella",
                1.1,
            ],
        ],
        inputs=[text_input, model_select, voice_select, speed_slider],
    )
if __name__ == "__main__":
    # server_name="0.0.0.0" binds to all network interfaces.
    # NOTE(review): passing ``theme`` and ``css`` to ``launch()`` depends on
    # the installed Gradio accepting them there (older releases take them on
    # ``gr.Blocks(...)`` instead) — confirm against the pinned version.
    demo.launch(server_name="0.0.0.0", theme=theme, css=css)