Spaces:

KittenML
/

KittenTTS-Demo

Running

App Files Files Community

Your Name committed on Feb 21

Commit

e3b97f2

1 Parent(s): f07d5a1

a

Browse files

Files changed (2) hide show

demo.py +256 -0
requirements.txt +4 -0

demo.py ADDED Viewed

	@@ -0,0 +1,256 @@

+import gradio as gr
+import numpy as np
+import os
+from kittentts import KittenTTS
+SAMPLE_RATE = 24000  # Sample rate returned alongside every generated clip; presumably the models' native output rate in Hz — TODO confirm
+MODELS = {  # UI label -> model identifier passed to KittenTTS() when the models are loaded
+    "Nano (15M - Fastest)": "KittenML/kitten-tts-nano-0.8-fp32",
+    "Micro (40M - Balanced)": "KittenML/kitten-tts-micro-0.8",
+    "Mini (80M - Best Quality)": "KittenML/kitten-tts-mini-0.8",
+}
+VOICES = [  # Voice names offered in the voice dropdown and forwarded as `voice=` to generate()
+    "Bella",
+    "Jasper",
+    "Luna",
+    "Bruno",
+    "Rosie",
+    "Hugo",
+    "Kiki",
+    "Leo",
+]
+# Eagerly construct every model in MODELS at import time; get_model() only reads from this cache.
+print("Loading models...")
+_model_cache: dict[str, KittenTTS] = {}  # UI label -> loaded KittenTTS instance
+for model_name, model_id in MODELS.items():
+    print(f"Loading {model_name}...")
+    _model_cache[model_name] = KittenTTS(model_id)  # keyed by the UI label, not the model id
+print("All models loaded!")
+def get_model(model_name: str) -> KittenTTS:
+    return _model_cache[model_name]
+def synthesize(text: str, model_name: str, voice: str, speed: float):
+    if not text or not text.strip():
+        raise gr.Error("Please enter some text.")
+    tts = get_model(model_name)
+    # Note: speed parameter may not be supported in v0.8
+    # If you get an error, remove speed=speed from the generate call
+    try:
+        audio = tts.generate(text.strip(), voice=voice, speed=speed)
+    except TypeError:
+        # Fallback if speed is not supported
+        audio = tts.generate(text.strip(), voice=voice)
+    # audio shape is (1, samples) or (samples,) — normalize to 1-D
+    audio = np.squeeze(audio)
+    return (SAMPLE_RATE, audio)
+theme = gr.themes.Base(  # Base theme with all hues set to "neutral" and the Inter Google font
+    primary_hue="neutral",
+    secondary_hue="neutral",
+    neutral_hue="neutral",
+    font=gr.themes.GoogleFont("Inter"),
+).set(  # Variable overrides; every *_dark entry below repeats its light value
+    body_background_fill="white",
+    body_background_fill_dark="white",
+    block_background_fill="white",
+    block_background_fill_dark="white",
+    block_border_color="#e5e5e5",
+    block_border_color_dark="#e5e5e5",
+    block_shadow="none",
+    block_shadow_dark="none",
+    button_primary_background_fill="#111111",
+    button_primary_background_fill_hover="#333333",
+    button_primary_text_color="white",
+    button_primary_border_color="#111111",
+    input_background_fill="white",
+    input_background_fill_dark="white",
+    input_border_color="#e5e5e5",
+    slider_color="#111111",
+    table_border_color="#e5e5e5",
+    table_even_background_fill="white",
+    table_odd_background_fill="white",
+    table_row_focus="white",
+)
+css = """
+/* Force light mode — prevents OS dark mode from affecting the page */
+:root, html, body { color-scheme: light !important; }
+body, .gradio-container, .main { background: white !important; }
+.gradio-container { max-width: 860px !important; margin: 40px auto !important; }
+footer { display: none !important; }
+/* Force all text to black — no accent colors */
+*, *::before, *::after {
+    color: #111 !important;
+    --body-text-color: #111 !important;
+    --block-label-text-color: #111 !important;
+    --block-title-text-color: #111 !important;
+    --color-accent: #111 !important;
+    --link-text-color: #111 !important;
+    --link-text-color-hover: #111 !important;
+    --link-text-color-visited: #111 !important;
+    --link-text-color-active: #111 !important;
+}
+/* Exceptions — keep button text white */
+button.primary, button[variant="primary"] { color: white !important; }
+/* Error toast notification */
+.toast-wrap, .toast-body, [class*="toast"] {
+    background: white !important;
+    border: 1px solid #e5e5e5 !important;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.08) !important;
+}
+[class*="toast"] .toast-title, [class*="toast"] .error,
+.toast-wrap .error, span.error {
+    color: #b91c1c !important;
+    font-weight: 600 !important;
+}
+[class*="toast"] p, [class*="toast"] .toast-text {
+    color: #555 !important;
+}
+/* Error badge inside output block */
+.error-wrap, .error {
+    background: #fef2f2 !important;
+    border-color: #fca5a5 !important;
+    color: #b91c1c !important;
+}
+/* Placeholder text */
+::placeholder { color: #aaa !important; }
+/* Backgrounds */
+.block, .form, .wrap, .panel, .gap, .tabs { background: white !important; }
+/* Block label tabs (e.g. "Output" on the audio component) */
+[data-testid="block-label"] {
+    background: white !important;
+    color: #111 !important;
+    border-color: #e5e5e5 !important;
+}
+[data-testid="block-label"] * { color: #111 !important; }
+/* Dropdown closed state — gray on the full inner wrapper with its natural padding */
+input[role="listbox"] {
+    background: transparent !important;
+}
+.wrap-inner {
+    background: #f7f7f7 !important;
+    border-radius: 4px !important;
+}
+/* Dropdown popup list */
+ul.options {
+    background: #f7f7f7 !important;
+    border: 1px solid #e5e5e5 !important;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.06) !important;
+}
+ul.options li {
+    background: #f7f7f7 !important;
+    color: #111 !important;
+}
+ul.options li:hover, ul.options li.selected {
+    background: #eeeeee !important;
+}
+/* Examples table — force all borders to match */
+.examples-holder, .table-wrap, table, thead, tbody, tr, td, th {
+    background: white !important;
+    border-color: #e5e5e5 !important;
+}
+.tr-head { box-shadow: none !important; }
+tr:hover td { background: #f9f9f9 !important; }
+/* Speed number input container and divider */
+.tab-like-container, .tab-like-container *, input[type=number] {
+    border-color: #e5e5e5 !important;
+}
+.reset-button {
+    -webkit-appearance: none !important;
+    appearance: none !important;
+    border: none !important;
+    background: white !important;
+}
+/* Slider track */
+input[type=range]::-webkit-slider-runnable-track { background: #e5e5e5 !important; }
+input[type=range]::-webkit-slider-thumb { background: #111 !important; }
+"""  # Custom stylesheet string handed to demo.launch(css=css); its contents are runtime text, so edit with care
+with gr.Blocks(title="KittenTTS Demo") as demo:  # Page layout: heading, banner, input column, output column, examples
+    gr.Markdown("# KittenTTS Demo")
+    gr.Markdown('<img width="607" height="255" alt="KittenTTS Banner" src="https://github.com/user-attachments/assets/f4646722-ba78-4b25-8a65-81bacee0d4f6" />')
+    gr.Markdown("Text-to-speech synthesis with multiple models and voices.")
+    with gr.Row():
+        with gr.Column(scale=2):  # Input column: text, model/voice pickers, speed slider, button
+            text_input = gr.Textbox(
+                label="Text",
+                placeholder="Enter text to synthesize…",
+                lines=5,
+            )
+            with gr.Row():
+                model_select = gr.Dropdown(
+                    choices=list(MODELS.keys()),
+                    value="Micro (40M - Balanced)",
+                    label="Model",
+                )
+                voice_select = gr.Dropdown(
+                    choices=VOICES,
+                    value="Jasper",
+                    label="Voice",
+                )
+            speed_slider = gr.Slider(
+                minimum=0.5,
+                maximum=2.0,
+                value=1.0,
+                step=0.05,
+                label="Speed",
+            )
+            generate_btn = gr.Button("Generate Speech", variant="primary")
+        with gr.Column(scale=1):  # Output column: the generated audio
+            audio_output = gr.Audio(label="Output", type="numpy")
+    generate_btn.click(  # On click, call synthesize(); `inputs` are listed in its parameter order
+        fn=synthesize,
+        inputs=[text_input, model_select, voice_select, speed_slider],
+        outputs=audio_output,
+    )
+    gr.Examples(  # Preset rows, each ordered [text, model, voice, speed] to match `inputs`
+        examples=[
+            [
+                "Space is a three-dimensional continuum containing positions and directions.",
+                "Micro (40M - Balanced)",
+                "Jasper",
+                1.0,
+            ],
+            [
+                "It begins with an 'Ugh!' Another mysterious stain appears on a favorite shirt. Every trick has been tried, but the stain persists.",
+                "Mini (80M - Best Quality)",
+                "Luna",
+                1.0,
+            ],
+            [
+                "Hello! Welcome to the KittenTTS demo. You can choose different voices and models to find the combination you like best.",
+                "Nano (15M - Fastest)",
+                "Bella",
+                1.1,
+            ],
+        ],
+        inputs=[text_input, model_select, voice_select, speed_slider],
+    )
+if __name__ == "__main__":  # Launch only when run as a script, not on import
+    demo.launch(server_name="0.0.0.0", theme=theme, css=css)  # Binds to all interfaces. NOTE(review): passing theme/css to launch() looks like the Gradio 6 API (earlier releases take them on gr.Blocks) — confirm the installed Gradio version

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+numpy
+soundfile
+kittentts @ https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentts-0.8.0-py3-none-any.whl