Spaces:

Javedalam
/

KittenTTS

Sleeping

App Files Files Community

Javedalam committed on Jan 29

Commit

fdc5dab

verified ·

1 Parent(s): af46efc

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import gradio as gr
+import numpy as np
+import os
+from huggingface_hub import snapshot_download
+from kittentts import KittenTTS
+SR = 24000
+# Download full repo and auto-discover ONNX
+repo_dir = snapshot_download("KittenML/kitten-tts-nano-0.2")
+onnx_files = [os.path.join(repo_dir, f) for f in os.listdir(repo_dir) if f.endswith(".onnx")]
+MODEL_PATH = onnx_files[0]
+tts = KittenTTS(MODEL_PATH)
+VOICES = [
+    "expr-voice-2-f",
+    "expr-voice-3-m",
+    "expr-voice-4-f",
+]
+EXAMPLES = [
+    ["Small models can sound natural without giant cloud systems.", "expr-voice-2-f", 1.0],
+    ["This demo runs a tiny expressive TTS model on CPU only.", "expr-voice-4-f", 1.05],
+    ["Most AI stacks are bloated. This one is not.", "expr-voice-3-m", 0.9],
+]
+def synthesize(text, voice, speed):
+    if not text.strip():
+        return None
+    audio = tts.generate(text, voice=voice, speed=float(speed))
+    audio = np.asarray(audio, dtype=np.float32)
+    return SR, audio
+with gr.Blocks(title="KittenTTS – Tiny Expressive TTS") as demo:
+    gr.Markdown(
+        """
+        # 🐱 KittenTTS
+        **Tiny, expressive Text-to-Speech (~15M params, CPU-only)**
+        Use the controls below to explore voice and pacing.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=2):
+            text = gr.Textbox(
+                label="Text",
+                lines=4,
+                placeholder="Type text or click an example below…",
+            )
+            voice = gr.Dropdown(
+                choices=VOICES,
+                value="expr-voice-2-f",
+                label="Voice",
+            )
+            speed = gr.Slider(
+                minimum=0.7,
+                maximum=1.3,
+                value=1.0,
+                step=0.05,
+                label="Speaking speed",
+            )
+            generate = gr.Button("Generate Speech", variant="primary")
+        with gr.Column(scale=1):
+            audio = gr.Audio(label="Output", type="numpy")
+    gr.Markdown("### Example prompts")
+    gr.Examples(
+        examples=EXAMPLES,
+        inputs=[text, voice, speed],
+    )
+    generate.click(
+        fn=synthesize,
+        inputs=[text, voice, speed],
+        outputs=audio,
+    )
+demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)