Javedalam committed on
Commit
fdc5dab
·
verified ·
1 Parent(s): af46efc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import os
4
+ from huggingface_hub import snapshot_download
5
+ from kittentts import KittenTTS
6
+
7
# Sample rate (Hz) returned to Gradio together with every waveform.
# NOTE(review): presumably the model's native output rate - confirm against
# the KittenTTS model card.
SR = 24000

# Download the full model repo and auto-discover the ONNX weights inside it.
repo_dir = snapshot_download("KittenML/kitten-tts-nano-0.2")

# os.listdir() yields entries in arbitrary order, so sort the candidates to
# pick the same file on every start-up if the repo ships several .onnx files.
# Only the top level of the snapshot is searched.
onnx_files = sorted(
    os.path.join(repo_dir, f) for f in os.listdir(repo_dir) if f.endswith(".onnx")
)
if not onnx_files:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(f"No .onnx model file found in {repo_dir!r}")
MODEL_PATH = onnx_files[0]

# NOTE(review): the KittenTTS README constructs the model from a Hub repo id
# (e.g. KittenTTS("KittenML/kitten-tts-nano-0.2")); verify that the installed
# kittentts version also accepts a local .onnx path here.
tts = KittenTTS(MODEL_PATH)

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "expr-voice-2-f",
    "expr-voice-3-m",
    "expr-voice-4-f",
]

# Clickable example rows; each is [text, voice, speed], matching the order of
# the input components wired to gr.Examples.
EXAMPLES = [
    ["Small models can sound natural without giant cloud systems.", "expr-voice-2-f", 1.0],
    ["This demo runs a tiny expressive TTS model on CPU only.", "expr-voice-4-f", 1.05],
    ["Most AI stacks are bloated. This one is not.", "expr-voice-3-m", 0.9],
]
28
+
29
def synthesize(text, voice, speed):
    """Generate speech for *text* and return it in Gradio's numpy audio form.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            treated as "nothing to say".
        voice: Voice identifier forwarded to ``tts.generate``.
        speed: Speaking-speed multiplier; coerced to ``float`` before being
            forwarded to ``tts.generate``.

    Returns:
        ``None`` when there is no text to speak, otherwise a
        ``(SR, audio)`` tuple where ``audio`` is a float32 numpy array.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError before the model is ever reached.
    if text is None or not text.strip():
        return None

    waveform = tts.generate(text, voice=voice, speed=float(speed))
    # Normalise whatever the model returns to a float32 ndarray.
    waveform = np.asarray(waveform, dtype=np.float32)
    return SR, waveform
35
+
36
# Build the Gradio page: header, input controls next to the audio output,
# example prompts, and the button wiring that calls synthesize().
with gr.Blocks(title="KittenTTS – Tiny Expressive TTS") as demo:
    gr.Markdown(
        """
        # 🐱 KittenTTS
        **Tiny, expressive Text-to-Speech (~15M params, CPU-only)**

        Use the controls below to explore voice and pacing.
        """
    )

    with gr.Row():
        # Left column (scale=2): every user input plus the trigger button.
        with gr.Column(scale=2):
            text = gr.Textbox(
                label="Text",
                lines=4,
                placeholder="Type text or click an example below…",
            )
            voice = gr.Dropdown(choices=VOICES, value="expr-voice-2-f", label="Voice")
            speed = gr.Slider(
                minimum=0.7, maximum=1.3, value=1.0, step=0.05, label="Speaking speed"
            )
            generate = gr.Button("Generate Speech", variant="primary")

        # Right column (scale=1): player for the synthesized audio.
        with gr.Column(scale=1):
            audio = gr.Audio(label="Output", type="numpy")

    gr.Markdown("### Example prompts")
    # Examples are bound to the same three inputs that synthesize() receives.
    gr.Examples(examples=EXAMPLES, inputs=[text, voice, speed])

    # Clicking the button runs synthesize() and sends its result to the player.
    generate.click(fn=synthesize, inputs=[text, voice, speed], outputs=audio)

# Listen on all interfaces at port 7860 and show errors in the UI.
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)