# File size: 2,580 Bytes
# 705ae42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from kittentts import KittenTTS

# Load model once at startup (downloads ~80 MB, then cached)
# The instance lives at module level so every generate_speech() call reuses it.
print("🚀 Loading Kitten TTS Mini 0.8...")
model = KittenTTS("KittenML/kitten-tts-mini-0.8")
print("✅ Model loaded successfully!")

# Voice names offered in the UI dropdown; the selected one is forwarded to
# model.generate() as `voice=`.
voices = ["Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"]

def generate_speech(text: str, voice: str):
    """Synthesize speech for *text* with the selected voice.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected without calling the model.
        voice: Voice name forwarded to ``model.generate`` as ``voice=``.

    Returns:
        A 2-tuple ``(audio, status)``. On success ``audio`` is
        ``(24000, audio_array)`` and ``status`` is a Markdown confirmation
        naming the voice. On empty input or any model failure ``audio`` is
        ``None`` and ``status`` carries the error message.
    """
    # Strip once and reuse the result for both validation and synthesis.
    cleaned = text.strip() if text else ""
    if not cleaned:
        return None, "❌ Please enter some text!"

    try:
        # generate() returns a numpy array @ 24 kHz
        audio_array = model.generate(cleaned, voice=voice)
    except Exception as e:
        # UI boundary: surface the failure in the status output rather than
        # letting the exception propagate to the caller.
        return None, f"❌ Error: {e}"

    return (24000, audio_array), f"✅ Generated with voice: **{voice}**"

# Beautiful Gradio UI
# Layout: text and voice inputs beside the generate button, followed by the
# audio player, a status line, and clickable examples.
with gr.Blocks(title="🐱 Kitten TTS Mini 0.8", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🐱 Kitten TTS Mini 0.8")
    gr.Markdown("**80M parameters • Realistic TTS • Runs on CPU (no GPU needed)**")

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type anything here...",
                lines=5,
                value="Hello world! This is the amazing Kitten TTS Mini 0.8 running on Hugging Face Spaces."
            )
            voice_dropdown = gr.Dropdown(
                choices=voices,
                value="Jasper",
                label="Voice",
                info="8 expressive voices available"
            )

        with gr.Column(scale=1):
            # Gradio button sizes are "sm" / "md" / "lg"; "large" is not a
            # documented value.
            generate_btn = gr.Button("🔊 Generate Speech", variant="primary", size="lg")

    # generate_speech() feeds these two outputs: a (sample_rate, array) tuple
    # for the player and a Markdown status message.
    audio_output = gr.Audio(label="Generated Audio", type="numpy")
    status = gr.Markdown("Ready to speak! 🎀")

    # Quick examples
    gr.Examples(
        examples=[
            ["The quick brown fox jumps over the lazy dog.", "Luna"],
            ["Artificial intelligence is revolutionizing the way we live and work.", "Bruno"],
            ["Welcome to Hugging Face Spaces! This runs completely on CPU with no GPU.", "Kiki"],
            ["Hi, I'm a super lightweight 80M TTS model made by KittenML.", "Bella"]
        ],
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status],
        fn=generate_speech,
        cache_examples=True,   # caches examples so they load instantly
    )

    # The button runs the same handler, inputs and outputs as the examples.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()