kitten-tts-mini / app.py
Shadow0482's picture
Create app.py
705ae42 verified
import gradio as gr
from kittentts import KittenTTS
# Load the model once at import time so every request reuses the same
# instance (downloads ~80 MB on first run, then cached).
print("🚀 Loading Kitten TTS Mini 0.8...")
model = KittenTTS("KittenML/kitten-tts-mini-0.8")
print("✅ Model loaded successfully!")

# Voice names offered by the UI dropdown; the selected name is forwarded
# unchanged as the `voice` argument of model.generate().
voices = ["Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"]
def generate_speech(text: str, voice: str):
    """Synthesize *text* with the chosen voice and report the outcome.

    Args:
        text: Text to speak. Leading/trailing whitespace is stripped before
            synthesis; empty or whitespace-only input is rejected.
        voice: Voice name forwarded to ``model.generate``.

    Returns:
        A ``(audio, status)`` pair. ``audio`` is ``(24000, samples)`` on
        success and ``None`` when the input is empty or synthesis fails.
        ``status`` is a Markdown-formatted message describing the result.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return None, "❌ Please enter some text!"

    try:
        # generate() returns a numpy array @ 24 kHz
        samples = model.generate(cleaned, voice=voice)
    except Exception as exc:
        # UI boundary: surface the failure in the status area rather than
        # letting the exception escape the handler.
        return None, f"❌ Error: {exc}"

    return (24000, samples), f"✅ Generated with voice: **{voice}**"
# Gradio UI: text + voice inputs and a generate button in one row, with the
# audio player and a status line below.
with gr.Blocks(title="🐱 Kitten TTS Mini 0.8", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🐱 Kitten TTS Mini 0.8")
    gr.Markdown("**80M parameters • Realistic TTS • Runs on CPU (no GPU needed)**")

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type anything here...",
                lines=5,
                value="Hello world! This is the amazing Kitten TTS Mini 0.8 running on Hugging Face Spaces.",
            )
            voice_dropdown = gr.Dropdown(
                choices=voices,
                value="Jasper",
                label="Voice",
                info="8 expressive voices available",
            )
        with gr.Column(scale=1):
            # Gradio button sizes are "sm" / "md" / "lg"; "large" is not a
            # recognised value.
            generate_btn = gr.Button("🔊 Generate Speech", variant="primary", size="lg")

    # type="numpy" matches the (sample_rate, samples) tuple returned by
    # generate_speech.
    audio_output = gr.Audio(label="Generated Audio", type="numpy")
    status = gr.Markdown("Ready to speak! 🎀")

    # Quick examples
    gr.Examples(
        examples=[
            ["The quick brown fox jumps over the lazy dog.", "Luna"],
            ["Artificial intelligence is revolutionizing the way we live and work.", "Bruno"],
            ["Welcome to Hugging Face Spaces! This runs completely on CPU with no GPU.", "Kiki"],
            ["Hi, I'm a super lightweight 80M TTS model made by KittenML.", "Bella"],
        ],
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status],
        fn=generate_speech,
        cache_examples=True,  # caches examples so they load instantly
    )

    # The button runs the same handler, with the same inputs/outputs, as the
    # examples above.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status],
    )

if __name__ == "__main__":
    demo.launch()