"""Gradio front end for Kitten TTS Mini 0.8.

Loads a KittenTTS model once at import time, then serves a small web UI
where a user types text, picks a voice, and gets back a generated WAV file.
"""

import logging
import os
import tempfile

import gradio as gr
import soundfile as sf
from kittentts import KittenTTS

logger = logging.getLogger(__name__)

# Model identifier used when loading from the local directory fails.
FALLBACK_MODEL_ID = "KittenML/kitten-tts-mini-0.8"

# Sample rate (Hz) handed to soundfile when writing the generated audio.
# NOTE(review): presumably this matches the model's output rate -- confirm.
SAMPLE_RATE = 24000

# Available voices in version 0.8
AVAILABLE_VOICES = ['Bella', 'Jasper', 'Luna', 'Bruno', 'Rosie', 'Hugo', 'Kiki', 'Leo']

# Voice pre-selected in the dropdown.
DEFAULT_VOICE = "Jasper"

# 1. Initialize the model
# "." asks the library to look for the model files (.onnx, .npz, .json)
# uploaded to the local directory; if that fails for any reason, fall back
# to the named model instead.
try:
    model = KittenTTS(".")
except Exception:
    # The set of exceptions KittenTTS can raise is not visible here, so the
    # catch stays broad -- but record why the local load failed rather than
    # hiding it.
    logger.warning(
        "Loading the model from the local directory failed; falling back to %s",
        FALLBACK_MODEL_ID,
        exc_info=True,
    )
    model = KittenTTS(FALLBACK_MODEL_ID)


# 2. Define the synthesis function
def synthesize_audio(text, voice):
    """Generate speech for *text* with *voice* and return the WAV file path.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        voice: Voice name passed through to ``model.generate``.

    Returns:
        Path to a newly written ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # Generate the raw audio array
    audio = model.generate(text, voice=voice)

    # Each request gets its own file: a single shared "output.wav" would be
    # overwritten when several requests run at the same time. The file is
    # created only after generation succeeds so a failed request leaves
    # nothing behind.
    # NOTE(review): these temp files are not deleted by this script.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    sf.write(output_path, audio, SAMPLE_RATE)

    return output_path


# 3. Build the Gradio UI
with gr.Blocks(title="Kitten TTS Mini") as demo:
    gr.Markdown("# 🐱 Kitten TTS Mini 0.8")
    gr.Markdown("Type some text below to generate speech using your uploaded ONNX model!")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to Speak",
                placeholder="Hello, I am a lightweight AI...",
                lines=4,
            )
            voice_dropdown = gr.Dropdown(
                choices=AVAILABLE_VOICES,
                value=DEFAULT_VOICE,
                label="Select Voice",
            )
            generate_btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")

    # Connect the button to the function
    generate_btn.click(
        fn=synthesize_audio,
        inputs=[text_input, voice_dropdown],
        outputs=audio_output,
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()