Spaces:
Running
Running
import contextlib
import os
import tempfile

import gradio as gr
import nltk
from tiny_tts import TinyTTS
# Download the NLTK data required by g2p-en.
# Each resource is fetched independently so one failure does not skip the
# others, and the return value is checked because nltk.download() reports
# ordinary failures by returning False rather than raising.
# Failures are only logged: the resources may already be present locally.
_NLTK_RESOURCES = (
    "averaged_perceptron_tagger_eng",
    "averaged_perceptron_tagger",
    "cmudict",
)
for _resource in _NLTK_RESOURCES:
    try:
        if not nltk.download(_resource, quiet=True):
            print(f"NLTK download warning: could not download '{_resource}'")
    except Exception as e:
        print(f"NLTK download warning: {e}")
# Create the single module-level TinyTTS instance used by every request.
# (Per the original note, the model is auto-downloaded from the HF Hub if
# it is not already available.)
print("Initializing TinyTTS...")
tts = TinyTTS()
print("Model loaded successfully!")
def synthesize_audio(text: str, speed: float) -> str:
    """Synthesize ``text`` into a WAV file and return the file's path.

    Args:
        text: The text to speak; forwarded to ``tts.speak``.
        speed: Speed value forwarded to ``tts.speak``.

    Returns:
        Path of the WAV file that ``tts.speak`` was asked to write.

    Raises:
        gr.Error: If ``text`` is empty/whitespace or if synthesis fails.
            Raising (instead of returning the message) matters because the
            return value feeds a ``gr.Audio`` output, which would otherwise
            treat the error text as a file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # One unique file per call: a shared fixed name would let overlapping
    # requests overwrite each other's audio.
    # NOTE(review): successful outputs are not deleted here; confirm that
    # the hosting environment / Gradio cache policy cleans up temp files.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        tts.speak(text, output_path=output_path, speaker="MALE", speed=speed)
    except Exception as e:
        # Do not leave the unused placeholder file behind on failure.
        with contextlib.suppress(OSError):
            os.remove(output_path)
        raise gr.Error(f"Error: {e}") from e
    return output_path
# Markdown shown at the bottom of the demo page: a benchmark table comparing
# TinyTTS with other TTS engines. Blank lines keep the heading, paragraph,
# table and blockquote as separate Markdown blocks.
# NOTE(review): the "π" after "TinyTTS (ONNX)" looks like a truncated emoji
# whose original glyph cannot be recovered from this file — confirm and
# replace it.
COMPARISON_TABLE = """
## ⚡ Comparison with Other TTS Engines

All numbers are **CPU-only** on the same Intel Core laptop. Text: *"The weather is nice today, and I feel very relaxed."*

| ENGINE | Params | Total (s) | Audio (s) | RTFx |
|:---|---:|---:|---:|---:|
| **TinyTTS (ONNX) π** | **1.6M** | **0.092** | **4.88** | **~53x** |
| Piper (ONNX, 22kHz) | ~63M | 0.112 | 2.91 | ~26x |
| TinyTTS (PyTorch) | 1.6M | 0.272 | 4.88 | ~18x |
| KittenTTS nano | ~10M | 0.286 | 4.87 | ~17x |
| Supertonic (2-step) | ~82M | 0.249 | 3.69 | ~15x |
| Pocket-TTS | 100M | 0.928 | 3.68 | ~4x |
| Kokoro ONNX | 82M | 0.933 | 3.16 | ~3x |
| KittenTTS mini | ~25M | 2.047 | 4.17 | ~2x |

> **RTFx** = Audio Duration ÷ Synthesis Time (higher = faster).
> TinyTTS achieves the **best speed-to-size ratio**: only **1.6M params** / **3.4 MB** ONNX yet ~53× real-time at 44.1kHz.
"""
# Build the Gradio interface: text and speed inputs on the left, the audio
# player on the right, then example prompts and the comparison table.
# NOTE(review): the "π" on the button label looks like a truncated emoji
# whose original glyph cannot be recovered from this file — confirm and
# replace it. The em dash in the subtitle is the most likely reading of a
# similarly damaged character.
with gr.Blocks(title="TinyTTS Demo", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🗣️ TinyTTS")
    gr.Markdown(
        "**Ultra-lightweight English Text-to-Speech — only 1.6M parameters, ~3.4 MB ONNX**\n\n"
        "This space runs on CPU efficiently and synthesizes high-quality 44.1kHz audio **~53× faster** than real-time."
    )

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter English text here...",
                value="The weather is nice today, and I feel very relaxed.",
                lines=4,
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speed (1.0 = normal, >1 = faster, <1 = slower)",
            )
            submit_btn = gr.Button("π Synthesize Speech", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")

    # Example prompts: each entry is [text, speed] and fills both inputs.
    gr.Examples(
        examples=[
            ["The weather is nice today, and I feel very relaxed.", 1.0],
            ["TinyTTS has only one point six million parameters, making it extremely fast on CPUs.", 1.0],
            ["This is a speed test. Speaking at one and a half times the normal rate.", 1.5],
            ["Slow and steady wins the race. Let me speak more carefully.", 0.7],
        ],
        inputs=[text_input, speed_slider],
    )

    # Clicking the button runs synthesis and sends the result to the player.
    submit_btn.click(
        fn=synthesize_audio,
        inputs=[text_input, speed_slider],
        outputs=audio_output,
    )

    # Comparison table
    gr.Markdown(COMPARISON_TABLE)
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # "0.0.0.0" binds to all network interfaces rather than only localhost.
    app.launch(server_name="0.0.0.0")