Spaces:
Running
Running
File size: 3,506 Bytes
66ce19c 0d43d29 66ce19c 0d43d29 a74b4e6 0d43d29 a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c f499fee 66ce19c a74b4e6 f499fee a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c a74b4e6 66ce19c f499fee 66ce19c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | import gradio as gr
import nltk
from tiny_tts import TinyTTS
# Fetch the NLTK resources that g2p-en needs. This is best-effort: if a
# download raises, the problem is printed and start-up continues.
try:
    for _resource in (
        'averaged_perceptron_tagger_eng',
        'averaged_perceptron_tagger',
        'cmudict',
    ):
        nltk.download(_resource, quiet=True)
except Exception as exc:
    print(f"NLTK download warning: {exc}")

# Build the shared TTS engine once at import time (per the original note,
# the model is auto-downloaded from the HF Hub if needed).
print("Initializing TinyTTS...")
tts = TinyTTS()
print("Model loaded successfully!")
def synthesize_audio(text, speed):
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: Text to speak (wired to the "Input Text" textbox).
        speed: Speed factor (wired to the speed slider), forwarded
            unchanged to ``tts.speak``.

    Returns:
        The path of the written WAV file, suitable for the ``gr.Audio``
        output, which is declared with ``type="filepath"``.

    Raises:
        gr.Error: If ``text`` is blank or synthesis fails. The message is
            shown to the user by Gradio. Returning the message as a string
            instead would be interpreted by the audio output as a file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # NOTE(review): every request writes to the same file name; this assumes
    # requests are processed one at a time — confirm against queue settings.
    output_path = "output.wav"
    try:
        tts.speak(text, output_path=output_path, speaker="MALE", speed=speed)
    except Exception as e:
        # Surface the failure in the UI rather than handing a non-path
        # string to the audio component.
        raise gr.Error(f"Error: {e}") from e
    return output_path
# Markdown text rendered on the demo page via gr.Markdown: a benchmark table
# comparing TinyTTS with other TTS engines, followed by the definition of
# RTFx. The figures are hard-coded text; nothing here is measured at runtime.
COMPARISON_TABLE = """
## ⚡ Comparison with Other TTS Engines
All numbers are **CPU-only** on the same Intel Core laptop. Text: *"The weather is nice today, and I feel very relaxed."*
| ENGINE | Params | Total (s) | Audio (s) | RTFx |
|:---|---:|---:|---:|---:|
| **TinyTTS (ONNX) 🚀** | **1.6M** | **0.092** | **4.88** | **~53x** |
| Piper (ONNX, 22kHz) | ~63M | 0.112 | 2.91 | ~26x |
| TinyTTS (PyTorch) | 1.6M | 0.272 | 4.88 | ~18x |
| KittenTTS nano | ~10M | 0.286 | 4.87 | ~17x |
| Supertonic (2-step) | ~82M | 0.249 | 3.69 | ~15x |
| Pocket-TTS | 100M | 0.928 | 3.68 | ~4x |
| Kokoro ONNX | 82M | 0.933 | 3.16 | ~3x |
| KittenTTS mini | ~25M | 2.047 | 4.17 | ~2x |
> **RTFx** = Audio Duration ÷ Synthesis Time (higher = faster).
> TinyTTS achieves the **best speed-to-size ratio**: only **1.6M params** / **3.4 MB** ONNX yet ~53× real-time at 44.1kHz.
"""
# Rows offered as one-click examples: [input text, speed].
example_prompts = [
    ["The weather is nice today, and I feel very relaxed.", 1.0],
    ["TinyTTS has only one point six million parameters, making it extremely fast on CPUs.", 1.0],
    ["This is a speed test. Speaking at one and a half times the normal rate.", 1.5],
    ["Slow and steady wins the race. Let me speak more carefully.", 0.7],
]

# Assemble the demo page: header, input controls next to the audio player,
# example prompts, the synthesis event wiring, and the comparison table.
with gr.Blocks(title="TinyTTS Demo", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🗣️ TinyTTS")
    intro_markdown = (
        "**Ultra-lightweight English Text-to-Speech — only 1.6M parameters, ~3.4 MB ONNX**\n\n"
        "This space runs on CPU efficiently and synthesizes high-quality 44.1kHz audio **~53× faster** than real-time."
    )
    gr.Markdown(intro_markdown)

    with gr.Row():
        # Left column: text, speed and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                value="The weather is nice today, and I feel very relaxed.",
                placeholder="Enter English text here...",
                label="Input Text",
                lines=4,
            )
            speed_slider = gr.Slider(
                label="Speed (1.0 = normal, >1 = faster, <1 = slower)",
                minimum=0.5,
                maximum=2.0,
                step=0.1,
                value=1.0,
            )
            submit_btn = gr.Button("🔊 Synthesize Speech", variant="primary")
        # Right column: player for the synthesized file.
        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")

    # The examples and the button feed the same two input components.
    synthesis_inputs = [text_input, speed_slider]

    gr.Examples(examples=example_prompts, inputs=synthesis_inputs)

    submit_btn.click(
        fn=synthesize_audio,
        inputs=synthesis_inputs,
        outputs=audio_output,
    )

    gr.Markdown(COMPARISON_TABLE)

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")
|