backtracking committed on
Commit
f499fee
·
verified ·
1 Parent(s): dcba272

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +24 -1
app.py CHANGED
@@ -25,12 +25,32 @@ def synthesize_audio(text, speed):
25
  return f"Error: {e}"
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Create Gradio interface
29
  with gr.Blocks(title="TinyTTS Demo", theme=gr.themes.Soft()) as app:
30
  gr.Markdown("# 🗣️ TinyTTS")
31
  gr.Markdown(
32
  "**Ultra-lightweight English Text-to-Speech — only 1.6M parameters, ~3.4 MB ONNX**\n\n"
33
- "Synthesizes high-quality 44.1kHz audio **~53× faster** than real-time on CPU."
34
  )
35
 
36
  with gr.Row():
@@ -70,5 +90,8 @@ with gr.Blocks(title="TinyTTS Demo", theme=gr.themes.Soft()) as app:
70
  outputs=audio_output
71
  )
72
 
 
 
 
73
  if __name__ == "__main__":
74
  app.launch(server_name="0.0.0.0")
 
25
  return f"Error: {e}"
26
 
27
 
28
+ COMPARISON_TABLE = """
29
+ ## ⚡ Comparison with Other TTS Engines
30
+
31
+ All numbers are **CPU-only** on the same Intel Core laptop. Text: *"The weather is nice today, and I feel very relaxed."*
32
+
33
+ | ENGINE | Params | Total (s) | Audio (s) | RTFx |
34
+ |:---|---:|---:|---:|---:|
35
+ | **TinyTTS (ONNX) 🚀** | **1.6M** | **0.092** | **4.88** | **~53x** |
36
+ | Piper (ONNX, 22kHz) | ~63M | 0.112 | 2.91 | ~26x |
37
+ | TinyTTS (PyTorch) | 1.6M | 0.272 | 4.88 | ~18x |
38
+ | KittenTTS nano | ~10M | 0.286 | 4.87 | ~17x |
39
+ | Supertonic (2-step) | ~82M | 0.249 | 3.69 | ~15x |
40
+ | Pocket-TTS | 100M | 0.928 | 3.68 | ~4x |
41
+ | Kokoro ONNX | 82M | 0.933 | 3.16 | ~3x |
42
+ | KittenTTS mini | ~25M | 2.047 | 4.17 | ~2x |
43
+
44
+ > **RTFx** = Audio Duration ÷ Synthesis Time (higher = faster).
45
+ > TinyTTS achieves the **best speed-to-size ratio**: only **1.6M params** / **3.4 MB** ONNX yet ~53× real-time at 44.1kHz.
46
+ """
47
+
48
  # Create Gradio interface
49
  with gr.Blocks(title="TinyTTS Demo", theme=gr.themes.Soft()) as app:
50
  gr.Markdown("# 🗣️ TinyTTS")
51
  gr.Markdown(
52
  "**Ultra-lightweight English Text-to-Speech — only 1.6M parameters, ~3.4 MB ONNX**\n\n"
53
+ "This space runs on CPU efficiently and synthesizes high-quality 44.1kHz audio **~53× faster** than real-time."
54
  )
55
 
56
  with gr.Row():
 
90
  outputs=audio_output
91
  )
92
 
93
+ # Comparison table
94
+ gr.Markdown(COMPARISON_TABLE)
95
+
96
  if __name__ == "__main__":
97
  app.launch(server_name="0.0.0.0")