R-TA committed on
Commit
bb35394
·
verified ·
1 Parent(s): b7f7616

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import html
3
+ import subprocess
4
+ import tempfile
5
+ from typing import Optional
6
+
7
+ import gradio as gr
8
+
9
+
10
# Markdown blurb rendered beneath the page title by the gr.Markdown call in
# the UI section. The string is user-facing; keep its content verbatim.
DESCRIPTION = """
Mimic 3 TTS on Hugging Face Spaces (Gradio)

- Uses the Mimic 3 CLI under-the-hood and returns a WAV file.
- Leave the Voice Key blank to use the default voice, or provide a specific key (e.g., `en_US/cmu-arctic_low`).
- You can optionally wrap the input in SSML for rate/pitch by toggling the advanced options.

Note: The first run may download voice models and can take longer.
"""
+
20
+
21
def build_text(text: str, use_ssml: bool, rate: Optional[str], pitch: Optional[str]) -> str:
    """Return the payload for Mimic 3: plain text or a small SSML document.

    Args:
        text: Raw user text; a falsy value is treated as an empty string.
        use_ssml: Whether the result should be an SSML document.
        rate: Optional prosody rate (e.g. "85%"); blank or whitespace-only
            means "not set".
        pitch: Optional prosody pitch (e.g. "+2st"); blank or whitespace-only
            means "not set".

    Returns:
        ``text`` unchanged when ``use_ssml`` is false. Otherwise a
        ``<speak>`` document containing the XML-escaped text, wrapped in a
        ``<prosody>`` element when a rate and/or pitch was supplied.
    """
    text = text or ""
    if not use_ssml:
        return text

    # Normalise first so whitespace-only fields do not produce empty
    # rate=""/pitch="" attributes.
    rate = (rate or "").strip()
    pitch = (pitch or "").strip()

    # The text is always escaped in SSML mode: the result is going to be
    # parsed as XML, so a bare "&" or "<" would make it malformed.
    body = html.escape(text)

    # Attribute values come from free-form text boxes; escape them (including
    # quotes) so they cannot break out of the attribute and inject markup.
    attrs = ""
    if rate:
        attrs += f' rate="{html.escape(rate, quote=True)}"'
    if pitch:
        attrs += f' pitch="{html.escape(pitch, quote=True)}"'
    if attrs:
        body = f"<prosody{attrs}>{body}</prosody>"

    # With the toggle on but no adjustments, still emit a well-formed document
    # rather than handing raw text to an SSML parser.
    return f"<speak>{body}</speak>"
+
33
+
34
def synthesize(text: str, voice_key: str, use_ssml: bool, rate: str, pitch: str):
    """Run the ``mimic3`` CLI on ``text`` and return the path of a WAV file.

    Args:
        text: Text to speak. Empty or whitespace-only input yields ``None``.
        voice_key: Optional voice key passed via ``--voice``; blank means the
            CLI default.
        use_ssml: Whether to build the payload through the SSML path.
        rate: Optional SSML prosody rate, forwarded to ``build_text``.
        pitch: Optional SSML prosody pitch, forwarded to ``build_text``.

    Returns:
        Path to a temporary ``.wav`` file holding the CLI's stdout, or
        ``None`` when there is nothing to synthesize. The file is created with
        ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If the CLI is missing, cannot be started, exits with a
            non-zero status, or the output file cannot be written.
    """
    if not text or not text.strip():
        return None

    input_text = build_text(text.strip(), use_ssml, rate, pitch)

    # Prepare the command (argument list, never a shell string).
    cmd = ["mimic3"]
    if voice_key and voice_key.strip():
        cmd += ["--voice", voice_key.strip()]
    # Only request SSML parsing when the payload actually is markup; plain
    # text sent with --ssml is presumably rejected as malformed XML
    # (NOTE(review): confirm against the Mimic 3 CLI).
    if use_ssml and input_text.lstrip().startswith("<"):
        cmd += ["--ssml"]
    # "--" ends option parsing so user text beginning with "-" cannot be
    # interpreted as a CLI flag. Assumes mimic3 follows the standard
    # argparse convention -- TODO confirm.
    cmd += ["--", input_text]

    # Keep the try body limited to the call that can fail to start, so the
    # gr.Error raised below for a non-zero exit is not caught and re-wrapped.
    try:
        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except FileNotFoundError:
        # The mimic3 CLI was not found; show a helpful error in the UI
        raise gr.Error(
            "mimic3 CLI not found. Ensure package 'mycroft-mimic3-tts' is installed and available in PATH."
        ) from None
    except Exception as e:
        raise gr.Error(str(e)) from e

    if proc.returncode != 0:
        err = proc.stderr.decode(errors="ignore")
        raise gr.Error(f"Mimic 3 failed (code {proc.returncode}).\n\n{err}")

    # Write the WAV bytes to a temp file for Gradio
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(proc.stdout)
            tmp_path = tmp.name
    except OSError as e:
        raise gr.Error(str(e)) from e
    return tmp_path
70
+
71
+
72
# Declarative UI: text input, optional voice key, collapsible SSML options,
# a trigger button and the audio player that receives the generated file.
with gr.Blocks(title="Mimic 3 TTS") as demo:
    gr.Markdown(f"# Mimic 3 TTS\n{DESCRIPTION}")

    # Main text input.
    with gr.Row():
        text = gr.Textbox(
            label="Text",
            placeholder="Type text to synthesize…",
            lines=4,
        )

    # Optional voice selection; blank falls through to the CLI default.
    with gr.Row():
        voice_key = gr.Textbox(
            label="Voice Key (optional)",
            placeholder="e.g., en_US/cmu-arctic_low (leave blank for default)",
        )

    # Prosody controls are tucked away in a closed accordion.
    with gr.Accordion("Advanced (SSML)", open=False):
        use_ssml = gr.Checkbox(
            label="Use SSML prosody for rate/pitch",
            value=False,
        )
        with gr.Row():
            rate = gr.Textbox(
                label="Rate (e.g., 85%, 110%)",
                placeholder="Optional",
            )
            pitch = gr.Textbox(
                label="Pitch (e.g., +2st, -2st)",
                placeholder="Optional",
            )

    with gr.Row():
        btn = gr.Button("Synthesize", variant="primary")

    # type="filepath" matches synthesize(), which returns a path on disk.
    audio = gr.Audio(label="Output Audio", type="filepath")

    # Input order must match synthesize()'s positional parameters.
    btn.click(
        fn=synthesize,
        inputs=[text, voice_key, use_ssml, rate, pitch],
        outputs=[audio],
    )

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )