Spaces:

R-TA
/

TTS

Sleeping

App Files Files Community

R-TA committed on Oct 1, 2025

Commit

bb35394

verified ·

1 Parent(s): b7f7616

Create app.py

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+import html
+import subprocess
+import tempfile
+from typing import Optional
+import gradio as gr
# Markdown blurb rendered directly under the page heading. Written one entry
# per line so each bullet can be edited independently; the leading and
# trailing empty entries reproduce the surrounding newlines.
DESCRIPTION = "\n".join(
    (
        "",
        "Mimic 3 TTS on Hugging Face Spaces (Gradio)",
        "- Uses the Mimic 3 CLI under-the-hood and returns a WAV file.",
        "- Leave the Voice Key blank to use the default voice, or provide a specific key (e.g., `en_US/cmu-arctic_low`).",
        "- You can optionally wrap the input in SSML for rate/pitch by toggling the advanced options.",
        "Note: The first run may download voice models and can take longer.",
        "",
    )
)
def build_text(text: str, use_ssml: bool, rate: Optional[str], pitch: Optional[str]) -> str:
    """Return the string to synthesize, wrapped in SSML prosody when requested.

    Args:
        text: Raw user text. ``None`` is treated as an empty string.
        use_ssml: Whether the SSML option is enabled.
        rate: Optional prosody rate, e.g. ``"85%"`` or ``"110%"``.
        pitch: Optional prosody pitch, e.g. ``"+2st"`` or ``"-2st"``.

    Returns:
        ``text`` unchanged when SSML is disabled or when neither ``rate`` nor
        ``pitch`` carries a value (blank / whitespace-only counts as no value).
        Otherwise a ``<speak><prosody ...>...</prosody></speak>`` document in
        which both the text and the attribute values are XML-escaped.
    """
    text = text or ""

    # Strip *before* testing for emptiness so a whitespace-only field is
    # treated as "not provided" instead of producing an empty attribute.
    rate = (rate or "").strip()
    pitch = (pitch or "").strip()

    if not use_ssml or not (rate or pitch):
        return text

    # Attribute values come straight from the UI; escape them (including
    # quotes) so they cannot terminate the attribute or inject markup.
    attrs = "".join(
        f' {name}="{html.escape(value, quote=True)}"'
        for name, value in (("rate", rate), ("pitch", pitch))
        if value
    )
    return f"<speak><prosody{attrs}>{html.escape(text)}</prosody></speak>"
def synthesize(text: str, voice_key: str, use_ssml: bool, rate: str, pitch: str):
    """Run the Mimic 3 CLI on ``text`` and return the path of the resulting WAV.

    Args:
        text: Text to speak. Blank or whitespace-only input is a no-op.
        voice_key: Optional Mimic 3 voice key; blank uses the CLI default.
        use_ssml: Whether to pass ``--ssml`` and apply rate/pitch prosody.
        rate: Optional SSML prosody rate (only used when ``use_ssml`` is set).
        pitch: Optional SSML prosody pitch (only used when ``use_ssml`` is set).

    Returns:
        Path to a temporary ``.wav`` file holding the CLI's stdout, or ``None``
        when there is nothing to synthesize.

    Raises:
        gr.Error: If the ``mimic3`` executable is missing or cannot be started,
            exits with a non-zero status, produces no audio, or the output
            file cannot be written.
    """
    if not text or not text.strip():
        return None

    input_text = build_text(text.strip(), use_ssml, rate, pitch)

    cmd = ["mimic3"]
    if voice_key and voice_key.strip():
        cmd += ["--voice", voice_key.strip()]
    if use_ssml:
        cmd.append("--ssml")
    # "--" ends option parsing, so user text that starts with "-" is spoken
    # rather than being interpreted as a command-line flag.
    cmd += ["--", input_text]

    # Only the process launch is guarded here; the gr.Error instances raised
    # further down must reach Gradio untouched instead of being re-wrapped.
    try:
        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except FileNotFoundError:
        raise gr.Error(
            "mimic3 CLI not found. Ensure package 'mycroft-mimic3-tts' is installed and available in PATH."
        ) from None
    except (OSError, ValueError) as exc:
        # e.g. permission problems, or an argument containing a NUL byte.
        raise gr.Error(str(exc)) from exc

    if proc.returncode != 0:
        err = proc.stderr.decode(errors="ignore")
        raise gr.Error(f"Mimic 3 failed (code {proc.returncode}).\n\n{err}")

    if not proc.stdout:
        # A zero exit status with empty stdout would yield an unplayable file.
        raise gr.Error("Mimic 3 produced no audio output.")

    # Gradio's Audio component (type="filepath") wants a path, so persist the
    # WAV bytes; delete=False keeps the file after the handle is closed.
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(proc.stdout)
    except OSError as exc:
        raise gr.Error(str(exc)) from exc
    return tmp.name
# --- Gradio UI -------------------------------------------------------------
# Components are created top to bottom in the order they appear on the page.
with gr.Blocks(title="Mimic 3 TTS") as demo:
    gr.Markdown("# Mimic 3 TTS\n" + DESCRIPTION)

    # Main input: the text to be spoken.
    with gr.Row():
        text = gr.Textbox(
            label="Text",
            placeholder="Type text to synthesize…",
            lines=4,
        )

    # Optional voice selection.
    with gr.Row():
        voice_key = gr.Textbox(
            label="Voice Key (optional)",
            placeholder="e.g., en_US/cmu-arctic_low (leave blank for default)",
        )

    # SSML prosody controls, collapsed by default.
    with gr.Accordion("Advanced (SSML)", open=False):
        use_ssml = gr.Checkbox(
            label="Use SSML prosody for rate/pitch",
            value=False,
        )
        with gr.Row():
            rate = gr.Textbox(
                label="Rate (e.g., 85%, 110%)",
                placeholder="Optional",
            )
            pitch = gr.Textbox(
                label="Pitch (e.g., +2st, -2st)",
                placeholder="Optional",
            )

    with gr.Row():
        btn = gr.Button("Synthesize", variant="primary")

    # Receives the file path returned by synthesize().
    audio = gr.Audio(label="Output Audio", type="filepath")

    # Input order must match synthesize()'s parameter order.
    btn.click(fn=synthesize, inputs=[text, voice_key, use_ssml, rate, pitch], outputs=[audio])


if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )