Text2VoiceGen / app.py
Khalil09's picture
Update app.py
cf85627 verified
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import json
# -- Voice catalogue -----------------------------------------------------------
# Maps the label shown in the "Neural Voice" dropdown to the Edge TTS voice id
# handed to the synthesiser. Insertion order is the order of the dropdown.
VOICES = {
    # United States
    "๐ŸŒŸ Aria (US โ€“ Female)": "en-US-AriaNeural",
    "๐ŸŽ™๏ธ Guy (US โ€“ Male)": "en-US-GuyNeural",
    "โœจ Jenny (US โ€“ Female)": "en-US-JennyNeural",
    "๐Ÿ”ฅ Davis (US โ€“ Male)": "en-US-DavisNeural",
    "๐ŸŒŠ Jane (US โ€“ Female)": "en-US-JaneNeural",
    "โšก Tony (US โ€“ Male)": "en-US-TonyNeural",
    # United Kingdom
    "๐ŸŒธ Sonia (UK โ€“ Female)": "en-GB-SoniaNeural",
    "๐ŸŽฉ Ryan (UK โ€“ Male)": "en-GB-RyanNeural",
    "๐Ÿ’ซ Libby (UK โ€“ Female)": "en-GB-LibbyNeural",
    # Australia
    "๐ŸŒบ Natasha (AU โ€“ Female)": "en-AU-NatashaNeural",
    "๐Ÿฆ˜ William (AU โ€“ Male)": "en-AU-WilliamNeural",
    # Canada
    "๐Ÿ Clara (CA โ€“ Female)": "en-CA-ClaraNeural",
    # India
    "๐ŸŒด Neerja (IN โ€“ Female)": "en-IN-NeerjaNeural",
    "๐ŸŽต Prabhat (IN โ€“ Male)": "en-IN-PrabhatNeural",
}
# Style presets: dropdown label -> prosody offsets. Each value is a signed
# string ("+5%", "-2Hz") that is passed to the synthesiser unchanged.
PRESETS = {
    "๐ŸŽ™๏ธ Podcast Host": {"rate": "+5%", "pitch": "-2Hz", "volume": "+10%"},
    "๐Ÿ“ฐ News Anchor": {"rate": "+0%", "pitch": "+0Hz", "volume": "+5%"},
    "๐Ÿง˜ Meditation": {"rate": "-20%", "pitch": "-5Hz", "volume": "-10%"},
    "๐Ÿ“š Audiobook": {"rate": "-5%", "pitch": "+0Hz", "volume": "+0%"},
    "๐Ÿค– AI Assistant": {"rate": "+10%", "pitch": "+5Hz", "volume": "+15%"},
    "๐ŸŽฎ Game Narrator": {"rate": "+15%", "pitch": "-8Hz", "volume": "+20%"},
    "๐Ÿ‘ถ Kids Story": {"rate": "-10%", "pitch": "+10Hz", "volume": "+5%"},
    "๐Ÿ”ฌ Documentary": {"rate": "-3%", "pitch": "-3Hz", "volume": "+8%"},
}
# -- TTS core ------------------------------------------------------------------
async def _synthesise(text: str, voice: str, rate: str, pitch: str, volume: str) -> str:
    """Render *text* to an MP3 file with Edge TTS and return the file's path.

    Args:
        text: Text to speak.
        voice: Edge TTS voice identifier, e.g. ``"en-US-AriaNeural"``.
        rate: Signed percentage string, e.g. ``"+5%"``.
        pitch: Signed hertz string, e.g. ``"-2Hz"``.
        volume: Signed percentage string, e.g. ``"+10%"``.

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is not
        deleted here; it has to outlive this call so the caller can use it.

    Raises:
        Any exception raised by ``edge_tts`` while building the request or
        saving the audio is propagated unchanged.
    """
    communicate = edge_tts.Communicate(
        text=text, voice=voice,
        rate=rate, pitch=pitch, volume=volume
    )
    # Only a unique path is needed. Close the handle straight away so no file
    # descriptor is leaked and the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        out_path = tmp.name
    try:
        await communicate.save(out_path)
    except BaseException:
        # Do not leave an empty or partial file behind when synthesis fails
        # or the task is cancelled.
        if os.path.exists(out_path):
            os.remove(out_path)
        raise
    return out_path
def generate_voice(
    text, voice_label, preset_label,
    rate_slider, pitch_slider, volume_slider
):
    """Synthesise *text* and return the audio file path plus a status line.

    Args:
        text: Text typed by the user.
        voice_label: Key of ``VOICES``; an unknown label falls back to
            ``"en-US-AriaNeural"``.
        preset_label: Key of ``PRESETS``. When it names a preset, the preset's
            rate/pitch/volume are used and the sliders are ignored. Any other
            value (the custom entry, ``None``, an unknown label) selects the
            slider values.
        rate_slider: Speed offset in percent.
        pitch_slider: Pitch offset in hertz.
        volume_slider: Volume offset in percent.

    Returns:
        ``(audio_path, stats)`` where ``audio_path`` is the generated MP3 file
        and ``stats`` summarises word count, character count and voice.

    Raises:
        gr.Error: If *text* is empty or only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert!")
    voice_id = VOICES.get(voice_label, "en-US-AriaNeural")

    # A preset overrides the sliders. Looking the label up (rather than
    # comparing against the custom entry's text) means a label that is not a
    # preset falls through to the sliders instead of raising KeyError.
    preset = PRESETS.get(preset_label)
    if preset is not None:
        rate = preset["rate"]
        pitch = preset["pitch"]
        volume = preset["volume"]
    else:
        # Slider values may arrive as floats (5.0). Round to int and let the
        # "+d" format emit the explicit sign, so the result has the same
        # "+5%" / "-2Hz" shape as the preset strings.
        rate = f"{round(rate_slider):+d}%"
        pitch = f"{round(pitch_slider):+d}Hz"
        volume = f"{round(volume_slider):+d}%"

    audio_path = asyncio.run(_synthesise(text, voice_id, rate, pitch, volume))
    word_count = len(text.split())
    char_count = len(text)
    stats = f"โœ… Generated | {word_count} words | {char_count} chars | Voice: {voice_label}"
    return audio_path, stats
def apply_preset(preset_label):
    """Return slider updates when a preset is chosen.

    Args:
        preset_label: Value of the preset dropdown.

    Returns:
        Three ``gr.update`` objects for the rate, pitch and volume sliders, in
        that order. When *preset_label* is not a key of ``PRESETS`` (the
        custom entry, ``None``, or an unknown label) the updates are empty, so
        the sliders keep their current values.
    """
    preset = PRESETS.get(preset_label)
    if preset is None:
        return gr.update(), gr.update(), gr.update()
    # Only the unit suffix has to go: int() accepts a leading "+" or "-".
    rate = int(preset["rate"].replace("%", ""))
    pitch = int(preset["pitch"].replace("Hz", ""))
    volume = int(preset["volume"].replace("%", ""))
    return gr.update(value=rate), gr.update(value=pitch), gr.update(value=volume)
# -- Sample texts --------------------------------------------------------------
# Ready-made passages offered as "Quick Samples" buttons; clicking one copies
# the passage into the main text box.
SAMPLES = [
    (
        "Welcome to the future of voice synthesis. "
        "This AI-powered engine transforms your words into lifelike speech "
        "with stunning clarity and natural rhythm."
    ),
    (
        "In the beginning, there was silence. "
        "Then came the voice โ€” warm, resonant, and unmistakably human. "
        "Today, that voice belongs to you."
    ),
    (
        "Breaking news: Scientists have discovered a new exoplanet "
        "orbiting a distant star, potentially harboring conditions "
        "suitable for life."
    ),
    (
        "Close your eyes. Take a deep breath. "
        "Let every thought drift away like clouds on a gentle breeze. "
        "You are safe. You are at peace."
    ),
]
# -- Custom CSS ----------------------------------------------------------------
# Stylesheet passed to Gradio through demo.launch(css=CSS).
# It imports the Syne and DM Sans fonts from Google Fonts, declares a dark
# colour palette as CSS variables on :root, and styles the classes used by the
# page markup (hero-*, section-label, card, voice-pill, gen-btn, sample-btn,
# stats-box, footer-txt), plus generic overrides for textareas, selects, range
# inputs, labels and the .gr-* containers.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Sans:ital,wght@0,300;0,400;0,500;1,300&display=swap');
:root {
--bg: #08090d;
--surface: #0f1117;
--surface2: #161820;
--border: #1e2130;
--accent: #6c63ff;
--accent2: #ff6584;
--gold: #f5c842;
--text: #e8e9f0;
--muted: #6b7280;
--glow: rgba(108,99,255,0.35);
}
* { box-sizing: border-box; }
body, .gradio-container {
background: var(--bg) !important;
font-family: 'DM Sans', sans-serif !important;
color: var(--text) !important;
min-height: 100vh;
}
/* โ”€โ”€ Hero Header โ”€โ”€ */
.hero-wrap {
text-align: center;
padding: 52px 24px 36px;
position: relative;
overflow: hidden;
}
.hero-wrap::before {
content: "";
position: absolute;
inset: 0;
background: radial-gradient(ellipse 70% 55% at 50% 0%, rgba(108,99,255,.18) 0%, transparent 70%);
pointer-events: none;
}
.hero-badge {
display: inline-block;
background: linear-gradient(135deg, var(--accent), var(--accent2));
color: #fff;
font-family: 'Syne', sans-serif;
font-size: 11px;
font-weight: 700;
letter-spacing: .12em;
text-transform: uppercase;
padding: 5px 16px;
border-radius: 100px;
margin-bottom: 20px;
}
.hero-title {
font-family: 'Syne', sans-serif !important;
font-size: clamp(2.4rem, 5vw, 4rem) !important;
font-weight: 800 !important;
line-height: 1.1 !important;
background: linear-gradient(135deg, #fff 30%, var(--accent) 70%, var(--accent2) 100%);
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
background-clip: text !important;
margin: 0 0 14px !important;
}
.hero-sub {
font-size: 1.05rem;
color: var(--muted);
max-width: 520px;
margin: 0 auto;
line-height: 1.6;
}
/* โ”€โ”€ Cards / Panels โ”€โ”€ */
.card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 18px;
padding: 28px;
transition: border-color .25s;
}
.card:hover { border-color: rgba(108,99,255,.4); }
.section-label {
font-family: 'Syne', sans-serif;
font-size: .72rem;
font-weight: 700;
letter-spacing: .14em;
text-transform: uppercase;
color: var(--accent);
margin-bottom: 12px;
}
/* โ”€โ”€ Textbox โ”€โ”€ */
textarea, .gr-textbox textarea {
background: var(--surface2) !important;
border: 1.5px solid var(--border) !important;
border-radius: 12px !important;
color: var(--text) !important;
font-family: 'DM Sans', sans-serif !important;
font-size: 1rem !important;
padding: 16px !important;
resize: vertical !important;
transition: border-color .2s, box-shadow .2s !important;
}
textarea:focus, .gr-textbox textarea:focus {
border-color: var(--accent) !important;
box-shadow: 0 0 0 3px var(--glow) !important;
outline: none !important;
}
/* โ”€โ”€ Dropdowns โ”€โ”€ */
.gr-dropdown select, select {
background: var(--surface2) !important;
border: 1.5px solid var(--border) !important;
border-radius: 10px !important;
color: var(--text) !important;
font-family: 'DM Sans', sans-serif !important;
padding: 10px 14px !important;
}
/* โ”€โ”€ Sliders โ”€โ”€ */
input[type="range"] {
accent-color: var(--accent) !important;
height: 4px;
}
/* โ”€โ”€ Generate Button โ”€โ”€ */
.gen-btn, .gen-btn button {
width: 100% !important;
padding: 18px !important;
border-radius: 14px !important;
background: linear-gradient(135deg, var(--accent), #8b5cf6, var(--accent2)) !important;
background-size: 200% 200% !important;
animation: gradShift 4s ease infinite !important;
border: none !important;
color: #fff !important;
font-family: 'Syne', sans-serif !important;
font-size: 1.1rem !important;
font-weight: 700 !important;
letter-spacing: .04em !important;
cursor: pointer !important;
box-shadow: 0 8px 30px rgba(108,99,255,.4) !important;
transition: transform .15s, box-shadow .15s !important;
}
.gen-btn button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 12px 40px rgba(108,99,255,.55) !important;
}
.gen-btn button:active { transform: translateY(0) !important; }
@keyframes gradShift {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
/* โ”€โ”€ Sample Buttons โ”€โ”€ */
.sample-btn button {
background: var(--surface2) !important;
border: 1px solid var(--border) !important;
border-radius: 8px !important;
color: var(--muted) !important;
font-size: .82rem !important;
padding: 8px 14px !important;
transition: all .2s !important;
}
.sample-btn button:hover {
border-color: var(--accent) !important;
color: var(--accent) !important;
background: rgba(108,99,255,.08) !important;
}
/* โ”€โ”€ Audio Player โ”€โ”€ */
.gr-audio {
background: var(--surface2) !important;
border: 1px solid var(--border) !important;
border-radius: 14px !important;
padding: 16px !important;
}
/* โ”€โ”€ Stats bar โ”€โ”€ */
.stats-box textarea, .stats-box input {
background: rgba(108,99,255,.07) !important;
border: 1px solid rgba(108,99,255,.25) !important;
border-radius: 10px !important;
color: var(--accent) !important;
font-family: 'Syne', sans-serif !important;
font-size: .85rem !important;
text-align: center !important;
}
/* โ”€โ”€ Voice grid pills โ”€โ”€ */
.voice-pill {
display: inline-block;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 100px;
padding: 4px 14px;
font-size: .78rem;
color: var(--muted);
margin: 3px;
transition: all .2s;
}
.voice-pill:hover {
background: rgba(108,99,255,.12);
border-color: var(--accent);
color: var(--accent);
}
/* โ”€โ”€ Footer โ”€โ”€ */
.footer-txt {
text-align: center;
color: var(--muted);
font-size: .8rem;
padding: 28px 0 20px;
border-top: 1px solid var(--border);
margin-top: 40px;
}
/* โ”€โ”€ Misc Gradio overrides โ”€โ”€ */
.gr-form, .gr-box { background: transparent !important; }
label { color: var(--muted) !important; font-size: .82rem !important; font-weight: 500 !important; margin-bottom: 6px !important; }
.gr-panel { background: transparent !important; border: none !important; }
"""
# -- Build UI ------------------------------------------------------------------
# Page layout: hero banner, then a two-column row (text, voice and fine
# controls on the left; generate button, outputs and a reference card on the
# right), then a footer. Events are wired to generate_voice and apply_preset.
with gr.Blocks(title="VoiceForge AI โ€” Text to Speech") as demo:
    # Hero banner
    gr.HTML("""
<div class="hero-wrap">
<div class="hero-badge">โšก Powered by Edge TTS Neural Engine</div>
<h1 class="hero-title">VoiceForge AI</h1>
<p class="hero-sub">Transform any text into stunning, lifelike speech with 14 neural voices, real-time controls, and studio-quality output.</p>
</div>
""")

    with gr.Row(equal_height=False):
        # -- Left column: text entry, samples, voice/preset pickers, sliders --
        with gr.Column(scale=3):
            gr.HTML('<div class="section-label">โœ๏ธ Your Text</div>')
            text_input = gr.Textbox(
                placeholder="Type or paste anything here โ€” a story, an announcement, a poemโ€ฆ",
                lines=7,
                max_lines=20,
                show_label=False,
                elem_id="main-text",
            )

            # One button per sample; clicking it fills the text box.
            gr.HTML('<div class="section-label" style="margin-top:18px">๐ŸŽฒ Quick Samples</div>')
            with gr.Row():
                for sample in SAMPLES:
                    # Caption is the first 38 characters of the sample, quoted.
                    caption = f'"{sample[:38]}โ€ฆ"'
                    sample_btn = gr.Button(caption, elem_classes=["sample-btn"], size="sm")
                    # The default argument pins this iteration's sample to the callback.
                    sample_btn.click(fn=lambda t=sample: t, outputs=text_input)

            # Voice and style preset pickers
            gr.HTML('<div class="section-label" style="margin-top:24px">๐ŸŽ™๏ธ Voice & Style</div>')
            # Label of the non-preset entry; bound once so the choice list and
            # the initial value cannot drift apart.
            custom_label = "๐ŸŽ›๏ธ Custom"
            with gr.Row():
                voice_dd = gr.Dropdown(
                    choices=list(VOICES),
                    value="๐ŸŒŸ Aria (US โ€“ Female)",
                    label="Neural Voice",
                    interactive=True,
                )
                preset_dd = gr.Dropdown(
                    choices=[custom_label, *PRESETS],
                    value=custom_label,
                    label="Style Preset",
                    interactive=True,
                )

            # Fine controls: rate, pitch and volume offsets
            gr.HTML('<div class="section-label" style="margin-top:20px">๐ŸŽ›๏ธ Fine Controls</div>')
            with gr.Row():
                rate_sl = gr.Slider(-50, 50, value=0, step=1, label="โšก Speed (%)")
                pitch_sl = gr.Slider(-20, 20, value=0, step=1, label="๐ŸŽต Pitch (Hz)")
                vol_sl = gr.Slider(-50, 50, value=0, step=1, label="๐Ÿ”Š Volume (%)")

            # Choosing a preset moves the three sliders to the preset's values.
            preset_dd.change(
                fn=apply_preset,
                inputs=[preset_dd],
                outputs=[rate_sl, pitch_sl, vol_sl],
            )

        # -- Right column: generate button, audio output, stats, reference card --
        with gr.Column(scale=2):
            gr.HTML('<div class="section-label">๐Ÿš€ Generate</div>')
            gen_btn = gr.Button("โœจ Generate Voice", elem_classes=["gen-btn"], variant="primary")

            gr.HTML('<div class="section-label" style="margin-top:24px">๐ŸŽง Output Audio</div>')
            audio_out = gr.Audio(
                label="",
                type="filepath",
                interactive=False,
            )
            stats_out = gr.Textbox(
                label="",
                interactive=False,
                show_label=False,
                elem_classes=["stats-box"],
                placeholder="Stats will appear here after generationโ€ฆ",
            )

            # Static reference card listing regions and presets
            gr.HTML("""
<div class="card" style="margin-top:20px">
<div class="section-label">๐ŸŒ Available Regions</div>
<div>
<span class="voice-pill">๐Ÿ‡บ๐Ÿ‡ธ United States</span>
<span class="voice-pill">๐Ÿ‡ฌ๐Ÿ‡ง United Kingdom</span>
<span class="voice-pill">๐Ÿ‡ฆ๐Ÿ‡บ Australia</span>
<span class="voice-pill">๐Ÿ‡จ๐Ÿ‡ฆ Canada</span>
<span class="voice-pill">๐Ÿ‡ฎ๐Ÿ‡ณ India</span>
</div>
<div class="section-label" style="margin-top:14px">๐ŸŽญ Style Presets</div>
<div>
<span class="voice-pill">๐ŸŽ™๏ธ Podcast</span>
<span class="voice-pill">๐Ÿ“ฐ News</span>
<span class="voice-pill">๐Ÿง˜ Meditation</span>
<span class="voice-pill">๐Ÿ“š Audiobook</span>
<span class="voice-pill">๐Ÿค– AI Assistant</span>
</div>
</div>
""")

    # The generate button feeds every control into generate_voice.
    gen_btn.click(
        fn=generate_voice,
        inputs=[text_input, voice_dd, preset_dd, rate_sl, pitch_sl, vol_sl],
        outputs=[audio_out, stats_out],
    )

    # Footer
    gr.HTML("""
<div class="footer-txt">
Built with โค๏ธ using Microsoft Edge TTS Neural Voices &nbsp;ยท&nbsp;
VoiceForge AI &nbsp;ยท&nbsp;
<a href="https://huggingface.co" style="color:#6c63ff;text-decoration:none">Hugging Face Spaces</a>
</div>
""")

demo.launch(css=CSS)