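# TTS-Indic / app.py
# Hugging Face Space file-view header captured with the source:
#   author avatar: "AS-Shunya's picture"
#   commit: "Update app.py" (1afa812, verified)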
import os
import tempfile

import gradio as gr
import requests
# Base URL of the TTS backend; override it with the TTS_API_URL environment
# variable. An empty value falls back to the default, and trailing slashes are
# dropped so that f"{API_URL}/speakers" never contains a double slash.
API_URL = (os.getenv("TTS_API_URL") or "http://15.206.159.28:9051").rstrip("/")
def fetch_speakers():
    """Fetch the speaker catalogue from the TTS API.

    Sends ``GET {API_URL}/speakers`` (10 second timeout) and reads the
    ``"speakers"`` entry of the JSON response.

    Returns:
        The ``"speakers"`` dict from the response, or an empty dict when the
        request fails, the response cannot be parsed, or the entry is not a
        dict.
    """
    try:
        resp = requests.get(f"{API_URL}/speakers", timeout=10)
        resp.raise_for_status()
        speakers = resp.json()["speakers"]
    except Exception as e:
        # Deliberate best-effort: any failure here is reported and swallowed
        # so the caller always receives a dict.
        print(f"Failed to fetch speakers: {e}")
        return {}

    # Callers iterate the result with .items(); reject any other payload shape
    # here instead of letting it blow up later.
    if not isinstance(speakers, dict):
        print(
            "Failed to fetch speakers: expected a mapping, "
            f"got {type(speakers).__name__}"
        )
        return {}
    return speakers
# Speaker catalogue, loaded once at import time (empty when the lookup fails).
SPEAKERS = fetch_speakers()

# Dropdown labels have the form "Rajesh - Hindi Male"; a single placeholder
# entry is used when no speakers could be loaded.
if SPEAKERS:
    SPEAKER_CHOICES = [
        f"{speaker} - {description}"
        for speaker, description in SPEAKERS.items()
    ]
else:
    SPEAKER_CHOICES = ["No speakers available"]
def generate_speech(speaker_selection, text):
    """Synthesize ``text`` with the selected speaker via the TTS API.

    Args:
        speaker_selection: Dropdown label of the form
            ``"<name> - <description>"``; only the part before the first
            ``" - "`` is sent to the API as the speaker.
        text: Text to synthesize. Surrounding whitespace is stripped before
            it is sent.

    Returns:
        A ``(path, path)`` tuple holding the same WAV file path twice, one
        entry per output component wired to this function.

    Raises:
        gr.Error: If no valid speaker is selected, the text is blank, the API
            request fails, or the API returns an empty body.
    """
    if not speaker_selection or " - " not in speaker_selection:
        raise gr.Error("Please select a speaker.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    speaker_name = speaker_selection.split(" - ")[0].strip()

    try:
        resp = requests.post(
            f"{API_URL}/tts",
            json={"text": text.strip(), "speaker": speaker_name},
            timeout=120,
        )
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        detail = ""
        try:
            detail = e.response.json().get("detail", "")
        except (ValueError, AttributeError):
            # Error body is not JSON, or not a JSON object; fall back to the
            # exception text below.
            pass
        raise gr.Error(f"API error: {detail or str(e)}") from e
    except requests.exceptions.ConnectionError as e:
        raise gr.Error("Cannot connect to TTS API. Is the service running?") from e
    except requests.exceptions.Timeout as e:
        raise gr.Error("Request timed out. Try shorter text.") from e
    except requests.exceptions.RequestException as e:
        # Any other transport-level failure (redirect loops, broken streams, ...)
        # should also surface as a readable UI error.
        raise gr.Error(f"Request to TTS API failed: {e}") from e

    if not resp.content:
        raise gr.Error("TTS API returned no audio.")

    out_dir = "/tmp/shunya-tts-ui"
    os.makedirs(out_dir, exist_ok=True)

    # Keep the speaker name in the file name for readability, but strip
    # anything that could act as a path separator or otherwise escape out_dir.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in speaker_name
    ) or "speaker"

    # A unique file per request: a fixed "<speaker>.wav" path would let
    # concurrent requests for the same speaker overwrite each other's audio.
    # NOTE: these files are not removed here; they accumulate until the
    # directory is cleaned up externally.
    fd, out_path = tempfile.mkstemp(
        prefix=f"{safe_name}-", suffix=".wav", dir=out_dir
    )
    with os.fdopen(fd, "wb") as f:
        f.write(resp.content)

    return out_path, out_path
# Stylesheet passed to gr.Blocks(css=...). It styles the page container, the
# header banner (.vak-header), the speaker dropdown row (.speaker-center), the
# generate button (.generate-btn) and the footer (.vak-footer).
CUSTOM_CSS = """
/* Global */
.gradio-container {
max-width: 720px !important;
margin: 0 auto !important;
font-family: 'Inter', 'Segoe UI', system-ui, sans-serif !important;
}
/* Header */
.vak-header {
background: linear-gradient(135deg, #0F2850 0%, #1A4080 100%);
border-radius: 12px;
padding: 28px 24px 22px;
margin-bottom: 24px;
text-align: center;
}
.vak-header h1 {
margin: 0 !important;
font-size: 28px !important;
font-weight: 700 !important;
color: #FFFFFF !important;
letter-spacing: 0.5px !important;
}
.vak-header .vak-gold {
display: block;
width: 60px;
height: 2px;
background: #DAA520;
margin: 12px auto;
border-radius: 1px;
}
.vak-header p {
margin: 0 !important;
font-size: 14px !important;
color: #FFFFFF !important;
font-weight: 400 !important;
}
.vak-header .vak-badge {
display: inline-block;
margin-top: 14px;
padding: 4px 14px;
background: rgba(255, 255, 255, 0.12);
border: 1px solid rgba(255, 255, 255, 0.3);
border-radius: 20px;
font-size: 12px !important;
color: #FFFFFF !important;
letter-spacing: 0.3px;
}
/* Speaker dropdown */
.speaker-center {
display: flex !important;
justify-content: center !important;
}
/* Generate button */
.generate-btn {
background: linear-gradient(135deg, #0F2850 0%, #1A4080 100%) !important;
border: none !important;
color: white !important;
font-size: 15px !important;
font-weight: 600 !important;
letter-spacing: 0.4px !important;
padding: 12px 0 !important;
border-radius: 8px !important;
transition: all 0.2s ease !important;
}
.generate-btn:hover {
background: linear-gradient(135deg, #1A4080 0%, #2555A0 100%) !important;
box-shadow: 0 4px 16px rgba(15, 40, 80, 0.3) !important;
transform: translateY(-1px) !important;
}
/* Footer */
.vak-footer {
text-align: center;
padding: 16px 0 4px;
margin-top: 8px;
border-top: 1px solid #E8E8E8;
}
.vak-footer p {
margin: 0;
font-size: 12px;
color: #999;
}
.vak-footer a {
color: #1A4080;
text-decoration: none;
font-weight: 500;
}
"""
# Banner markup rendered through gr.HTML as the first element of the page.
# Its classes (vak-header, vak-gold, vak-badge) are styled in CUSTOM_CSS.
HEADER_HTML = """
<div class="vak-header">
<h1>Vāķ Text to Speech</h1>
<span class="vak-gold"></span>
<p>Natural speech synthesis for 55 Indian languages across 5 language families</p>
<span class="vak-badge">55 Languages &middot; Real-time &middot; By Shunya Labs</span>
</div>
"""
# Footer markup rendered through gr.HTML as the last element of the page.
# The vak-footer class is styled in CUSTOM_CSS.
FOOTER_HTML = """
<div class="vak-footer">
<p><a href="https://shunyalabs.ai">Shunya Labs</a> &middot;
Part of the <strong>Vak</strong> suite &middot;
Built in India</p>
</div>
"""
# Page layout, top to bottom: header banner, speaker dropdown, text box,
# generate button, audio player, download link, footer.
with gr.Blocks(
    title="Vāķ Text to Speech",
    css=CUSTOM_CSS,
    theme=gr.themes.Soft(),
) as demo:
    gr.HTML(HEADER_HTML)

    # The row carries the "speaker-center" class, which CUSTOM_CSS uses to
    # centre its content.
    with gr.Row(elem_classes=["speaker-center"]):
        speaker_dropdown = gr.Dropdown(
            label="Speaker",
            choices=SPEAKER_CHOICES,
            value=SPEAKER_CHOICES[0] if SPEAKER_CHOICES else None,
            min_width=420,
            scale=0,
        )

    text_input = gr.Textbox(
        label="Text",
        placeholder="Enter text to synthesize...",
        lines=4,
    )
    generate_btn = gr.Button("Generate", size="lg", elem_classes=["generate-btn"])
    audio_output = gr.Audio(type="filepath", label="Generated Audio")
    download_output = gr.File(label="Download WAV")

    # generate_speech returns the same file path twice: once for the player
    # and once for the download component.
    generate_btn.click(
        fn=generate_speech,
        outputs=[audio_output, download_output],
        inputs=[speaker_dropdown, text_input],
    )

    gr.HTML(FOOTER_HTML)


# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()