pt-PT_TTS_Demo / app-struture-elements.py
m-nagy's picture
UI: update UI and add space info
e280b3f
import gradio as gr
import requests
import os
from deployment_options import voice_id_2_name, defualt_values
import uuid
# Location of the remote TTS service and the secret used to authenticate
# against it. The token is read from the ``endpoint_READ`` environment
# variable and is ``None`` when that variable is not set.
ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts"
ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

# Startup diagnostics: report the target URL and only *whether* a token was
# found, never the token value itself.
print("Public demo will call endpoint: " + ENDPOINT_URL)
print("Token loaded: " + ("Yes" if ENDPOINT_TOKEN else "No"))

# Human-readable voice names, in the iteration order of the id -> name mapping.
voice_names = [*voice_id_2_name.values()]
# (connect, read) timeouts in seconds for the TTS endpoint. Without a timeout
# ``requests`` waits indefinitely, so a stalled endpoint would block the worker
# forever. The read timeout is deliberately generous because synthesis may take
# a while before the first audio byte arrives.
_REQUEST_TIMEOUT = (10, 300)


def _remove_quietly(path):
    """Delete ``path`` if it exists, ignoring any filesystem error."""
    try:
        os.remove(path)
    except OSError:
        pass


def generate_speech(text: str, voice_name: str):
    """Synthesize ``text`` with the chosen voice through the remote TTS endpoint.

    The audio returned by the endpoint is streamed to a uniquely named
    ``speech_<voice_id>_<id>.wav`` file in the current working directory.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input is
            rejected without contacting the endpoint.
        voice_name: Display name of the voice; it is translated back to the
            voice id expected by the endpoint via ``voice_id_2_name``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path of
        the written WAV file on success and ``None`` on any failure, in which
        case ``status_message`` describes the problem. The function never
        raises; all errors are reported through the status message.
    """
    if not text or not text.strip():
        return None, "Please enter some text"

    if not ENDPOINT_TOKEN:
        return None, "Error: endpoint_READ token not found in environment"

    output_path = None
    try:
        # Invert the id -> name mapping so the UI label can be resolved to an id.
        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
        if voice_name not in voice_name_2_id:
            return None, f"Error: unknown voice '{voice_name}'"
        voice_id = voice_name_2_id[voice_name]

        url = f"{ENDPOINT_URL}/{voice_id}"
        payload = {"text": text}

        print(f"Sending request to: {url}")
        print(f"Payload: {payload}")

        # Short random suffix so repeated generations never overwrite each other.
        generation_id = str(uuid.uuid4())[:15]
        output_path = f"speech_{voice_id}_{generation_id}.wav"

        # The context manager guarantees the streamed connection is released
        # even when the status check or the download fails midway.
        with requests.post(
            url,
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=_REQUEST_TIMEOUT,
            stream=True,
        ) as response:
            response.raise_for_status()
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        return output_path, "Success!"

    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {str(e)}"
    except Exception as e:  # UI boundary: report instead of crashing the app
        error_msg = f"Unexpected error: {str(e)}"

    # Failure path: do not leave a truncated audio file behind.
    if output_path is not None:
        _remove_quietly(output_path)
    print(error_msg)
    return None, error_msg
# ── Gradio Interface ────────────────────────────────────────────────────────
# Builds the demo page: header, model facts, the input/output row, example
# texts, three informational columns and a footer, then wires the generate
# action and starts the app.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the section comments; confirm in particular that the button sits below
# (not inside) the voice-selection row and that the examples are page-level.
with gr.Blocks(
    title="pt-PT TTS - Demo",
    css="""
    body {
        zoom: 1.2; /* 120% zoom */
    }
    """,
) as demo:
    # Header section
    gr.Markdown(
        """
        # 🎙️ European Portuguese Text-to-Speech
        High-quality, natural-sounding speech synthesis for pt-PT with human-like prosody and accurate number pronunciation.
        """
    )

    # Model information, always visible
    gr.Markdown(
        """
        ### Technical Specifications
        - **Model Size:** ~3B parameters
        - **Architecture:** LLM-based TTS backbone
        - **Training Data:** +11k hours of curated pt-PT speech
        """
    )

    # Main generation interface
    with gr.Row():
        # Left column - input controls
        with gr.Column(scale=5):
            text_input = gr.Textbox(
                label="📝 Text to Synthesize",
                placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')",
                lines=6,
                max_lines=10,
            )
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=voice_names,
                    value=defualt_values['voice_name'],
                    label="🎭 Voice Selection",
                    info="More voices coming soon",
                )
            submit_btn = gr.Button(
                "🎵 Generate Speech",
                variant="primary",
                size="lg",
            )

        # Right column - output
        with gr.Column(scale=4):
            audio_output = gr.Audio(
                label="🔊 Generated Audio",
                type="filepath",  # generate_speech returns a path to a WAV file
                autoplay=False,
            )
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
            )

    # Example inputs; clicking one fills the text box
    gr.Markdown("### 💡 Example Texts")
    gr.Examples(
        examples=[
            ["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."],
            ["A temperatura hoje está entre 15 e 20 graus Celsius."],
            ["Lisboa é a capital de Portugal, fundada antes do ano 1200."],
        ],
        inputs=text_input,
    )

    gr.Markdown("---")

    # Information section
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                ### 🎤 Available Voices
                **Current Voice:**
                - André (Default)
                **Coming Soon:**
                - Additional voices
                - Extended emotion control
                - Prosody control via tags
                """
            )
        with gr.Column():
            gr.Markdown(
                """
                ### 🔌 API Access
                **Status:** Coming soon
                The API will allow programmatic access to the TTS system with full voice control and streaming support.
                """
            )
        with gr.Column():
            gr.Markdown(
                """
                ### 🎨 Fine-tuning
                **Status:** Coming soon
                **Requirements:**
                - ~1.5 hours of recorded speech
                - Create custom voice clones
                - Maintain natural prosody
                """
            )

    # Footer
    gr.Markdown(
        """
        <div style="text-align: center">
        Built with ❤️ for European Portuguese • Powered by advanced LLM-based TTS
        </div>
        """
    )

    # Event handlers: the button click and pressing Enter in the text box
    # trigger the same generation call with identical inputs and outputs.
    for register in (submit_btn.click, text_input.submit):
        register(
            fn=generate_speech,
            inputs=[text_input, voice_dropdown],
            outputs=[audio_output, status_text],
        )

# Enable request queuing and start serving the demo.
demo.queue().launch()