# Hugging Face Space: public Gradio demo that fronts a private pt-PT TTS endpoint.
import gradio as gr
import requests
import os
# NOTE(review): "defualt_values" is a typo, but the name must match what
# deployment_options actually exports — confirm there before renaming.
from deployment_options import voice_id_2_name, defualt_values
import uuid

# Private FastAPI TTS endpoint this public demo proxies to; the bearer token
# is read from the Space's secrets via the "endpoint_READ" environment variable.
ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts"
ENDPOINT_TOKEN = os.getenv("endpoint_READ")

# Startup diagnostics for the Space logs; only token *presence* is printed,
# never the token value itself.
print(f"Public demo will call endpoint: {ENDPOINT_URL}")
print(f"Token loaded: {'Yes' if ENDPOINT_TOKEN else 'No'}")

# Human-readable voice names offered in the dropdown (values of the
# voice_id -> name mapping provided by deployment_options).
voice_names = list(voice_id_2_name.values())
def generate_speech(text: str, voice_name: str):
    """Call the private FastAPI TTS endpoint and stream the audio to a WAV file.

    Args:
        text: Portuguese text to synthesize.
        voice_name: Display name of the voice; must be one of the values of
            ``voice_id_2_name``.

    Returns:
        A ``(audio_path, status)`` tuple: ``audio_path`` is the path of the
        written WAV file on success (``None`` on failure) and ``status`` is a
        human-readable message shown in the UI. Never raises — all errors are
        reported through the status string.
    """
    if not text.strip():
        return None, "Please enter some text"
    if not ENDPOINT_TOKEN:
        return None, "Error: endpoint_READ token not found in environment"
    try:
        # Invert the id -> name mapping to resolve the selected voice id.
        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
        voice_id = voice_name_2_id[voice_name]
        payload = {"text": text}
        print(f"Sending request to: {ENDPOINT_URL}/{voice_id}")
        print(f"Payload: {payload}")
        # Context manager guarantees the streamed connection is released, and
        # the timeout prevents the worker hanging forever on an unreachable or
        # cold-starting endpoint (it was previously disabled entirely).
        with requests.post(
            f"{ENDPOINT_URL}/{voice_id}",
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=300,
            stream=True,
        ) as response:
            response.raise_for_status()
            # Save to a regular file in the working directory (not tempfile)
            # so Gradio can serve it; the uuid fragment avoids collisions
            # between concurrent generations for the same voice.
            generation_id = str(uuid.uuid4())[:15]
            output_path = f"speech_{voice_id}_{generation_id}.wav"
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        return output_path, "Success!"
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {str(e)}"
        print(error_msg)
        return None, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {str(e)}"
        print(error_msg)
        return None, error_msg
# ── Gradio Interface ─────────────────────────────────────────────────────────
# NOTE(review): the emoji in several strings below are mojibake (UTF-8 decoded
# as Latin-1, e.g. "ποΈ"); they are kept byte-for-byte here — confirm the file
# is stored and served as UTF-8 and restore the intended emoji at the source.
# NOTE(review): indentation was lost in this copy; the nesting below is the
# most plausible reconstruction — confirm against the deployed layout.
# Component creation order inside each context manager defines on-page order,
# so the statement sequence below is load-bearing.
with gr.Blocks(
    title="pt-PT TTS - Demo",
    # Page-wide zoom. NOTE(review): the CSS comment says 110% but zoom is 1.2
    # (i.e. 120%) — confirm which is intended.
    css="""
    body {
        zoom: 1.2; /* 110% zoom */
    }
    """
) as demo:
    # Header Section
    gr.Markdown(
        """
        # ποΈ European Portuguese Text-to-Speech
        High-quality, natural-sounding speech synthesis for pt-PT with human-like prosody and accurate number pronunciation.
        """
    )
    # Model Information card. An accordion variant of this same content was
    # present but disabled in the source; the plain Markdown block is the one
    # actually rendered.
    gr.Markdown(
        """
        ### Technical Specifications
        - **Model Size:** ~3B parameters
        - **Architecture:** LLM-based TTS backbone
        - **Training Data:** +11k hours of curated pt-PT speech
        """
    )
    # Main Generation Interface: input controls on the left (wider), output on
    # the right.
    with gr.Row():
        # Left Column - Input Controls
        with gr.Column(scale=5):
            text_input = gr.Textbox(
                label="π Text to Synthesize",
                placeholder="Enter Portuguese text here... (e.g., 'OlΓ‘! Este Γ© um teste do sistema de sΓntese de voz.')",
                lines=6,
                max_lines=10,
            )
            # Voice picker and submit button side by side.
            # NOTE(review): the button may instead sit below this row in the
            # original layout — indentation was lost; verify.
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=voice_names,
                    value=defualt_values['voice_name'],
                    label="π Voice Selection",
                    info="More voices coming soon"
                )
                submit_btn = gr.Button(
                    "π΅ Generate Speech",
                    variant="primary",
                    size="lg"
                )
        # Right Column - Output
        with gr.Column(scale=4):
            audio_output = gr.Audio(
                label="π Generated Audio",
                type="filepath",  # generate_speech returns a WAV file path
                autoplay=False,
            )
            status_text = gr.Textbox(
                label="Status",
                interactive=False,  # read-only status line for the user
            )
    # Example Inputs: clicking an example fills text_input only (voice keeps
    # its current selection).
    gr.Markdown("### π‘ Example Texts")
    gr.Examples(
        examples=[
            ["OlΓ‘! Bem-vindo ao sistema de sΓntese de voz em portuguΓͺs europeu."],
            ["A temperatura hoje estΓ‘ entre 15 e 20 graus Celsius."],
            ["Lisboa Γ© a capital de Portugal, fundada antes do ano 1200."]
        ],
        inputs=text_input,
    )
    gr.Markdown("---")
    # Information Section: three equal-width informational columns.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                ### π€ Available Voices
                **Current Voice:**
                - AndrΓ© (Default)
                **Coming Soon:**
                - Additional voices
                - Extended emotion control
                - Prosody control via tags
                """
            )
        with gr.Column():
            gr.Markdown(
                """
                ### π API Access
                **Status:** Coming soon
                The API will allow programmatic access to the TTS system with full voice control and streaming support.
                """
            )
        with gr.Column():
            gr.Markdown(
                """
                ### π¨ Fine-tuning
                **Status:** Coming soon
                **Requirements:**
                - ~1.5 hours of recorded speech
                - Create custom voice clones
                - Maintain natural prosody
                """
            )
    # Footer
    gr.Markdown(
        """
        <div style="text-align: center">
        Built with β€οΈ for European Portuguese β’ Powered by advanced LLM-based TTS
        </div>
        """
    )
    # Event Handlers: both the button click and pressing Enter in the textbox
    # trigger the same synthesis call.
    submit_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status_text],
    )
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status_text]
    )

# Queue requests so concurrent users are serialized toward the single
# backing endpoint, then start the app.
demo.queue().launch()