Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import os | |
| from deployment_options import voice_id_2_name, defualt_values, voice_name_2_note | |
| import uuid | |
# Base address of the hosted inference service, and its text-to-speech route.
API_URL = "https://sentivue-endpoint.hf.space"
ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts"

# Bearer token for the endpoint; None when the variable is not set.
ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

# Startup diagnostics: report the target URL and whether a token is present
# (the token value itself is never printed).
print(f"Public demo will call endpoint: {ENDPOINT_URL}")
print("Token loaded: " + ("Yes" if ENDPOINT_TOKEN else "No"))

# Voice names in the iteration order of `voice_id_2_name`.
voice_names = list(voice_id_2_name.values())

# Maps the dropdown label "<name> (<note>)" to the plain voice name.
voice_names_display_dict = {
    f'{name} ({voice_name_2_note[name]})': name
    for name in voice_names
}

# Voice name preselected in the dropdown.
voice_names_display_default = defualt_values['voice_name']
def generate_speech(text: str, voice_name: str):
    """Synthesize ``text`` with the selected voice via the private TTS endpoint.

    Sends a POST to ``{ENDPOINT_URL}/{voice_id}`` authenticated with
    ``ENDPOINT_TOKEN`` and streams the response body into a uniquely named
    ``.wav`` file in the current working directory.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only input
            is rejected without contacting the endpoint.
        voice_name: A voice name that appears as a value in
            ``voice_id_2_name``.

    Returns:
        An ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the written file on success and ``None`` on any failure. Errors
        are reported through ``status_message``; the function does not raise.
    """
    if not text or not text.strip():
        return None, "Please enter some text"
    if not ENDPOINT_TOKEN:
        return None, "Error: endpoint_READ token not found in environment"

    # The UI hands over the voice name; the endpoint route wants the voice id.
    voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
    if voice_name not in voice_name_2_id:
        error_msg = f"Unexpected error: unknown voice {voice_name!r}"
        print(error_msg)
        return None, error_msg
    voice_id = voice_name_2_id[voice_name]

    payload = {"text": text}
    request_url = f"{ENDPOINT_URL}/{voice_id}"
    print(f"Sending request to: {request_url}")
    print(f"Payload: {payload}")

    # Written to a regular file in the current directory (not a temp file),
    # with a short unique suffix so concurrent generations do not collide.
    generation_id = str(uuid.uuid4())[:15]
    output_path = f"speech_{voice_id}_{generation_id}.wav"

    try:
        # The context manager releases the streamed connection even when the
        # download fails. The timeout is (connect, read-between-chunks) so a
        # stalled endpoint cannot block this worker indefinitely.
        with requests.post(
            request_url,
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=(10, 300),
            stream=True,
        ) as response:
            response.raise_for_status()
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return output_path, "Success!"
    except requests.exceptions.RequestException as e:
        error_msg = f"Error calling endpoint: {e}"
    except Exception as e:
        error_msg = f"Unexpected error: {e}"

    # Failure path only: drop a truncated file left by an interrupted stream.
    try:
        os.remove(output_path)
    except OSError:
        # Nothing was written (or it is already gone); nothing to clean up.
        pass
    print(error_msg)
    return None, error_msg
def check_readiness():
    """Poll the endpoint's ``/health`` route and translate it into UI state.

    Used as the ``gr.Timer`` tick callback: each call performs one GET with a
    5 second timeout against ``{API_URL}/health``.

    Returns:
        A ``(button, status_message, timer)`` tuple:

        * health JSON has a truthy ``"ready"`` field: an enabled
          "Generate Speech" button, and a deactivated timer so polling stops;
        * health responded but is not ready: a disabled button labelled with
          the "2/2" progress message, timer kept active;
        * the request or JSON decoding failed: a disabled button labelled
          with the "1/2" progress message, timer kept active.
    """
    # Bound before the try so the failure handler can always log it.
    health_url = f"{API_URL}/health"
    try:
        health = requests.get(
            health_url,
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
            },
            timeout=5,
        )
        ready = bool(health.json().get("ready"))
    except Exception as exc:
        # /health didn't respond (or returned something that is not the
        # expected JSON object). A polling callback must keep running, so
        # everything short of KeyboardInterrupt/SystemExit is treated as
        # "server still starting" rather than propagated.
        msg = "⏳ 1/2: Preparing our server, it takes around 2 minutes..."
        print(health_url, msg, f"({type(exc).__name__})")
        return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE

    if ready:
        msg = "✅ Ready"
        print(health_url, msg)
        return gr.Button("🔊 Generate Speech", interactive=True), msg, gr.Timer(active=False)  # STOP

    msg = "🔄 2/2: Preparing our model, it takes a few seconds..."
    print(health_url, msg)
    return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE
# ── Gradio Interface ────────────────────────────────────────────────────────
# Static page content, kept apart from the layout code below.

# NOTE(review): the CSS comment says 110% but `zoom: 1.2` is 120% — confirm intent.
_PAGE_CSS = """
body {
zoom: 1.2; /* 110% zoom */
}
"""

_INTRO_MD = """
# 🎙️ Síntese de Voz em Português Europeu (pt-PT) — Public Preview
Síntese de voz natural em português europeu (pt-PT), com prosódia fluida e pronúncia correta de números.
<small>High-quality European Portuguese (pt-PT) speech synthesis with natural prosody and accurate number pronunciation.</small>
"""

_SPECS_MD = """
### Especificações Técnicas
- **Tamanho do modelo:** ~3B parâmetros
- **Arquitetura:** Backbone de TTS baseado em LLM
- **Dados de Treino:** +11k horas de voz pt-PT curada
<small>
Model Size: ~3B parameters | Architecture: LLM-based TTS backbone | Training Data: +11k hours of curated pt-PT speech
</small>
"""

_BROWSER_NOTE_MD = """
Nota: Para melhor desempenho e compatibilidade de áudio, recomendamos o uso do Google Chrome.
<small>
Note: For best audio performance and compatibility, we recommend using Google Chrome.
</small>
"""

_FOOTER_MD = """
<div style="text-align: center">
Criado com ❤️ pela SentiVue
</div>
<div style="text-align: center">
Built with ❤️ by SentiVue
</div>
"""

_EXAMPLE_TEXTS = [
    ["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."],
    ["A temperatura hoje está entre 5 e 9 graus Celsius."],
    ["Lisboa é a capital de Portugal, fundada antes do ano 1200."]
]

# NOTE(review): the nesting below is reconstructed from the section comments
# because the original indentation was lost — confirm in particular that the
# button belongs under the voice row rather than inside it.
with gr.Blocks(title="pt-PT TTS - Demo", css=_PAGE_CSS) as demo:
    # Header: introduction, technical specifications, browser recommendation.
    for section_md in (_INTRO_MD, _SPECS_MD, _BROWSER_NOTE_MD):
        gr.Markdown(section_md)

    # Main generation interface: inputs on the left, results on the right.
    with gr.Row():
        with gr.Column(scale=5):
            text_input = gr.Textbox(
                label="📝 Text to Synthesize",
                placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')",
                lines=6,
                max_lines=10,
            )
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    # (label, value) pairs: the label is shown, the plain
                    # voice name is what the handler receives.
                    choices=list(voice_names_display_dict.items()),
                    value=voice_names_display_default,
                    label="🎭 Voice Selection",
                    info="More voices coming soon"
                )
            # Starts disabled; the readiness poll below enables it.
            submit_btn = gr.Button(
                "🔊 Generate Speech",
                variant="primary",
                size="lg",
                interactive=False,
            )

        with gr.Column(scale=4):
            audio_output = gr.Audio(
                label="🔊 Generated Audio",
                type="filepath",
                autoplay=False,
            )
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
            )

    # Clickable sample sentences that fill the text box.
    gr.Markdown("### 💡 Example Texts")
    gr.Examples(
        examples=_EXAMPLE_TEXTS,
        inputs=text_input,
    )

    gr.Markdown(_FOOTER_MD)

    # Poll the backend once per second; check_readiness returns the timer's
    # next state alongside the button and status updates.
    timer = gr.Timer(value=1)
    timer.tick(fn=check_readiness, outputs=[submit_btn, status_text, timer])

    # Both the button click and submitting the text box run the same handler.
    speech_inputs = [text_input, voice_dropdown]
    speech_outputs = [audio_output, status_text]
    for register in (submit_btn.click, text_input.submit):
        register(
            fn=generate_speech,
            inputs=speech_inputs,
            outputs=speech_outputs,
        )

demo.queue().launch()