import gradio as gr import requests import os from deployment_options import voice_id_2_name, defualt_values import uuid ENDPOINT_URL = "https://sentivue-endpoint.hf.space/v1/tts" ENDPOINT_TOKEN = os.getenv("endpoint_READ") print(f"Public demo will call endpoint: {ENDPOINT_URL}") print(f"Token loaded: {'Yes' if ENDPOINT_TOKEN else 'No'}") voice_names = list(voice_id_2_name.values()) def generate_speech(text: str, voice_name: str): """ Calls the private FastAPI endpoint and returns audio """ if not text.strip(): return None, "Please enter some text" if not ENDPOINT_TOKEN: return None, "Error: endpoint_READ token not found in environment" try: voice_name_2_id = {} for vid, name in voice_id_2_name.items(): voice_name_2_id[name] = vid voice_id = voice_name_2_id[voice_name] payload = { "text": text } print(f"Sending request to: {ENDPOINT_URL}/{voice_id}") print(f"Payload: {payload}") response = requests.post( f"{ENDPOINT_URL}/{voice_id}", headers={ "Authorization": f"Bearer {ENDPOINT_TOKEN}", "Content-Type": "application/json" }, json=payload, # timeout=60, stream=True ) response.raise_for_status() # # Return raw WAV bytes - Gradio handles the rest # return response.content, "Success!" # # Save to temporary WAV file # with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file: # tmp_file.write(response.content) # tmp_path = tmp_file.name # return tmp_path, "Success!" # Save to a regular file in current directory (not temp) # Generate unique ID for output file generation_id = str(uuid.uuid4())[:15] output_path = f"speech_{voice_id}_{generation_id}.wav" with open(output_path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): if chunk: f.write(chunk) return output_path, "Success!" except requests.exceptions.RequestException as e: error_msg = f"Error calling endpoint: {str(e)}" print(error_msg) return None, error_msg except Exception as e: error_msg = f"Unexpected error: {str(e)}" print(error_msg) return None, error_msg # ── Gradio Interface ──────────────────────────────────────────────────────── with gr.Blocks( title="pt-PT TTS - Demo", css=""" body { zoom: 1.2; /* 110% zoom */ } """ ) as demo: # Header Section gr.Markdown( """ # 🎙️ European Portuguese Text-to-Speech High-quality, natural-sounding speech synthesis for pt-PT with human-like prosody and accurate number pronunciation. """ ) # Model Information Card # with gr.Accordion("📋 Model Information", open=False): # gr.Markdown( # """ # ### Technical Specifications # - **Model Size:** ~3B parameters # - **Architecture:** LLM-based TTS backbone # - **Training Data:** +11k hours of curated pt-PT speech # """ # ) gr.Markdown( """ ### Technical Specifications - **Model Size:** ~3B parameters - **Architecture:** LLM-based TTS backbone - **Training Data:** +11k hours of curated pt-PT speech """ ) # gr.Markdown("---") # Main Generation Interface # gr.Markdown("## Generate Speech") with gr.Row(): # Left Column - Input Controls with gr.Column(scale=5): text_input = gr.Textbox( label="📝 Text to Synthesize", placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')", lines=6, max_lines=10, ) with gr.Row(): voice_dropdown = gr.Dropdown( choices=voice_names, value=defualt_values['voice_name'], label="🎭 Voice Selection", info="More voices coming soon" ) submit_btn = gr.Button( "🎵 Generate Speech", variant="primary", size="lg" ) # Right Column - Output with gr.Column(scale=4): audio_output = gr.Audio( label="🔊 Generated Audio", type="filepath", autoplay=False, ) status_text = gr.Textbox( label="Status", interactive=False, ) # Example Inputs gr.Markdown("### 💡 Example Texts") gr.Examples( examples=[ ["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."], ["A temperatura hoje está entre 15 e 20 graus Celsius."], ["Lisboa é a capital de Portugal, fundada antes do ano 1200."] ], inputs=text_input, ) gr.Markdown("---") # Information Section with gr.Row(): with gr.Column(): gr.Markdown( """ ### 🎤 Available Voices **Current Voice:** - André (Default) **Coming Soon:** - Additional voices - Extended emotion control - Prosody control via tags """ ) with gr.Column(): gr.Markdown( """ ### 🔌 API Access **Status:** Coming soon The API will allow programmatic access to the TTS system with full voice control and streaming support. """ ) with gr.Column(): gr.Markdown( """ ### 🎨 Fine-tuning **Status:** Coming soon **Requirements:** - ~1.5 hours of recorded speech - Create custom voice clones - Maintain natural prosody """ ) # Footer gr.Markdown( """