import gradio as gr
import requests
import os
from deployment_options import voice_id_2_name, defualt_values, voice_name_2_note

import uuid

# Base address of the private endpoint; the TTS route hangs off it.
API_URL = "https://sentivue-endpoint.hf.space"
ENDPOINT_URL = f"{API_URL}/v1/tts"

# Bearer token for the endpoint, read from the `endpoint_READ` environment variable.
ENDPOINT_TOKEN = os.environ.get("endpoint_READ")

# Startup diagnostics: report the target URL and whether a token was found
# (the token value itself is never printed).
print(f"Public demo will call endpoint: {ENDPOINT_URL}")
print("Token loaded: " + ("Yes" if ENDPOINT_TOKEN else "No"))

# All selectable voice names, in the order of `voice_id_2_name`.
voice_names = [*voice_id_2_name.values()]

# Maps the label shown in the UI, "<voice name> (<note>)", back to the voice name.
voice_names_display_dict = {
    f"{name} ({voice_name_2_note[name]})": name
    for name in voice_names
}

# Voice name that is pre-selected in the dropdown.
voice_names_display_default = defualt_values["voice_name"]

def _discard_partial_file(path):
    """Best-effort removal of an output file left behind by a failed download."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # The file was never created or is already gone; nothing to clean up.
        pass


def generate_speech(text: str, voice_name: str):
    """Synthesize ``text`` with the selected voice via the private TTS endpoint.

    The text is POSTed as JSON to ``{ENDPOINT_URL}/{voice_id}`` with a bearer
    token, and the response body is streamed into a ``.wav`` file in the
    current working directory.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only input is
            rejected without calling the endpoint.
        voice_name: Voice name; must be one of the values of ``voice_id_2_name``.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path of
        the written file on success and ``None`` on failure. Failures are
        reported through ``status_message`` rather than raised.
    """
    if not text or not text.strip():
        return None, "Please enter some text"

    if not ENDPOINT_TOKEN:
        return None, "Error: endpoint_READ token not found in environment"

    # Tracked so a partially written file can be removed if the download fails.
    output_path = None

    try:
        # Invert the id -> name table to resolve the selected name to its id.
        voice_name_2_id = {name: vid for vid, name in voice_id_2_name.items()}
        if voice_name not in voice_name_2_id:
            error_msg = f"Error: unknown voice {voice_name!r}"
            print(error_msg)
            return None, error_msg
        voice_id = voice_name_2_id[voice_name]

        request_url = f"{ENDPOINT_URL}/{voice_id}"
        payload = {"text": text}

        print(f"Sending request to: {request_url}")
        print(f"Payload: {payload}")

        # NOTE(review): no timeout is passed (an earlier `timeout=60` had been
        # commented out), so a stalled endpoint blocks this call indefinitely.
        # Confirm that is intended.
        # The context manager closes the streamed response on every path, so
        # the connection is released even when the request fails part-way.
        with requests.post(
            request_url,
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
                "Content-Type": "application/json",
            },
            json=payload,
            stream=True,
        ) as response:
            response.raise_for_status()

            # Write to a uniquely named regular file in the current directory
            # (not a temp file).
            generation_id = str(uuid.uuid4())[:15]
            output_path = f"speech_{voice_id}_{generation_id}.wav"

            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        return output_path, "Success!"

    except requests.exceptions.RequestException as e:
        _discard_partial_file(output_path)
        error_msg = f"Error calling endpoint: {str(e)}"
        print(error_msg)
        return None, error_msg
    except Exception as e:
        _discard_partial_file(output_path)
        error_msg = f"Unexpected error: {str(e)}"
        print(error_msg)
        return None, error_msg

def check_readiness():
    """Poll the endpoint's ``/health`` route and report whether it is ready.

    Intended as the timer tick handler: the returned values update the submit
    button, the status textbox and the timer itself.

    Returns:
        A ``(button, status_message, timer)`` tuple:

        * health check failed (no response, or no usable JSON body): disabled
          button labelled with the "1/2" server message, timer kept active;
        * health responded but ``ready`` is falsy: disabled button labelled
          with the "2/2" model message, timer kept active;
        * ``ready`` is truthy: enabled "Generate Speech" button, timer
          deactivated so polling stops.
    """
    # Built outside the `try` so the failure branch can always log it.
    URL = f"{API_URL}/health"

    try:
        health = requests.get(
            URL,
            headers={
                "Authorization": f"Bearer {ENDPOINT_TOKEN}",
            },
            timeout=5)
        ready = bool(health.json().get("ready"))
    except Exception as exc:
        # Deliberately broad: any failure here (connection error, timeout,
        # non-JSON or non-object body) means the server is not up yet. Unlike
        # a bare `except:`, this does not swallow KeyboardInterrupt/SystemExit.
        msg = "⏳ 1/2: Preparing our server, it takes around 2 minutes..."
        print(URL, msg, f"({exc!r})")
        return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE

    if ready:
        msg = "✅ Ready"
        print(URL, msg)
        return gr.Button("🔊 Generate Speech", interactive=True), msg, gr.Timer(active=False)  # STOP

    msg = "🔄 2/2: Preparing our model, it takes a few seconds..."
    print(URL, msg)
    return gr.Button(msg, interactive=False), msg, gr.Timer(active=True)  # CONTINUE
    

# ── Gradio Interface ────────────────────────────────────────────────────────

# Page definition: header text, a two-column row (input controls on the left,
# generated audio and status on the right), example texts, a footer, and the
# readiness polling / event wiring at the end.
with gr.Blocks(
    title="pt-PT TTS - Demo",
    # NOTE(review): `zoom: 1.2` is a 120% zoom, while the comment embedded in
    # the CSS text says 110% — confirm which one is intended.
    css="""
    body {
        zoom: 1.2; /* 110% zoom */
    }
    """
    ) as demo:
    
    # Header Section (Portuguese headline with an English summary below it)
    gr.Markdown(
        """
        # 🎙️ Síntese de Voz em Português Europeu (pt-PT) — Public Preview
        Síntese de voz natural em português europeu (pt-PT), com prosódia fluida e pronúncia correta de números.  
        <small>High-quality European Portuguese (pt-PT) speech synthesis with natural prosody and accurate number pronunciation.</small>
        """
    )

    # Technical specifications (Portuguese list, English summary in <small>)
    gr.Markdown(
        """
        ### Especificações Técnicas
        - **Tamanho do modelo:** ~3B parâmetros
        - **Arquitetura:** Backbone de TTS baseado em LLM
        - **Dados de Treino:** +11k horas de voz pt-PT curada
        
        <small>
        Model Size: ~3B parameters | Architecture: LLM-based TTS backbone | Training Data: +11k hours of curated pt-PT speech
        </small>
        """
    )

    # Browser recommendation note
    gr.Markdown(
        """
        Nota: Para melhor desempenho e compatibilidade de áudio, recomendamos o uso do Google Chrome.  
        <small>
        Note: For best audio performance and compatibility, we recommend using Google Chrome.
        </small>        
        """
    )

    
    # gr.Markdown("---")
    
    # Main Generation Interface
    # gr.Markdown("## Generate Speech")
    
    with gr.Row():
        # Left Column - Input Controls
        with gr.Column(scale=5):
            text_input = gr.Textbox(
                label="📝 Text to Synthesize",
                placeholder="Enter Portuguese text here... (e.g., 'Olá! Este é um teste do sistema de síntese de voz.')",
                lines=6,
                max_lines=10,
            )
            
            with gr.Row():
                # Choices are (display label, voice name) pairs taken from
                # `voice_names_display_dict`; the default is given as a voice name.
                # Presumably Gradio shows the label and passes the voice name to
                # handlers — verify against the gr.Dropdown documentation.
                voice_dropdown = gr.Dropdown(
                    choices=list(voice_names_display_dict.items()),
                    value=voice_names_display_default,
                    label="🎭 Voice Selection",
                    info="More voices coming soon"
                )
                
                # Starts disabled; `check_readiness` returns an enabled button
                # once the endpoint's health check reports ready.
                submit_btn = gr.Button(
                    "🔊 Generate Speech",
                    variant="primary",
                    size="lg",
                    interactive=False,
                )
        
        # Right Column - Output
        with gr.Column(scale=4):
            # `generate_speech` returns a file path (or None on failure) for this output.
            audio_output = gr.Audio(
                label="🔊 Generated Audio",
                type="filepath",
                autoplay=False,
            )
            
            # Receives status messages from both `generate_speech` and `check_readiness`.
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
            )
    
    # Example Inputs (each example fills only the text box)
    gr.Markdown("### 💡 Example Texts")
    gr.Examples(
        examples=[
            ["Olá! Bem-vindo ao sistema de síntese de voz em português europeu."],
            ["A temperatura hoje está entre 5 e 9 graus Celsius."],
            ["Lisboa é a capital de Portugal, fundada antes do ano 1200."]
        ],
        inputs=text_input,
    )
    
    
    # Footer
    gr.Markdown(
        """        
        <div style="text-align: center">
            Criado com ❤️ pela SentiVue 
        </div>        
        <div style="text-align: center">
            Built with ❤️ by SentiVue
        </div>
        """
    )

    # Readiness polling: each tick runs `check_readiness`, which also returns the
    # timer so it can deactivate itself once the endpoint is ready.
    # Presumably `value=1` is the tick interval in seconds — verify against the
    # gr.Timer documentation.
    timer = gr.Timer(value=1)


    # demo.load(fn=wake_server, outputs=[submit_btn, status_text])  
    timer.tick(fn=check_readiness, outputs=[submit_btn, status_text, timer])

    # Event Handlers
    submit_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status_text],
    )
    
    # Submitting the textbox runs the same handler as the button.
    # NOTE(review): unlike the button, this trigger is not disabled while the
    # endpoint is still starting — confirm that is intended.
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown],
        outputs=[audio_output, status_text]
    )

# Start the app with queuing enabled. This runs at import time (no
# `if __name__ == "__main__"` guard).
demo.queue().launch()