"""
Qwen3-TTS Web UI for Hugging Face Spaces
=========================================
CPU-only mode for maximum compatibility.
"""

import os
import spaces
import gradio as gr
import numpy as np
import torch
import soundfile as sf
import tempfile
from huggingface_hub import snapshot_download
from qwen_tts import Qwen3TTSModel

# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────
MODEL_SIZE = "1.7B"  # Size tag interpolated into the checkpoint repository name
ENGLISH_SPEAKERS = ["Ryan", "Aiden"]  # Speaker names offered in the voice dropdown

# Download the checkpoint snapshot and load it once, at import time, so every
# request handled by this process shares the same CPU-resident model instance.
print(f"📦 Loading {MODEL_SIZE} model on CPU...")
model_path = snapshot_download(
    repo_id=f"Qwen/Qwen3-TTS-12Hz-{MODEL_SIZE}-CustomVoice",
)
model = Qwen3TTSModel.from_pretrained(model_path, device_map="cpu", dtype=torch.float32)
print("✅ Model loaded!")

# ─────────────────────────────────────────────────────────────────────────────
# TTS Generation Function
# ─────────────────────────────────────────────────────────────────────────────
@spaces.GPU(duration=120)
def generate_speech(text, speaker, voice_style):
    """Synthesize ``text`` with the chosen speaker and return a WAV file path.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a warning instead of being sent to the model.
        speaker: Speaker name forwarded to ``model.generate_custom_voice``.
        voice_style: Optional free-form style instruction; any falsy value is
            forwarded as an empty string.

    Returns:
        A ``(audio_path, status)`` tuple. ``audio_path`` is the path of a
        temporary ``.wav`` file, or ``None`` when nothing was generated.
        ``status`` is a human-readable message for the status box.
    """
    # NOTE(review): the model is loaded with device_map="cpu", yet this handler
    # is wrapped in spaces.GPU — confirm the decorator is actually intended.
    if not text or not text.strip():
        return None, "⚠️ Please enter some text."

    try:
        wavs, sr = model.generate_custom_voice(
            text=text,
            language="Auto",
            speaker=speaker,
            instruct=voice_style or "",
        )
        audio = wavs[0]

        # Reserve a unique path, then close the handle immediately: keeping it
        # open leaks a file descriptor per request and prevents the file from
        # being reopened for writing on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            output_path = temp_file.name

        try:
            sf.write(output_path, audio, sr)
        except Exception:
            # delete=False means nobody else removes the file; don't leave an
            # empty/partial WAV behind when the write fails.
            os.unlink(output_path)
            raise

        duration = len(audio) / sr
        return output_path, f"✅ Generated {duration:.1f}s of audio"

    except Exception as e:
        # UI boundary: log the full traceback to the server console and show
        # a short message to the user instead of crashing the request.
        import traceback
        traceback.print_exc()
        return None, f"❌ Error: {str(e)}"

# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
# Layout: inputs (text, voice, style) and the generate button on the left,
# generated audio and status on the right, clickable examples underneath.
with gr.Blocks(title="Qwen Voice Assistant") as demo:

    gr.Markdown(
        """
        # 🎙️ Qwen Voice Assistant
        ### Text-to-Speech powered by Qwen3-TTS
        
        ⏱️ Generation takes ~30-60 seconds (CPU mode)
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )

            with gr.Row():
                speaker_dropdown = gr.Dropdown(
                    choices=ENGLISH_SPEAKERS,
                    # Default to the first configured speaker so the dropdown
                    # stays valid if ENGLISH_SPEAKERS is edited.
                    value=ENGLISH_SPEAKERS[0],
                    label="Voice",
                    info="Select a speaker voice",
                )

                voice_style = gr.Textbox(
                    label="Voice Style (Optional)",
                    placeholder="e.g., happy, slow, whisper...",
                    info="Describe the tone or emotion",
                )

            generate_btn = gr.Button("🔊 Generate Speech", variant="primary", size="lg")

        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="Generated Audio",
                type="filepath",
                interactive=False,
            )
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
            )

    # Voice style examples: clicking a row fills the three input components.
    gr.Markdown("### 💡 Voice Style Examples")
    gr.Examples(
        examples=[
            ["Hello! How are you today?", "Ryan", "friendly and warm"],
            ["Breaking news: Scientists discover water on Mars!", "Aiden", "excited news anchor"],
            ["Once upon a time, in a land far away...", "Ryan", "storytelling, slow and dramatic"],
            ["Warning! System overload detected.", "Aiden", "urgent and serious"],
            ["I love you with all my heart.", "Ryan", "soft and emotional"],
        ],
        inputs=[text_input, speaker_dropdown, voice_style],
        label="Click an example to try it",
    )

    # Build the footer from MODEL_SIZE so the link always names the checkpoint
    # that was actually loaded.
    model_repo = f"Qwen3-TTS-12Hz-{MODEL_SIZE}-CustomVoice"
    gr.Markdown(
        f"---\n**Model:** [{model_repo}](https://huggingface.co/Qwen/{model_repo}) (Apache 2.0)"
    )

    # Connect button: outputs map to generate_speech's (audio_path, status).
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, speaker_dropdown, voice_style],
        outputs=[audio_output, status_output],
    )

# ─────────────────────────────────────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Only start the server when run as a script; server-side rendering is
    # explicitly turned off for this app.
    launch_options = {"ssr_mode": False}
    demo.launch(**launch_options)