from __future__ import annotations

import tempfile
from functools import lru_cache
from pathlib import Path

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline

# Title given to the Gradio Blocks app.
SPACE_TITLE = "VoiceMM TTS API"
# Sample rate (Hz) used both to write the WAV file and to compute its duration.
# NOTE(review): presumably the pipeline's native output rate - confirm.
SAMPLE_RATE = 24_000
# Upper bound on the whitespace-normalized input length; longer requests are
# rejected in normalize_text to keep latency acceptable.
MAX_CHARS = 450

# Voice identifier -> human-readable label.
# The identifier is passed to the pipeline as ``voice`` and its first character
# is used as the pipeline language code; the label is shown in the dropdown
# and in the result summary.
VOICE_OPTIONS = {
    "pf_dora": "Dora, feminina e clara",
    "pm_alex": "Alex, masculina e neutra",
    "pm_santa": "Santa, masculina e encorpada",
}

# Rows of [text, voice id, speed] offered as one-click examples; the first
# row's text is also the textbox's initial value.
# The texts are fed to the speech model, so they keep their Portuguese
# diacritics: without them words such as "confiança" or "ação" are spelled
# "confianca"/"acao" and are likely to be read with the wrong sounds.
EXAMPLES = [
    [
        "Seu produto ficou pronto. Agora ele tem uma voz que passa confiança, ritmo e presença.",
        "pf_dora",
        1.0,
    ],
    [
        "Apresente sua startup em vinte segundos: problema, promessa e chamada para ação.",
        "pm_alex",
        1.05,
    ],
    [
        "Bem-vindo ao VoiceMM. Transforme roteiro em áudio com uma interface simples e bonita.",
        "pm_santa",
        0.95,
    ],
]

# Custom stylesheet handed to ``demo.launch(css=...)``.
# ``.voicelek-shell`` is the class set on the outer column of the layout and
# ``.voicelek-kicker`` is the class of the small label in the Markdown header.
CSS = """
.gradio-container {
    background:
        radial-gradient(circle at top left, rgba(237, 180, 93, 0.18), transparent 30%),
        radial-gradient(circle at top right, rgba(33, 181, 168, 0.12), transparent 28%),
        #0f1518;
}

.voicelek-shell {
    max-width: 1024px;
    margin: 0 auto;
}

.voicelek-kicker {
    letter-spacing: 0.18em;
    text-transform: uppercase;
    color: #efbf74;
    font-size: 0.8rem;
}
"""


@lru_cache(maxsize=8)
def get_pipeline(lang_code: str) -> KPipeline:
    """Return the ``KPipeline`` for *lang_code*, constructing it on first use.

    Results are memoized per language code (up to 8 distinct codes), so
    repeated calls with the same code return the same pipeline object.
    """
    pipeline = KPipeline(lang_code=lang_code)
    return pipeline


def normalize_text(text: str) -> str:
    """Collapse whitespace in *text* and enforce the input-size limits.

    Every run of whitespace (newlines included) becomes a single space and
    leading/trailing whitespace is dropped. A falsy *text* such as ``None``
    is treated like an empty string.

    Returns:
        The normalized text.

    Raises:
        gr.Error: If nothing is left after normalization, or if the result
            is longer than ``MAX_CHARS`` characters.
    """
    words = text.split() if text else []
    collapsed = " ".join(words)

    if collapsed == "":
        raise gr.Error("Digite algum texto antes de gerar o audio.")
    if len(collapsed) <= MAX_CHARS:
        return collapsed
    raise gr.Error(
        f"Use no maximo {MAX_CHARS} caracteres por vez para manter a latencia boa no plano gratis."
    )


def synthesize(text: str, voice: str, speed: float) -> tuple[str, str]:
    """Generate speech for *text* and return the WAV path plus a summary.

    Args:
        text: Raw input; normalized and length-checked by ``normalize_text``.
        voice: Voice identifier. Must be a key of ``VOICE_OPTIONS``; its first
            character selects the pipeline language.
        speed: Speed multiplier forwarded to the pipeline. Must be a positive
            number.

    Returns:
        ``(path, details)``: the path of the WAV file written to a fresh
        temporary directory, and a Markdown summary of the request.

    Raises:
        gr.Error: If the text is empty or too long, the voice is unknown, the
            speed is not a positive number, or the pipeline yields no audio.
    """
    cleaned = normalize_text(text)

    # Validate up front: an unknown voice would otherwise surface as a raw
    # IndexError/KeyError (the latter only after synthesis has already run).
    if not isinstance(voice, str) or voice not in VOICE_OPTIONS:
        raise gr.Error("Voz invalida. Escolha uma das vozes disponiveis.")

    # Coerce once so the pipeline and the summary use the same value.
    try:
        speed = float(speed)
    except (TypeError, ValueError):
        raise gr.Error("Velocidade invalida. Informe um numero maior que zero.") from None
    # The chained comparison is False for NaN, zero, negatives and infinity.
    if not 0 < speed < float("inf"):
        raise gr.Error("Velocidade invalida. Informe um numero maior que zero.")

    pipeline = get_pipeline(voice[0])

    # Each item yielded by the pipeline unpacks to three values; only the
    # audio is needed. NOTE(review): presumably audio can be None for a
    # segment that produced no sound - skip those instead of failing later.
    chunks: list[np.ndarray] = [
        np.asarray(audio, dtype=np.float32)
        for _, _, audio in pipeline(cleaned, voice=voice, speed=speed)
        if audio is not None
    ]

    if not chunks:
        raise gr.Error("O modelo nao conseguiu gerar audio para esse texto.")

    waveform = np.concatenate(chunks)
    # NOTE(review): a new temporary directory is created per call and never
    # removed here; confirm something else cleans these up on long-running hosts.
    output_dir = Path(tempfile.mkdtemp(prefix="voicelek_"))
    output_path = output_dir / "voicelek-output.wav"
    sf.write(str(output_path), waveform, SAMPLE_RATE)

    duration_seconds = len(waveform) / SAMPLE_RATE
    details = (
        f"**Voz:** {VOICE_OPTIONS[voice]}  \n"
        f"**Velocidade:** {speed:.2f}x  \n"
        f"**Entrada:** {len(cleaned)} caracteres  \n"
        f"**Duracao estimada:** {duration_seconds:.1f}s"
    )
    return str(output_path), details


# UI layout and event wiring. The same handler that backs the button is
# exposed to API clients under the name "synthesize".
with gr.Blocks(title=SPACE_TITLE) as demo:
    with gr.Column(elem_classes="voicelek-shell"):
        # The blank line after the <div> is required: in Markdown an HTML
        # block started by <div> runs until the next blank line, so without
        # it the "# ..." heading would be shown as literal text.
        gr.Markdown(
            """
            <div class="voicelek-kicker">VoiceMM</div>

            # API de TTS em portugues brasileiro

            Esta Space foi pensada para ser o backend de um frontend estatico no GitHub Pages.
            O endpoint publico principal e `"/synthesize"`.
            """,
        )

        with gr.Row():
            # Left: the text to speak, pre-filled with the first example.
            with gr.Column(scale=3):
                text_input = gr.Textbox(
                    label="Texto",
                    lines=8,
                    max_lines=12,
                    placeholder="Cole aqui sua copy, roteiro, CTA ou locucao curta.",
                    value=EXAMPLES[0][0],
                )
            # Right: voice, speed and the trigger button.
            with gr.Column(scale=2):
                voice_input = gr.Dropdown(
                    # (label shown, value submitted) pairs built from VOICE_OPTIONS.
                    choices=[(label, key) for key, label in VOICE_OPTIONS.items()],
                    value="pf_dora",
                    label="Voz",
                )
                speed_input = gr.Slider(
                    minimum=0.8,
                    maximum=1.25,
                    value=1.0,
                    step=0.05,
                    label="Velocidade",
                )
                generate_button = gr.Button("Gerar audio", variant="primary")

        audio_output = gr.Audio(
            label="Saida",
            type="filepath",
            format="wav",
        )
        details_output = gr.Markdown(
            value="Pronto para receber chamadas via navegador ou direto pela API do Gradio."
        )

        # No handler is attached here, so the examples only feed the inputs.
        gr.Examples(
            examples=EXAMPLES,
            inputs=[text_input, voice_input, speed_input],
            label="Exemplos rapidos",
        )

        generate_button.click(
            fn=synthesize,
            inputs=[text_input, voice_input, speed_input],
            outputs=[audio_output, details_output],
            api_name="synthesize",
        )

# Queue settings: a default concurrency limit of 1 and at most 16 queued requests.
demo.queue(max_size=16, default_concurrency_limit=1)

if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time.
    soft_theme = gr.themes.Soft(
        neutral_hue="slate",
        primary_hue="amber",
        secondary_hue="teal",
    )
    demo.launch(
        css=CSS,
        theme=soft_theme,
        footer_links=["api", "gradio", "settings"],
    )