"""Gradio UI for Small Cuts."""

from __future__ import annotations

import gradio as gr
import numpy as np
from PIL import Image

from .frames import pick_key_frame, sample_frames
from .narrator import get_backend, narrate
from .styles import DEFAULT_STYLE_KEY, style_choices
from .theme import build_theme
from .title_card import derive_title, render_title_card
from .tts import speak

TITLE = "🎬 Small Cuts"
TAGLINE = (
    "Your life, narrated. Drop in a moment — from your phone, webcam, or "
    "smart-glasses footage — pick a director, and hear what scene you're really in. "
    "Every model under 32B. Everything runs in this Space."
)

# Off-Brand cinematic theme for the M2 custom UI quest.
THEME = build_theme()


def _gpu(duration: int = 90):
    """Mark an event handler for ZeroGPU. No-op off-Space.

    ZeroGPU's startup scan looks for the GPU mark on the functions Gradio
    binds — decorating an inner helper instead leaves requests unscheduled
    (worker dies with "No CUDA GPUs are available"). TTS is marked too:
    any torch forward in the main process poisons later worker forks.
    """

    def deco(fn):
        try:
            import spaces
        except ImportError:
            return fn
        return spaces.GPU(duration=duration)(fn)

    return deco


def _narrate_core(
    image: Image.Image | None, style_key: str, scene_hint: str, empty_text: str
) -> tuple[Image.Image, str]:
    if image is None:
        text = empty_text
    else:
        result = narrate(image, style_key=style_key, scene_hint=scene_hint or "")
        text = result.text
    return render_title_card(derive_title(text), style_key), text


@_gpu()
def _narrate_handler(
    image: Image.Image | None, style_key: str, scene_hint: str
) -> tuple[Image.Image, str]:
    return _narrate_core(
        image,
        style_key,
        scene_hint,
        "The narrator clears his throat, looks at the empty screen, and waits. "
        "Some scenes, after all, require a scene.",
    )


@_gpu()
def _narrate_video_handler(
    video_path: str | None, style_key: str, scene_hint: str
) -> tuple[Image.Image, str]:
    frame = pick_key_frame(sample_frames(video_path)) if video_path else None
    return _narrate_core(
        frame,
        style_key,
        scene_hint,
        "The narrator squints at the projector. Nothing. He has narrated "
        "blank screens before, but never by choice.",
    )


@_gpu(duration=30)
def _speak_handler(text: str) -> tuple[int, np.ndarray] | None:
    if not text.strip():
        return None
    speech = speak(text)
    return speech.sample_rate, speech.audio


def build_app() -> gr.Blocks:
    backend = get_backend()
    with gr.Blocks(title=TITLE) as demo:
        gr.Markdown(f"# {TITLE}\n{TAGLINE}")
        with gr.Row():
            with gr.Column(scale=1):
                image = gr.Image(label="Your moment", type="pil", sources=["upload", "webcam"])
                video = gr.Video(
                    label="…or a clip (glasses or phone, narrates the middle of the scene)",
                    sources=["upload"],
                )
                style = gr.Dropdown(
                    choices=style_choices(),
                    value=DEFAULT_STYLE_KEY,
                    label="Director's cut",
                )
                hint = gr.Textbox(
                    label="Anything the narrator should know? (optional)",
                    placeholder="e.g. this is my third coffee today",
                )
                go = gr.Button("🎬 Roll narration", variant="primary")
            with gr.Column(scale=1):
                card = gr.Image(label="Title card", interactive=False)
                narration = gr.Textbox(label="The narrator says…", lines=8)
                speak_btn = gr.Button("🔊 Read it to me", variant="secondary")
                audio = gr.Audio(label="The narrator speaks…", interactive=False)
                gr.Markdown(
                    f"<sub>backend: `{backend.name}` · model: `{backend.model_id}` · "
                    "no cloud APIs — Off the Grid 🏕️</sub>"
                )
        go.click(_narrate_handler, inputs=[image, style, hint], outputs=[card, narration])
        image.change(_narrate_handler, inputs=[image, style, hint], outputs=[card, narration])
        video.change(_narrate_video_handler, inputs=[video, style, hint], outputs=[card, narration])
        speak_btn.click(_speak_handler, inputs=[narration], outputs=[audio])
    return demo