"""
renderer.py — Pillow-based frame generator for kinetic typography
Produces a generator of raw RGB bytes (width × height × 3) at TARGET_FPS.
"""
from typing import Generator
from PIL import Image, ImageDraw

from .transcriber import FrameMeta
from .animations import (
    FRAME_ANIMATIONS,
    THEME_COLORS,
    DEFAULT_FONT_FAMILY,
    get_font,
)

# --- Output video geometry and timing ---
WIDTH: int = 1280       # frame width in pixels
HEIGHT: int = 720       # frame height in pixels
TARGET_FPS: int = 30    # frames generated per second of video


def get_frame_times(frame: FrameMeta) -> tuple[float, float]:
    """
    Return the (start, end) time span of a lyric frame.

    The span runs from the start of the frame's first word to the end of its
    last word. A frame without any words yields (0.0, 0.0).
    """
    words = frame.words
    if words:
        first_word, last_word = words[0], words[-1]
        return first_word.start, last_word.end
    return 0.0, 0.0


def preprocess_bg(img: Image.Image) -> Image.Image:
    """
    Prepare a background image for rendering.

    The image is center-cropped to the output aspect ratio (WIDTH:HEIGHT),
    resized to WIDTH x HEIGHT, and darkened with a semi-transparent black
    overlay to ensure readable text contrast.

    Args:
        img: Source image. It is not modified; every step works on a copy.

    Returns:
        A new RGB image of size (WIDTH, HEIGHT).
    """
    # Pillow always resamples mode "1" and "P" images with nearest-neighbour,
    # whatever filter is requested. Expand those to RGBA first so that
    # palette-based sources get the LANCZOS filter like everything else.
    if img.mode in ("1", "P"):
        img = img.convert("RGBA")

    w, h = img.size
    aspect = WIDTH / HEIGHT
    if w / h > aspect:
        # Source is wider than the target: trim equally from left and right.
        new_w = int(h * aspect)
        left = (w - new_w) // 2
        crop_box = (left, 0, left + new_w, h)
    else:
        # Source is taller than (or equal to) the target: trim top and bottom.
        new_h = int(w / aspect)
        top = (h - new_h) // 2
        crop_box = (0, top, w, top + new_h)

    img_final = img.crop(crop_box).resize((WIDTH, HEIGHT), Image.Resampling.LANCZOS)

    # Apply a dark overlay (RGBA overlay on top of background)
    overlay = Image.new("RGBA", (WIDTH, HEIGHT), (0, 0, 0, 140))
    img_rgba = img_final.convert("RGBA")
    img_rgba.alpha_composite(overlay)
    return img_rgba.convert("RGB")


def _get_bg_at_time(t: float, frames: list[FrameMeta], bg_list: list[Image.Image]) -> Image.Image | None:
    """
    Pick (or cross-fade) the background image to show at time t.

    bg_list[i] is the background that belongs to frames[i]. During the last
    second before a frame starts, the previous background is blended into the
    next one; before the first frame the fade starts from near-black.

    Args:
        t: Time in seconds.
        frames: Lyric frames; the search stops at the first frame that starts
            after t, so they are expected in chronological order.
        bg_list: Preprocessed backgrounds, one per frame. If it is shorter
            than frames, its last entry is reused for the remaining frames.

    Returns:
        The background image for time t, or None when there are no frames or
        no backgrounds. The result may be one of the bg_list entries itself
        (not a copy), so callers must not modify it in place.
    """
    if not bg_list or not frames:
        return None

    # Index and start time of the first frame that begins strictly after t.
    next_idx = None
    next_start = 0.0
    for idx, frame in enumerate(frames):
        f_start, _ = get_frame_times(frame)
        if f_start > t:
            next_idx = idx
            next_start = f_start
            break

    # Every frame has already started: stay on the final background.
    if next_idx is None:
        return bg_list[-1]

    last_bg = len(bg_list) - 1
    next_bg = bg_list[min(next_idx, last_bg)]
    # Time left until the next frame starts; positive by construction.
    dt = next_start - t
    fading = dt <= 1.0

    if next_idx == 0:
        # Before the first frame: fade in from near-black over the last second.
        black = Image.new("RGB", (WIDTH, HEIGHT), (10, 10, 10))
        if fading:
            return Image.blend(black, next_bg, 1.0 - dt)
        return black

    prev_bg = bg_list[min(next_idx - 1, last_bg)]
    # Consecutive frames often share the very same image object; blending an
    # image with itself yields the same pixels, so skip the full-frame blend.
    if fading and next_bg is not prev_bg:
        return Image.blend(prev_bg, next_bg, 1.0 - dt)

    return prev_bg


def _render_frame(
    t: float,
    frames: list[FrameMeta],
    theme_name: str,
    bg_image: Image.Image | None = None,
    font_family: str = DEFAULT_FONT_FAMILY,
) -> bytes:
    """
    Render a single frame at time t. Returns raw RGB bytes.

    Args:
        t: Time in seconds.
        frames: Lyric frames, expected in chronological order.
        theme_name: Key into THEME_COLORS; unknown names fall back to "Dark".
        bg_image: Optional background. It is copied, never modified. When
            omitted, a solid image in the theme's "bg" color is used.
        font_family: Font family passed to get_font().

    Returns:
        The bytes of the composed image as produced by Image.tobytes().
    """
    theme = THEME_COLORS.get(theme_name, THEME_COLORS["Dark"])

    if bg_image is not None:
        img = bg_image.copy()
    else:
        img = Image.new("RGB", (WIDTH, HEIGHT), theme.get("bg", (10, 10, 10)))

    # If no active frame is found we are in a gap between frames: just show
    # the background.
    active_frame = _find_active_frame(t, frames)
    if active_frame is None:
        return img.tobytes()

    f_start, f_end = get_frame_times(active_frame)
    f_duration = f_end - f_start
    t_frame = t - f_start

    # Frame-level animation: (x offset, y offset, scale, opacity) for the
    # whole block of text at this moment of the frame.
    frame_anim_func = FRAME_ANIMATIONS.get(active_frame.frame_animation, FRAME_ANIMATIONS["none"])
    fx_off, fy_off, f_scale, f_opacity = frame_anim_func(t_frame, f_duration)

    if f_opacity <= 0:
        return img.tobytes()

    draw = ImageDraw.Draw(img)

    # Calculate per-word layout (unscaled widths/heights). One font is used
    # for the whole render, so its bbox/space metrics are shared by all words.
    font = get_font(font_family)
    space_bbox = draw.textbbox((0, 0), " ", font=font)
    space_w = space_bbox[2] - space_bbox[0]

    word_layouts = []
    max_h = 0
    for w in active_frame.words:
        bbox = draw.textbbox((0, 0), w.text, font=font)
        ww, wh = bbox[2] - bbox[0], bbox[3] - bbox[1]
        word_layouts.append((w, bbox, ww, wh))
        max_h = max(max_h, wh)

    # Wrap long lines across several rows instead of letting them overflow
    # or shrinking them down to an unreadably small size.
    max_text_width = WIDTH * 0.92
    rows, row_widths = _wrap_into_rows(word_layouts, space_w, max_text_width)

    # Shrink only when the widest row, at the current scale, still exceeds the
    # usable width (e.g. a single word wider than the screen).
    max_row_w = max(row_widths)
    if max_row_w > 0 and max_row_w * f_scale > max_text_width:
        f_scale *= max_text_width / (max_row_w * f_scale)

    # Stack rows vertically, centered as a block.
    row_gap = max_h * 0.3
    row_step = (max_h + row_gap) * f_scale
    total_block_h = max_h * f_scale + row_step * (len(rows) - 1)
    y_block_start = HEIGHT / 2 - total_block_h / 2 + fy_off

    # Words not yet spoken are dimmed; spoken/current words use the theme's
    # main text color.
    spoken_color = theme["theme_default"]
    unspoken_color = theme.get("inactive", (80, 80, 80))

    # Pillow can't scale or fade text directly, so whenever the frame is
    # zoomed or translucent each word goes through its own RGBA buffer.
    use_buffer = abs(f_scale - 1.0) > 0.01 or f_opacity < 0.99

    # Render each word, row by row
    for row_idx, row in enumerate(rows):
        row_w = row_widths[row_idx]
        x_cursor = (WIDTH - row_w * f_scale) / 2 + fx_off
        y_row = y_block_start + row_idx * row_step

        for (w, bbox, ww, wh) in row:
            base_color = unspoken_color if t < w.start else spoken_color
            display_text = w.text

            if use_buffer:
                pad = int(wh * 0.5)
                buf_w, buf_h = int(ww + pad * 2), int(wh + pad * 2)
                out_w, out_h = buf_w, buf_h
                if f_scale != 1.0:
                    out_w, out_h = int(buf_w * f_scale), int(buf_h * f_scale)

                # A word scaled down to zero pixels has nothing to show; skip
                # it rather than pasting the unscaled buffer at full size.
                if out_w > 0 and out_h > 0:
                    word_img = Image.new("RGBA", (buf_w, buf_h), (0, 0, 0, 0))
                    word_draw = ImageDraw.Draw(word_img)
                    # Draw offset by the glyph's own bbox origin so its ink
                    # lands within [pad, pad+ww] x [pad, pad+wh], fully inside
                    # the buffer regardless of ascenders/descenders.
                    word_draw.text(
                        (pad - bbox[0], pad - bbox[1]),
                        display_text,
                        font=font,
                        fill=(base_color[0], base_color[1], base_color[2], 255),
                    )

                    if f_scale != 1.0:
                        word_img = word_img.resize((out_w, out_h), Image.Resampling.LANCZOS)

                    if f_opacity < 1.0:
                        # Scale the alpha channel; lookup entries are rounded
                        # so the table holds integers.
                        alpha = word_img.split()[3]
                        alpha = alpha.point(lambda p: round(p * f_opacity))
                        word_img.putalpha(alpha)

                    # Undo the buffer padding so the ink lands where the
                    # direct draw.text() path below would have put it.
                    paste_x = int(x_cursor + (bbox[0] - pad) * f_scale)
                    paste_y = int(y_row + (bbox[1] - pad) * f_scale)
                    img.paste(word_img, (paste_x, paste_y), word_img)
            else:
                draw.text(
                    (int(x_cursor), int(y_row)),
                    display_text,
                    font=font,
                    fill=base_color,
                )

            x_cursor += (ww + space_w) * f_scale

    # Frame-level flash: a brief white wash that decays over the frame's first 0.25s
    if active_frame.frame_animation == "flash":
        FLASH_DUR = 0.25
        if 0.0 <= t_frame < FLASH_DUR:
            alpha = (1.0 - t_frame / FLASH_DUR) * 0.8
            white = Image.new("RGB", img.size, (255, 255, 255))
            img = Image.blend(img, white, alpha)

    return img.tobytes()


def _find_active_frame(t: float, frames: list[FrameMeta]) -> FrameMeta | None:
    """Return the frame to display at time t, or None when t falls in a gap."""
    # Scan from the end so that, with frames in chronological order, the most
    # recently started frame wins. Each frame lingers 0.5s past its last word;
    # a first-match scan would let that tail hide a following frame that has
    # already begun.
    for frame in reversed(frames):
        if not frame.words:
            # A wordless frame reports (0.0, 0.0) and has nothing to draw.
            continue
        f_start, f_end = get_frame_times(frame)
        # Frames stay on screen slightly longer than their last word
        if f_start <= t <= f_end + 0.5:
            return frame
    return None


def _wrap_into_rows(
    word_layouts: list[tuple],
    space_w: float,
    max_width: float,
) -> tuple[list[list[tuple]], list[float]]:
    """Greedily split word layouts into rows no wider than max_width.

    Each layout is a (word, bbox, width, height) tuple. Returns the rows and
    the unscaled width of each row, counting one space between adjacent words.
    A single word wider than max_width still gets a row of its own.
    """
    rows: list[list[tuple]] = [[]]
    row_widths = [0.0]
    for layout in word_layouts:
        _, _, ww, _ = layout
        cur_row = rows[-1]
        added_w = ww + (space_w if cur_row else 0)
        if cur_row and row_widths[-1] + added_w > max_width:
            # Doesn't fit: start a new row, which needs no leading space.
            rows.append([])
            row_widths.append(0.0)
            cur_row = rows[-1]
            added_w = ww
        cur_row.append(layout)
        row_widths[-1] += added_w
    return rows, row_widths


def render_frames(
    frames: list[FrameMeta],
    duration: float,
    theme_name: str = "Dark",
    bg_images: list | None = None,
    font_family: str = DEFAULT_FONT_FAMILY,
) -> Generator[bytes, None, None]:
    """
    Yields raw RGB frame bytes for every frame of the video.

    Args:
        frames: Lyric frames to display.
        duration: total audio duration in seconds. int(duration * TARGET_FPS)
            frames are produced.
        theme_name: Color theme name, forwarded to the frame renderer.
        bg_images: Optional backgrounds: a list/tuple of images or a single
            image. None or an empty sequence means no background image.
        font_family: Font family, forwarded to the frame renderer.

    Yields:
        One bytes object per video frame, in playback order.
    """
    # Normalise the accepted background shapes to a plain list of images.
    if bg_images is None:
        sources = []
    elif isinstance(bg_images, (list, tuple)):
        sources = list(bg_images)
    else:
        sources = [bg_images]

    # Each background image covers two consecutive lyric frames, so the
    # backdrop changes about half as often as the on-screen text. Only the
    # images that can actually appear are preprocessed (at least one, which
    # serves as a static backdrop when there are no lyric frames).
    needed = max(1, (len(frames) + 1) // 2)
    processed = [preprocess_bg(img) for img in sources[:needed]]

    bg_list = None
    static_bg = None
    if processed and frames:
        expanded = []
        for img in processed:
            expanded.extend([img, img])
        # Too few images: the last one stays up for the remaining frames.
        shortfall = len(frames) - len(expanded)
        if shortfall > 0:
            expanded.extend([expanded[-1]] * shortfall)
        bg_list = expanded[:len(frames)]
    elif processed:
        # No lyric frames to schedule against: keep the first image throughout.
        static_bg = processed[0]

    total_frames = int(duration * TARGET_FPS)
    for frame_idx in range(total_frames):
        t = frame_idx / TARGET_FPS

        current_bg = static_bg
        if bg_list is not None:
            current_bg = _get_bg_at_time(t, frames, bg_list)

        yield _render_frame(t, frames, theme_name, bg_image=current_bg, font_family=font_family)