import base64
import gc
import io
import uuid
from pathlib import Path

import gradio as gr
import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)
from transformers import pipeline as hf_pipeline

# Base models. These can later be swapped for local or preferred checkpoints.
BASE_SD_ID = "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = "lllyasviel/sd-controlnet-depth"
DEPTH_MODEL_ID = "Intel/dpt-hybrid-midas"

# Use CUDA with half precision when it is available; otherwise fall back to
# the CPU with full precision.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

# Images are written to <directory of this file>/data/sd_outputs. The
# directory is created at import time if it does not exist yet.
ROOT_DIR = Path(__file__).resolve().parent
DATA_DIR = ROOT_DIR / "data"
SD_OUTPUTS_DIR = DATA_DIR / "sd_outputs"
SD_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

# Module-level caches, populated on first use by get_sd_pipe() and
# get_depth_estimator() respectively.
sd_pipe = None
depth_estimator = None


def get_depth_estimator():
    """Return the shared depth-estimation pipeline, creating it on first call.

    The pipeline is built from ``DEPTH_MODEL_ID`` and cached in the
    module-level ``depth_estimator`` so later calls reuse the same object.
    """
    global depth_estimator
    if depth_estimator is not None:
        return depth_estimator

    # Device index 0 when running on CUDA, -1 otherwise.
    device_index = 0 if DEVICE == "cuda" else -1
    depth_estimator = hf_pipeline(
        "depth-estimation",
        model=DEPTH_MODEL_ID,
        device=device_index,
    )
    return depth_estimator


def get_sd_pipe():
    """Return the shared ControlNet Stable Diffusion pipeline, building it on first use.

    The pipeline is loaded from ``BASE_SD_ID`` with the ControlNet from
    ``CONTROLNET_ID``, gets a ``UniPCMultistepScheduler`` derived from its
    default scheduler config, and is moved to ``DEVICE``. The finished
    pipeline is cached in the module-level ``sd_pipe``.

    Returns:
        The cached, fully configured pipeline.

    Raises:
        Whatever the underlying loaders or the device transfer raise. In that
        case nothing is cached, so the next call retries from scratch.
    """
    global sd_pipe
    if sd_pipe is not None:
        return sd_pipe

    controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_ID,
        torch_dtype=DTYPE,
    )

    kwargs = {
        "controlnet": controlnet,
        "torch_dtype": DTYPE,
        "safety_checker": None,
    }
    if DEVICE == "cuda":
        # Ask for the "fp16" weight variant of the base checkpoint on GPU.
        kwargs["variant"] = "fp16"

    # Build into a local first: if the scheduler swap or the device transfer
    # raises (for example on CUDA out-of-memory), the global cache must stay
    # empty instead of holding a half-configured pipeline that every later
    # call would silently reuse.
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        BASE_SD_ID,
        **kwargs,
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to(DEVICE)

    sd_pipe = pipe
    return sd_pipe


def decode_data_url_to_image(data_url: str) -> Image.Image:
    """Decode a base64 data URL into an RGBA PIL image.

    Args:
        data_url: String of the form ``<header>,<base64 payload>``; everything
            before the first comma is ignored.

    Returns:
        The decoded image converted to ``"RGBA"`` mode.

    Raises:
        gr.Error: If ``data_url`` is empty or contains no comma, or if the
            payload is not valid base64 or cannot be read as an image.
    """
    if not data_url or "," not in data_url:
        raise gr.Error("Canvas is empty. Add assets to the scene first.")

    _, encoded = data_url.split(",", 1)
    try:
        binary = base64.b64decode(encoded)
        return Image.open(io.BytesIO(binary)).convert("RGBA")
    except (ValueError, OSError) as err:
        # binascii.Error (bad base64) is a ValueError and
        # PIL.UnidentifiedImageError (not an image) is an OSError. Report both
        # through gr.Error, like the empty-canvas case above, instead of
        # letting a raw exception escape.
        raise gr.Error("Canvas data could not be decoded as an image.") from err


def flatten_rgba_on_white(img: Image.Image) -> Image.Image:
    """Composite ``img`` over an opaque white canvas and return the RGB result.

    The input is converted to RGBA first, so images in other modes are
    accepted as well.
    """
    foreground = img.convert("RGBA")
    white_canvas = Image.new("RGBA", img.size, (255, 255, 255, 255))
    return Image.alpha_composite(white_canvas, foreground).convert("RGB")


def resize_for_depth_and_sd(img: Image.Image, target_max_side: int = 768) -> Image.Image:
    """Shrink ``img`` so its longest side fits ``target_max_side``, snapped to multiples of 8.

    The scale factor never exceeds 1.0. Each side is then rounded to the
    nearest multiple of 8 and clamped to at least 64 pixels. If the computed
    size equals the current size, the input image object is returned as is;
    otherwise a LANCZOS-resampled copy is returned.
    """
    width, height = img.size
    longest = max(width, height)
    if longest > 0:
        scale = min(target_max_side / longest, 1.0)
    else:
        scale = 1.0

    def _snap(side):
        # Round the scaled side to a multiple of 8, with a floor of 64.
        return max(64, int(round((side * scale) / 8) * 8))

    target_size = (_snap(width), _snap(height))
    if target_size == (width, height):
        return img

    return img.resize(target_size, Image.LANCZOS)


def make_depth_image(scene_image: Image.Image) -> Image.Image:
    """Run the depth estimator on ``scene_image`` and return an RGB depth map.

    The ``"depth"`` entry of the estimator result is wrapped in a PIL image
    if it is not one already, converted to RGB, and resized with LANCZOS
    resampling when its size differs from the input scene.
    """
    depth_map = get_depth_estimator()(scene_image)["depth"]
    if not isinstance(depth_map, Image.Image):
        depth_map = Image.fromarray(depth_map)

    depth_rgb = depth_map.convert("RGB")
    if depth_rgb.size == scene_image.size:
        return depth_rgb

    return depth_rgb.resize(scene_image.size, Image.LANCZOS)


def save_image(img: Image.Image, prefix: str) -> str:
    """Save ``img`` as a PNG in ``SD_OUTPUTS_DIR`` and return the path as a string.

    The file name is ``<prefix>_<8 hex chars>.png``, where the hex part is
    taken from a freshly generated UUID4.
    """
    unique_suffix = uuid.uuid4().hex[:8]
    target = SD_OUTPUTS_DIR / f"{prefix}_{unique_suffix}.png"
    img.save(target)
    return str(target)


def generate_with_depth_from_scene(
    scene_png_data: str,
    prompt: str,
    negative_prompt: str,
    steps: int,
    guidance_scale: float,
    controlnet_scale: float,
    seed: int,
):
    """Generate an image with depth ControlNet guidance derived from a canvas scene.

    The scene data URL is decoded, flattened onto white and resized, a depth
    map is estimated from it, and that depth map is used as the conditioning
    image for the Stable Diffusion ControlNet pipeline.

    Args:
        scene_png_data: Data URL holding the scene image.
        prompt: Text prompt; surrounding whitespace is stripped.
        negative_prompt: Negative prompt; an empty value is passed as ``None``.
        steps: Number of inference steps (cast to ``int``).
        guidance_scale: Guidance scale (cast to ``float``).
        controlnet_scale: ControlNet conditioning scale (cast to ``float``).
        seed: Seed for the torch generator (cast to ``int``).

    Returns:
        A tuple ``(scene_path, depth_path, output_path)`` of string paths to
        the saved flattened scene, depth map and generated image.

    Raises:
        gr.Error: If the prompt is empty after stripping, or if decoding the
            scene raises it.
    """
    prompt = (prompt or "").strip()
    if not prompt:
        raise gr.Error("Prompt is empty.")

    rgba_scene = decode_data_url_to_image(scene_png_data)

    # Used only for depth estimation; this picture itself is not fed to SD.
    scene_rgb = flatten_rgba_on_white(rgba_scene)
    scene_rgb = resize_for_depth_and_sd(scene_rgb, target_max_side=768)

    try:
        depth_image = make_depth_image(scene_rgb)

        pipe = get_sd_pipe()
        generator = torch.Generator(device=DEVICE).manual_seed(int(seed))

        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            image=depth_image,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=float(controlnet_scale),
            generator=generator,
            # Generate at the depth map's resolution.
            width=depth_image.width,
            height=depth_image.height,
        )

        output_image = result.images[0]

        scene_path = save_image(scene_rgb, "scene_for_depth")
        depth_path = save_image(depth_image, "depth")
        output_path = save_image(output_image, "sd")
    finally:
        # Release memory even when estimation or generation fails (for
        # example on CUDA out-of-memory), so a failed request does not leave
        # cached GPU memory behind for the next one.
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

    return scene_path, depth_path, output_path