"""InstantID + ArtFusion XL Real (SDXL) Gradio demo for Hugging Face ZeroGPU.

Takes a reference face photo, extracts its identity embedding and facial
keypoints with InsightFace, then generates a new image conditioned on that
identity via the InstantID ControlNet + IP-Adapter on top of an SDXL base
model.
"""

import spaces  # NOTE: must be imported BEFORE torch / diffusers on ZeroGPU

import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2
from huggingface_hub import hf_hub_download
from diffusers.models import ControlNetModel
from pipeline_stable_diffusion_xl_instantid import (
    StableDiffusionXLInstantIDPipeline,
    draw_kps,
)
from insightface.app import FaceAnalysis

# ---------------------------
# Global configuration
# ---------------------------

# Diffusers-format SDXL base model combined with the InstantID ControlNet.
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 on GPU to halve memory; fp32 on CPU where fp16 is poorly supported.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
CHECKPOINT_DIR = "./checkpoints"

# ---------------------------
# InstantID weights download
# ---------------------------

def download_checkpoints() -> None:
    """Fetch the InstantID ControlNet and IP-Adapter weights into CHECKPOINT_DIR."""
    filenames = (
        # ControlNet InstantID (full ControlNetModel folder)
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        # IP-Adapter InstantID
        "ip-adapter.bin",
    )
    for filename in filenames:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=filename,
            local_dir=CHECKPOINT_DIR,
            # NOTE(review): deprecated (and ignored) in recent huggingface_hub
            # releases; kept for compatibility with older versions.
            local_dir_use_symlinks=False,
        )


download_checkpoints()

CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"
IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"

# ---------------------------
# InsightFace (face identity)
# ---------------------------

def setup_face_analyzer() -> FaceAnalysis:
    """Build the InsightFace analyzer on CPU (avoids GPU contention on ZeroGPU)."""
    app = FaceAnalysis(name="buffalo_l")
    app.prepare(ctx_id=-1)  # ctx_id=-1 -> CPU inference
    return app


face_app = setup_face_analyzer()


def get_face_info(image: Image.Image) -> "tuple[np.ndarray, np.ndarray]":
    """Return (embedding, keypoints) for the largest face in the image.

    Args:
        image: reference photo containing at least one face.

    Returns:
        A pair ``(embedding, keypoints)`` — the identity embedding as a
        float32 array and the facial keypoints (typically shape (5, 2)).

    Raises:
        RuntimeError: if no face is detected, or the detector output lacks
            an embedding / keypoints.
    """
    img = np.array(image.convert("RGB"))
    # InsightFace expects BGR (OpenCV convention).
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    faces = face_app.get(img_bgr)
    if not faces:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    # Keep the face with the largest bounding-box area.
    face = max(
        faces,
        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
    )

    # Identity embedding; fall back to a dict-style result if the detector
    # returned one instead of a Face object.
    emb = getattr(face, "normed_embedding", None)
    if emb is None and isinstance(face, dict):
        emb = face.get("embedding", None)
    if emb is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    # Facial keypoints used to draw the ControlNet conditioning image.
    kps = getattr(face, "kps", None)
    if kps is None and isinstance(face, dict):
        kps = face.get("kps", None)
    if kps is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(emb, dtype=np.float32), np.array(kps)

# ---------------------------
# InstantID SDXL pipeline + ArtFusion base model
# ---------------------------

def load_pipeline() -> StableDiffusionXLInstantIDPipeline:
    """Assemble the InstantID SDXL pipeline (base model + ControlNet + IP-Adapter)."""
    controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_PATH,
        torch_dtype=DTYPE,
    )

    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=controlnet,
        torch_dtype=DTYPE,
    )
    pipe.to("cuda" if DEVICE == "cuda" else "cpu")

    # InstantID IP-Adapter: injects the face embedding into cross-attention.
    pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    pipe.set_ip_adapter_scale(0.6)
    return pipe


pipe = load_pipeline()

# ---------------------------
# Generation entry point (ZeroGPU)
# ---------------------------

@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30,
             guidance_scale=5, height=1024, width=768):
    """Generate an identity-preserving image from a reference face.

    Args:
        face_image: PIL image containing the reference face.
        prompt: description of body, outfit, scenery, style.
        negative_prompt: things to avoid in the output.
        steps: number of denoising steps.
        guidance_scale: classifier-free guidance strength.
        height, width: output resolution in pixels.

    Returns:
        The generated ``PIL.Image``.

    Raises:
        gr.Error: on missing input or any generation failure (original
            traceback is printed to the logs).
    """
    try:
        if face_image is None:
            raise gr.Error("Merci de fournir une image de visage.")

        # Ensure the pipeline sits on the right device (ZeroGPU may have
        # moved it between calls).
        pipe.to("cuda" if DEVICE == "cuda" else "cpu")

        # 1) Face embedding + keypoints — typically (512,) and (5, 2).
        face_emb, face_kps = get_face_info(face_image)
        face_emb_batch = face_emb[None]  # add batch dim -> (1, 512)

        # 2) Keypoint conditioning image for the ControlNet (PIL.Image,
        #    as in the official InstantID example).
        kps_image = draw_kps(face_image, face_kps)

        # 3) Run the InstantID SDXL pipeline.
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )
        return out.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the traceback in logs.
        import traceback
        traceback.print_exc()
        raise gr.Error(str(e))

# ---------------------------
# Gradio UI
# ---------------------------

with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        with gr.Column():
            face_img = gr.Image(type="pil", label="Image visage (référence ID)")
            prompt = gr.Textbox(
                label="Prompt",
                lines=3,
                value=(
                    "photorealistic full body portrait, white European man, realistic skin texture, "
                    "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
                    "sharp focus on face, clean background, high resolution"
                ),
            )
            neg_prompt = gr.Textbox(
                label="Negative",
                value=(
                    "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
                    "extra limbs, waxy skin, oversharpen, text, watermark"
                ),
            )
            steps = gr.Slider(5, 60, 30, step=1, label="Steps")
            guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(640, 1536, 1024, step=64, label="Height")
            width = gr.Slider(640, 1024, 768, step=64, label="Width")
            btn = gr.Button("Generate")
        with gr.Column():
            output = gr.Image(label="Result")

    btn.click(
        generate,
        [face_img, prompt, neg_prompt, steps, guidance, height, width],
        output,
    )

demo.launch()