"""BFS - Best Face Swap: Qwen-Image-Edit-2509 + BFS Head V3 LoRA, Gradio UI.

Designed for Hugging Face ZeroGPU Spaces: defaults to CPU execution
(FORCE_CPU=1) because the ZeroGPU workers frequently abort with OOM.
"""

import os
import subprocess
import sys

import spaces
import torch
import gradio as gr
from diffusers import QwenImageEditPlusPipeline
from PIL import Image

from optimization import optimize_pipeline_

HF_BASE_MODEL = "Qwen/Qwen-Image-Edit-2509"
BFS_LORA = "Alissonerdx/BFS-Best-Face-Swap"
BFS_LORA_WEIGHT = "bfs_head_v3_qwen_image_edit_2509.safetensors"  # Head V3 (recommended)

# --------- PIPELINE (ZERO GPU) ---------

# Device actually used for inference; updated by load_pipeline().
EXEC_DEVICE = "cpu"
# Force CPU when the GPU is weak or keeps aborting (ZeroGPU is unstable).
FORCE_CPU = bool(int(os.getenv("FORCE_CPU", "1")))
# Only enable the GPU when explicitly requested (default 0 to avoid OOM aborts on ZeroGPU).
PREFER_GPU = bool(int(os.getenv("PREFER_GPU", "0")))


def ensure_torchvision():
    """Ensure torchvision is importable (Qwen2VLProcessor requires it).

    Tries the import first; if missing, pip-installs the version matching
    the installed torch and retries.

    Raises:
        ImportError: if torchvision is missing and installation fails.
    """
    try:
        import torchvision  # noqa: F401
        return
    except ImportError:
        # Strip local version suffixes like "+cu121" before pinning.
        torch_version = torch.__version__.split("+")[0]
        try:
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", f"torchvision=={torch_version}"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            import torchvision  # noqa: F401
        except Exception as exc:  # pragma: no cover - only runs on Spaces infra
            raise ImportError(
                "Torchvision is required for Qwen2VLProcessor. "
                "Please add a matching torchvision to requirements (e.g. pip install torchvision==torch_version)."
            ) from exc


def _build_pipeline(device: str, dtype: torch.dtype):
    """Build the Qwen edit pipeline on *device* with the BFS Head V3 LoRA attached."""
    ensure_torchvision()
    pipe = QwenImageEditPlusPipeline.from_pretrained(
        HF_BASE_MODEL,
        torch_dtype=dtype,
    )
    pipe.to(device)
    # Reduce VRAM/RAM footprint.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    # Load the BFS Head V3 LoRA.
    pipe.load_lora_weights(
        BFS_LORA,
        weight_name=BFS_LORA_WEIGHT,
        adapter_name="bfs_head_v3",
    )
    pipe.set_adapters(["bfs_head_v3"], adapter_weights=[1.0])
    pipe.set_progress_bar_config(disable=True)
    return pipe


def maybe_optimize_pipeline(pipe):
    """Apply AOTI optimization when running on CUDA; no-op on CPU.

    Uses tiny dummy inputs to keep VRAM usage low and silently falls back
    to the unoptimized pipeline on any failure (usually out-of-memory).
    """
    if EXEC_DEVICE != "cuda":
        return pipe
    try:
        dummy = Image.new("RGB", (256, 256))
        generator = torch.Generator(device="cuda").manual_seed(0)
        optimize_pipeline_(
            pipe,
            image=[dummy, dummy],
            prompt="warmup",
            negative_prompt=" ",
            num_inference_steps=1,
            true_cfg_scale=1.0,
            guidance_scale=1.0,
            num_images_per_prompt=1,
            generator=generator,
            width=256,
            height=256,
        )
    except Exception:
        # Optimization failed (typically memory); keep the original pipeline.
        pass
    return pipe


@spaces.GPU  # required for ZeroGPU
def load_pipeline():
    """Build the pipeline, preferring CUDA only when explicitly enabled.

    Defaults to CPU to avoid GPU-worker aborts. Enable GPU with
    PREFER_GPU=1 and FORCE_CPU=0. On a CUDA failure (usually OOM) the
    pipeline is rebuilt on CPU so the app does not crash.
    """
    global EXEC_DEVICE
    prefer_cuda = torch.cuda.is_available() and PREFER_GPU and not FORCE_CPU
    device = "cuda" if prefer_cuda else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    try:
        pipe = _build_pipeline(device, dtype)
        EXEC_DEVICE = device
        pipe = maybe_optimize_pipeline(pipe)
        return pipe
    except Exception:
        # GPU workers often abort due to OOM; fall back to CPU.
        if device == "cuda":
            device = "cpu"
            dtype = torch.float32
            pipe = _build_pipeline(device, dtype)
            EXEC_DEVICE = device
            return pipe
        # Bare raise preserves the original traceback (was `raise exc`).
        raise


pipe = load_pipeline()

# --------- UTILITIES ---------


def resize_to_max(img: Image.Image, max_side: int = 896) -> Image.Image:
    """Downscale *img* so its longer side is at most *max_side*; never upscales."""
    w, h = img.size
    max_dim = max(w, h)
    if max_dim <= max_side:
        return img  # no upscaling
    scale = max_side / max_dim
    new_w = int(w * scale)
    new_h = int(h * scale)
    return img.resize((new_w, new_h), Image.Resampling.LANCZOS)


DEFAULT_PROMPT = (
    "head_swap: start with Picture 1 as the base image, keeping its lighting, "
    "environment, and background. remove the head from Picture 1 completely and "
    "replace it with the head from Picture 2. ensure the head and body have correct "
    "anatomical proportions, and blend the skin tones, shadows, and lighting naturally "
    "so the final result appears as one coherent, realistic person."
)

# --------- INFERENCE FUNCTION ---------


def run_bfs(
    body_image,  # Picture 1 (body)
    face_image,  # Picture 2 (face)
    prompt_text,
    steps,
    true_cfg_scale,
    guidance_scale,
    seed,
):
    """Run the head-swap pipeline; returns (result image or None, status message)."""
    if body_image is None or face_image is None:
        return None, "⚠️ Cần upload đủ 2 ảnh: Picture 1 (body) và Picture 2 (face)."

    # BFS Head V3 convention: Image 1 = body, Image 2 = face.
    body_image = resize_to_max(body_image)
    face_image = resize_to_max(face_image)

    # Fall back to the default BFS prompt on empty/blank input.
    # (Single safe expression — the old code could call .strip() on None.)
    prompt = str(prompt_text).strip() if prompt_text else ""
    if not prompt:
        prompt = DEFAULT_PROMPT

    generator = torch.Generator(device=EXEC_DEVICE).manual_seed(int(seed))
    inputs = {
        "image": [body_image, face_image],  # order matters: [body, face]
        "prompt": prompt,
        "negative_prompt": " ",
        "num_inference_steps": int(steps),
        "true_cfg_scale": float(true_cfg_scale),
        "guidance_scale": float(guidance_scale),
        "num_images_per_prompt": 1,
        "generator": generator,
        "width": body_image.width,
        "height": body_image.height,
    }
    with torch.inference_mode():
        out = pipe(**inputs)
    return out.images[0], ""


# --------- GRADIO UI ---------

with gr.Blocks(title="BFS - Best Face Swap (Qwen Image Edit 2509, CPU)") as demo:
    gr.Markdown(
        """
        # 🧠 BFS - Best Face Swap (Qwen Image Edit 2509, CPU)

        **BFS Head V3** – Picture 1 = **Body**, Picture 2 = **Face**.
        Model chạy trên **CPU (zero GPU)** nên sẽ hơi chậm, ưu tiên ảnh vừa phải (≤ 896px cạnh dài).

        > Vui lòng không dùng cho người thật / người nổi tiếng ngoài đời.
        """
    )

    with gr.Row():
        with gr.Column():
            body_image = gr.Image(
                label="Picture 1 - BODY (ảnh gốc, giữ background)",
                type="pil",
            )
            face_image = gr.Image(
                label="Picture 2 - FACE (ảnh mặt muốn ghép)",
                type="pil",
            )
            prompt_box = gr.Textbox(
                label="Prompt (để trống dùng prompt BFS Head V3 mặc định)",
                value="",
                lines=4,
            )
            steps = gr.Slider(
                label="Steps",
                minimum=8,
                maximum=40,
                value=24,
                step=1,
            )
            true_cfg_scale = gr.Slider(
                label="True CFG Scale",
                minimum=0.0,
                maximum=10.0,
                value=4.0,
                step=0.1,
            )
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=0.0,
                maximum=8.0,
                value=1.0,
                step=0.1,
            )
            seed = gr.Number(
                label="Seed",
                value=0,
                precision=0,
            )
            run_button = gr.Button("🚀 Run Face / Head Swap", variant="primary")

        with gr.Column():
            output_image = gr.Image(
                label="Kết quả",
                type="pil",
            )
            info = gr.Markdown("")

    run_button.click(
        fn=run_bfs,
        inputs=[body_image, face_image, prompt_box, steps, true_cfg_scale, guidance_scale, seed],
        outputs=[output_image, info],
    )


if __name__ == "__main__":
    demo.launch()