from __future__ import annotations

import os
import sys
from pathlib import Path

try:
    import spaces
except ImportError:
    class _SpacesCompat:
        @staticmethod
        def GPU(*decorator_args, **decorator_kwargs):
            if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs:
                return decorator_args[0]

            def decorator(fn):
                return fn

            return decorator

    spaces = _SpacesCompat()

import gradio as gr
import torch

CURRENT_FILE = Path(__file__).resolve()
PROJECT_ROOT = CURRENT_FILE.parents[1]
for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
    if (candidate / "infer").exists() and (candidate / "models").exists():
        PROJECT_ROOT = candidate
        break
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from demo.real_world_pipeline import (  # noqa: E402
    DEFAULT_BBOX_MODEL,
    DEFAULT_REAL_CONFIG_PATH,
    DEFAULT_RUN_NAME,
    DEFAULT_WORK_DIR,
    run_real_world_pipeline,
)

DEFAULT_EXAMPLE_DIR = Path(
    os.environ.get(
        "SYNLAYERS_EXAMPLE_DIR",
        "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024",
    )
)


def read_int_env(name: str, default: int) -> int:
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))


def list_example_images(limit: int = 6) -> list[list[str]]:
    if not DEFAULT_EXAMPLE_DIR.exists():
        return []

    candidates = []
    for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
        candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
    candidates = sorted(candidates)[:limit]
    return [[str(path)] for path in candidates]


def build_gallery(result: dict) -> list[tuple[str, str]]:
    gallery: list[tuple[str, str]] = []
    if result.get("whole_image_rgba"):
        gallery.append((result["whole_image_rgba"], "Whole RGBA"))
    if result.get("background_rgba"):
        gallery.append((result["background_rgba"], "Background RGBA"))
    for idx, path in enumerate(result.get("layer_images", [])):
        gallery.append((path, f"Layer {idx}"))
    return gallery


def get_gpu_name() -> str:
    if not torch.cuda.is_available():
        return "None"
    try:
        return torch.cuda.get_device_name(torch.cuda.current_device())
    except Exception as exc:  # pragma: no cover - defensive runtime reporting
        return f"Unavailable ({exc})"


def is_zero_gpu_space() -> bool:
    accelerator = os.environ.get("ACCELERATOR", "").lower()
    return (
        os.environ.get("ZEROGPU_V2", "").lower() == "true"
        or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
        or accelerator == "zerogpu"
        or accelerator.startswith("zero")
    )


def get_runtime_status_markdown() -> str:
    accelerator = os.environ.get("ACCELERATOR", "unknown")
    space_id = os.environ.get("SPACE_ID", "local")
    model_repo = os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)")
    zero_gpu_enabled = is_zero_gpu_space()

    lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]

    if zero_gpu_enabled:
        lines.extend(
            [
                f"- `ZeroGPU mode`: `True`",
                f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
                f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
                "",
                "This Space is configured for Hugging Face ZeroGPU.",
                "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
                "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
            ]
        )
    else:
        cuda_available = torch.cuda.is_available()
        lines.extend(
            [
                f"- `CUDA available`: `{cuda_available}`",
                f"- `GPU device`: `{get_gpu_name()}`",
                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                "",
            ]
        )

        if accelerator == "none" or not cuda_available:
            lines.extend(
                [
                    "This Space is not currently running with a usable CUDA GPU.",
                    "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
                    "Visitors cannot switch GPUs from inside the Gradio app.",
                ]
            )
        else:
            lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")

    return "\n".join(lines)


@spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE)
def run_demo_inference(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float,
) -> dict:
    seed = int(seed_value) if seed_value >= 0 else None
    return run_real_world_pipeline(
        image_path=image_path,
        sample_name=sample_name or None,
        work_dir=DEFAULT_WORK_DIR,
        bbox_model=DEFAULT_BBOX_MODEL,
        config_path=DEFAULT_REAL_CONFIG_PATH,
        max_new_tokens=int(max_new_tokens),
        seed=seed,
        run_name=DEFAULT_RUN_NAME,
    )


def run_demo(
    image_path: str,
    sample_name: str,
    max_new_tokens: int,
    seed_value: float,
):
    if not image_path:
        raise gr.Error("Please upload an input image first.")

    try:
        result = run_demo_inference(
            image_path=image_path,
            sample_name=sample_name,
            max_new_tokens=max_new_tokens,
            seed_value=seed_value,
        )
    except Exception as exc:
        raise gr.Error(str(exc)) from exc

    return (
        result["bbox_visualization"],
        result["merged_image"],
        result["bbox_record"].get("whole_caption", ""),
        result["bbox_record"],
        result["metadata"],
        build_gallery(result),
        result["archive_path"],
        result["case_dir"],
    )


with gr.Blocks(title="SynLayers Real-World Demo") as demo:
    gr.Markdown(
        """
        # SynLayers Real-World Decomposition

        Upload a single image and run the full pipeline in one step:
        1. VLM for whole-caption + bounding-box detection
        2. SynLayers real-image layer decomposition

        This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
        The first request may take time while model assets are loaded from Hugging Face.

        In ZeroGPU mode, a shared GPU is requested only while inference is running.
        """
    )
    runtime_status = gr.Markdown(get_runtime_status_markdown())
    refresh_status_button = gr.Button("Refresh Runtime Status")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="filepath", label="Input Image")
            sample_name_input = gr.Textbox(
                label="Optional Sample Name",
                placeholder="Leave empty to use the uploaded filename",
            )
            max_new_tokens_input = gr.Slider(
                minimum=128,
                maximum=2048,
                value=1024,
                step=64,
                label="VLM Max New Tokens",
            )
            seed_input = gr.Number(
                value=42,
                precision=0,
                label="Seed (-1 keeps config default)",
            )
            run_button = gr.Button("Run Full Pipeline", variant="primary")

        with gr.Column(scale=1):
            bbox_vis_output = gr.Image(type="filepath", label="Detected Bounding Boxes")
            merged_output = gr.Image(type="filepath", label="Merged Decomposition")

    caption_output = gr.Textbox(label="Whole Caption", lines=6)
    with gr.Row():
        bbox_json_output = gr.JSON(label="BBox JSON")
        meta_json_output = gr.JSON(label="Inference Metadata")
    layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
    with gr.Row():
        archive_output = gr.File(label="Download Result Bundle")
        case_dir_output = gr.Textbox(label="Saved Case Directory")

    examples = list_example_images()
    if examples:
        gr.Examples(examples=examples, inputs=[image_input], label="Example Images")

    refresh_status_button.click(
        fn=get_runtime_status_markdown,
        outputs=runtime_status,
    )

    run_button.click(
        fn=run_demo,
        inputs=[
            image_input,
            sample_name_input,
            max_new_tokens_input,
            seed_input,
        ],
        outputs=[
            bbox_vis_output,
            merged_output,
            caption_output,
            bbox_json_output,
            meta_json_output,
            layer_gallery,
            archive_output,
            case_dir_output,
        ],
    )


if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
    )