"""
HuggingFace Space app for SPILL: Glass Keypoint Detection + 3D Reconstruction

Gradio demo with two tabs:
1. 2D Reconstruction — detect keypoints on uploaded images
2. 3D Reconstruction — monocular depth estimation + cylinder reconstruction

To deploy:
1. Create a new Space on HuggingFace with `gradio` SDK
2. Upload this directory's contents
3. Or run: `huggingface-cli login` then push from terminal
"""
import os
import cv2
import numpy as np
import torch
import gradio as gr
from pathlib import Path

# Import SPILL library (installed via requirements.txt)
from spill import GlassDetector, Monocular3DReconstructor
from spill import depth_overlay, create_3d_figure, build_3d_info

# Checkpoint locations, resolved relative to the directory holding this file.
BASE_DIR = Path(__file__).parent
_CHECKPOINT_DIR = BASE_DIR.joinpath("checkpoints")
CHECKPOINT_PATH = _CHECKPOINT_DIR.joinpath("wild_glasses.ckpt")
YOLO_PATH = _CHECKPOINT_DIR.joinpath("yolov8m.pt")

# Detect device — default to CPU for safety (Blackwell GPU + old PyTorch issue).
# HF Spaces set SPILL_FORCE_CUDA=1 in the Dockerfile to enable GPU.
#
# The flag is parsed as a boolean: unset, empty, "0", "false", "no" and "off"
# (case-insensitive, surrounding whitespace ignored) keep the CPU default; any
# other value selects CUDA. A plain truthiness test on the raw string would
# treat SPILL_FORCE_CUDA=0 as "enabled", because "0" is a non-empty string.
# CUDA availability itself is not verified here.
_CUDA_FLAG_OFF_VALUES = {"", "0", "false", "no", "off"}
_cuda_flag = os.environ.get("SPILL_FORCE_CUDA", "").strip().lower()
DEVICE = "cpu" if _cuda_flag in _CUDA_FLAG_OFF_VALUES else "cuda"

# The 2D detector is constructed at import time; the 3D reconstructor, by
# contrast, is only created on first use (see get_reconstructor).
print(f"Loading SPILL models on {DEVICE}...")
_detector_options = {
    "keypoint_checkpoint": str(CHECKPOINT_PATH),
    "yolo_model_path": str(YOLO_PATH),
    "device": DEVICE,
}
detector = GlassDetector(**_detector_options)
print("2D models loaded!")

# Module-level cache for the 3D reconstructor. It stays None until the first
# call to get_reconstructor(), so nothing is loaded for it at startup.
_reconstructor = None


def get_reconstructor():
    """Return the shared ``Monocular3DReconstructor``, creating it on first call.

    The instance is built with ``depth_model_size="large"`` on ``DEVICE`` and
    stored in the module-level ``_reconstructor``; later calls return that same
    object.
    """
    global _reconstructor
    if _reconstructor is not None:
        return _reconstructor

    print("[3D] Lazy-loading 3D reconstructor...")
    _reconstructor = Monocular3DReconstructor(depth_model_size="large", device=DEVICE)
    return _reconstructor


# Marker colors per keypoint, in OpenCV BGR channel order.
#
# draw_detections() paints these onto the BGR canvas built by the tab
# callbacks; the canvas is converted to RGB only afterwards. The tuples used to
# be written in RGB order, so e.g. "bottom_front" rendered blue instead of red.
# The values below are BGR and follow the color legend in DESCRIPTION.
KP_COLORS = {
    "bottom_front": (0, 0, 255),       # Red
    "top_front": (0, 255, 0),          # Green
    "top_left": (255, 0, 0),           # Blue
    "top_right": (255, 0, 255),        # Magenta
    "fluid_level": (0, 255, 255),      # Yellow
    "fluid_level_2": (0, 165, 255),    # Orange
}


def draw_detections(image, keypoints_list):
    """Return a copy of ``image`` annotated with boxes and keypoint markers.

    Args:
        image: Image supporting ``.copy()`` that OpenCV can draw on. Both
            callers in this file pass a BGR image.
        keypoints_list: Detections exposing ``bounding_box`` (four values) and
            the attributes ``bottom_front``, ``top_front``, ``top_left``,
            ``top_right``, ``fluid_level`` and ``fluid_level_2``. A keypoint
            that is ``None`` is not drawn.

    Returns:
        The annotated copy; ``image`` itself is left untouched.
    """
    canvas = image.copy()
    box_color = (0, 255, 0)
    marker_names = (
        "bottom_front",
        "top_front",
        "top_left",
        "top_right",
        "fluid_level",
        "fluid_level_2",
    )

    for number, detection in enumerate(keypoints_list, start=1):
        # Bounding box exactly as stored on the detection, plus a numbered label
        # placed 8 px above its top-left corner.
        left, top, right, bottom = map(int, detection.bounding_box)
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2)
        cv2.putText(canvas, f"Glass #{number}", (left, top - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, box_color, 2)

        # One marker per available keypoint: a filled disc with a thin outer
        # ring. Colors from KP_COLORS are handed to OpenCV unchanged.
        for name in marker_names:
            point = getattr(detection, name)
            if point is None:
                continue
            center = (int(point[0]), int(point[1]))
            color = KP_COLORS[name]
            cv2.circle(canvas, center, 6, color, -1)
            cv2.circle(canvas, center, 8, color, 1)

    return canvas


def detect_glasses_2d(image):
    """2D tab callback: detect glasses and return annotated image + info.

    Args:
        image: RGB image from Gradio, or a dict carrying that image under the
            ``"image"`` key; ``None`` when nothing was uploaded.

    Returns:
        A ``(image, text)`` tuple for the result image and the info textbox:
        ``(None, prompt)`` when no image was given, ``(copy of input, message)``
        when nothing was detected, otherwise the annotated RGB image and a
        per-glass listing of keypoint pixel coordinates.
    """
    if image is None:
        return None, "Please upload an image."

    # Gradio gives us RGB, convert to BGR for OpenCV
    if isinstance(image, dict):
        image = image["image"]
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Detect
    keypoints_list = detector.detect(image_bgr)

    if not keypoints_list:
        return image.copy(), "No glasses detected. Try an image with clear glasses/cups/wine glasses."

    # Draw on the BGR image, then convert back to RGB for display.
    output_rgb = draw_detections(image_bgr, keypoints_list)
    output_rgb = cv2.cvtColor(output_rgb, cv2.COLOR_BGR2RGB)

    # Build info text — per-keypoint pixel coordinates.
    # Every keypoint is guarded against None, mirroring draw_detections();
    # indexing a missing structural keypoint would otherwise raise TypeError
    # and fail the whole callback after detection already succeeded.
    keypoint_names = (
        "bottom_front",
        "top_front",
        "top_left",
        "top_right",
        "fluid_level",
        "fluid_level_2",
    )
    info_lines = [f"Found {len(keypoints_list)} glass(es):\n"]
    for number, kp in enumerate(keypoints_list, start=1):
        info_lines.append(f"\n--- Glass #{number} ---")
        for name in keypoint_names:
            pt = getattr(kp, name)
            if pt is None:
                continue
            # Label plus colon is padded to 15 columns so coordinates line up.
            label = f"{name}:"
            info_lines.append(f"  {label:<15}({pt[0]:.0f}, {pt[1]:.0f})")

    return output_rgb, "\n".join(info_lines)


def detect_glasses_3d(image):
    """3D tab callback: run 2D detection, then monocular 3D reconstruction.

    Args:
        image: RGB image from Gradio, or a dict carrying that image under the
            ``"image"`` key; ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple feeding the 3D tab outputs in order: keypoint overlay image,
        depth visualisation, 3D plot, info text. Without an input image the
        first three are ``None``; when nothing is detected the first element is
        a copy of the input and the middle two are ``None``.
    """
    if image is None:
        return None, None, None, "Please upload an image."

    rgb = image["image"] if isinstance(image, dict) else image
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # 2D keypoints first; without any detection there is nothing to reconstruct.
    detections = detector.detect(bgr)
    if not detections:
        message = "No glasses detected. Try an image with clear glasses/cups/wine glasses."
        return rgb.copy(), None, None, message

    # The reconstructor is created on first use, then reused.
    reconstructor = get_reconstructor()
    glasses, depth_map, recon_info = reconstructor.reconstruct(bgr, detections)

    # Keypoint overlay: drawn on the BGR image, converted back to RGB.
    overlay_rgb = cv2.cvtColor(draw_detections(bgr, detections), cv2.COLOR_BGR2RGB)

    # Remaining outputs are produced by the SPILL helpers from the RGB input.
    depth_vis = depth_overlay(rgb, depth_map)
    figure = create_3d_figure(glasses, recon_info, rgb.shape)
    summary = build_3d_info(glasses, detections, recon_info)

    return overlay_rgb, depth_vis, figure, summary


# ── Build Gradio interface ──────────────────────────────────────

# Markdown rendered at the top of the page via gr.Markdown(DESCRIPTION).
# The "Key points" list is the user-facing color legend for the markers drawn
# by draw_detections(); keep it consistent with KP_COLORS when either changes.
DESCRIPTION = """
# SPILL: Glass Detection & 3D Reconstruction

Detect **transparent glassware** in images using semantic keypoint detection — and reconstruct full 3D cylinders from a single RGB image.

**How it works:**
1. YOLOv8 detects glass bounding boxes (cups, vases, wine glasses)
2. A keypoint detector predicts structural points + fluid level on each glass
3. (3D tab) DepthAnythingV2 estimates monocular depth → RANSAC finds the table plane → cylinder estimation gives radius, height, tilt, and fluid level

**Key points:**
- 🔴 Bottom Front — base of the glass facing the camera
- 🟢 Top Front — rim edge facing the camera
- 🔵 Top Left — left edge of the rim
- 🟣 Top Right — right edge of the rim
- 🟡 Fluid Level — liquid surface detected by the model
- 🟠 Fluid Level (alt) — secondary fluid level candidate (shown when the model detects multiple peaks on the fluid level heatmap; useful when the first peak is uncertain, so downstream use cases can conservatively pick the highest or lowest value)

**3D Reconstruction** uses DepthAnythingV2-Large for monocular depth estimation, Open3D RANSAC for plane detection, and the SPILL cylinder solver for full 3D parameters.
"""

# Page layout. Components are created inside nested context managers, so the
# order of the statements below is the order in which they appear on the page.
with gr.Blocks(title="SPILL Glass Detection & 3D Reconstruction") as demo:
    # Page header: project description and keypoint legend.
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # ── 2D Detection Tab ──
        with gr.Tab("2D Reconstruction"):
            gr.Markdown(
                "### 2D Keypoint Detection\n\n"
                "Upload an image and see detected glass keypoints overlaid. "
                "No depth camera needed."
            )
            # Input and result side by side; both are exchanged as numpy arrays.
            with gr.Row():
                input_2d = gr.Image(type="numpy", label="Upload Image", sources=["upload", "clipboard"])
                output_2d = gr.Image(type="numpy", label="Detection Result")
            detect_2d_btn = gr.Button("Detect Glasses", variant="primary")
            info_2d = gr.Textbox(label="Detection Info")
            # detect_glasses_2d returns (image, text), matching the two outputs.
            detect_2d_btn.click(
                fn=detect_glasses_2d,
                inputs=input_2d,
                outputs=[output_2d, info_2d],
            )

        # ── 3D Reconstruction Tab ──
        with gr.Tab("3D Reconstruction"):
            gr.Markdown(
                "### Monocular 3D Reconstruction\n\n"
                "From a single RGB image: estimate depth (DepthAnythingV2), find the table plane (RANSAC), "
                "and reconstruct 3D glass cylinders — radius, height, tilt angle, and fluid level. "
                "No depth camera required!\n\n"
                "**Best results:** place the glass on a flat surface (table), keep the camera roughly level."
            )
            # First row: uploaded image next to the keypoint overlay.
            with gr.Row():
                input_3d = gr.Image(type="numpy", label="Upload Image", sources=["upload", "clipboard"])
                output_3d_annotated = gr.Image(type="numpy", label="Keypoints Overlay")
            # Second row: depth visualisation next to the 3D plot.
            with gr.Row():
                output_3d_depth = gr.Image(type="numpy", label="Depth Estimate")
                output_3d_plot = gr.Plot(label="3D Reconstruction")
            detect_3d_btn = gr.Button("Reconstruct 3D", variant="primary")
            info_3d = gr.Textbox(label="3D Reconstruction Info")
            # detect_glasses_3d returns a 4-tuple in exactly this output order.
            detect_3d_btn.click(
                fn=detect_glasses_3d,
                inputs=input_3d,
                outputs=[output_3d_annotated, output_3d_depth, output_3d_plot, info_3d],
            )

    # Footer with paper, dataset and code links.
    gr.Markdown("""
    ---
    **Paper:** [SPILL: Size, Pose, and Internal Liquid Level Estimation](https://github.com/Louadria/SPILL)
    | **Dataset:** [Glasses-in-the-Wild](https://doi.org/10.5281/zenodo.17288314)
    | **Code:** [Louadria/SPILL](https://github.com/Louadria/SPILL)
    """)

if __name__ == "__main__":
    # Serve the demo on all network interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)