import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation

# One-time startup work: the processor and model are created at import time so
# that every call to the handler reuses the same objects.
MODEL_ID = "depth-anything/Depth-Anything-V2-Base-hf"
print(f"Loading model: {MODEL_ID}...")
image_processor = AutoImageProcessor.from_pretrained(MODEL_ID)
# Module.eval() returns the module itself, so switching to inference mode can
# be chained directly onto the load call.
model = AutoModelForDepthEstimation.from_pretrained(MODEL_ID).eval()
print("Model loaded.")


def _normalize_to_uint8(depth):
    """Linearly rescale a float array to 0-255 and return it as uint8.

    A constant (or non-finite-range) input has no usable contrast; in that
    case an all-zero array of the same shape is returned instead of dividing
    by zero and casting NaN to uint8.
    """
    lo = depth.min()
    span = depth.max() - lo
    if not np.isfinite(span) or span <= 0:
        return np.zeros(depth.shape, dtype=np.uint8)
    return ((depth - lo) / span * 255.0).astype(np.uint8)


def estimate_depth(input_image):
    """Run depth estimation on a PIL image and return a grayscale depth map.

    Args:
        input_image: PIL image to process, or None when nothing was uploaded.

    Returns:
        A single-channel 8-bit PIL image whose size is the input scaled so its
        longest side is 518 px, with the predicted depth min-max normalized
        to 0-255.

    Raises:
        gr.Error: If ``input_image`` is None.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    # Palette, grayscale or alpha images are converted up front so the
    # processor always receives three channels, even when this function is
    # called outside the Gradio pipeline.
    if input_image.mode != "RGB":
        input_image = input_image.convert("RGB")

    # Resize so longest side is 518px (model's native resolution).
    w, h = input_image.size
    scale = 518 / max(w, h)
    # Clamp to 1 px: for extreme aspect ratios the short side would otherwise
    # round down to 0 and make resize() fail.
    new_w = max(1, round(w * scale))
    new_h = max(1, round(h * scale))
    resized = input_image.resize((new_w, new_h), Image.LANCZOS)

    # Preprocess
    inputs = image_processor(images=resized, return_tensors="pt")

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process: add a channel axis so bicubic interpolation sees a 4-D
    # tensor, then bring the prediction back to the resized image dimensions.
    # Index [0, 0] drops exactly the batch and channel axes; a bare squeeze()
    # would also drop a height or width of 1 and leave a 1-D array.
    predicted_depth = torch.nn.functional.interpolate(
        outputs.predicted_depth.unsqueeze(1),
        size=(new_h, new_w),
        mode="bicubic",
        align_corners=False,
    )[0, 0]

    # Normalize to 0-255
    depth = predicted_depth.cpu().numpy()
    return Image.fromarray(_normalize_to_uint8(depth))


# Assemble the web UI: one uploaded image in, one depth map image out.
_upload_box = gr.Image(type="pil", label="Upload Image")
_depth_view = gr.Image(type="pil", label="Depth Map")

demo = gr.Interface(
    title="Depth Anything V2 — Depth Estimation",
    description="Upload an image to generate a depth map. Lighter areas are closer, darker areas are farther.",
    fn=estimate_depth,
    inputs=_upload_box,
    outputs=_depth_view,
    examples=[],
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()