"""Depth-estimation helpers built on the Hugging Face depth-estimation pipeline."""

import numpy as np
import torch
from PIL import Image

# Lazily-initialized pipeline shared across calls (the model is ~600 MB,
# so we never want to construct it more than once per process).
_depth_cache = None


def get_depth_model():
    """Return the cached depth-estimation pipeline, creating it on first use.

    Uses Apple's Depth Pro via transformers; placed on CUDA device 0 when
    available, otherwise CPU (device=-1). The `transformers` import is kept
    local so merely importing this module stays cheap.
    """
    global _depth_cache
    if _depth_cache is None:
        from transformers import pipeline as hf_pipeline

        # Depth Pro from Apple — best metric depth, ~600MB
        _depth_cache = hf_pipeline(
            "depth-estimation",
            model="apple/DepthPro-hf",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _depth_cache


def estimate_depth(image: Image.Image) -> dict:
    """
    Returns {"depth": [[float]], "width": int, "height": int, "min": float, "max": float}
    Depth values are metric (meters) when Depth Pro is used.
    """
    pipe = get_depth_model()
    result = pipe(image)

    # BUG FIX: the pipeline's "depth" entry is a PIL image rescaled to 0-255
    # for visualization, which destroys the metric scale this function's
    # docstring promises. The raw metric values live in "predicted_depth"
    # (a torch tensor). Prefer it; fall back to "depth" only if absent.
    depth_map = result.get("predicted_depth", result.get("depth"))

    if isinstance(depth_map, Image.Image):
        arr = np.array(depth_map).astype(np.float32)
    elif isinstance(depth_map, torch.Tensor):
        # Drop leading batch/channel dims; move off GPU before numpy conversion
        # (np.array() on a CUDA tensor would raise).
        arr = depth_map.squeeze().detach().cpu().numpy().astype(np.float32)
    else:
        arr = np.array(depth_map, dtype=np.float32)

    # Resize to match the source image if needed. numpy shape is (H, W);
    # PIL.resize takes (W, H). Mode-"F" float32 arrays round-trip losslessly.
    if arr.shape[:2] != (image.height, image.width):
        depth_pil = Image.fromarray(arr).resize(
            (image.width, image.height), Image.BILINEAR
        )
        arr = np.array(depth_pil)

    dmin = float(arr.min())
    dmax = float(arr.max())
    return {
        "depth": arr.tolist(),
        "width": image.width,
        "height": image.height,
        "min": dmin,
        "max": dmax,
    }