"""FastAPI service exposing metric depth estimation via Depth Anything V2.

Loads a metric-depth checkpoint once at startup and serves per-image depth
statistics (meters) over HTTP.
"""

import io

import cv2
import numpy as np
import torch
from fastapi import FastAPI, HTTPException, UploadFile
from PIL import Image, UnidentifiedImageError

from depth_anything_v2.dpt import DepthAnythingV2

app = FastAPI()

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
ENCODER = "vitl"
DATASET = "hypersim"  # indoor metric checkpoint; the outdoor one is "vkitti"
MAX_DEPTH = 20  # meters; upper bound of the metric head's output range

# Architecture hyper-parameters per encoder size (must match the checkpoint).
MODEL_CONFIGS = {
    "vits": {"encoder": "vits", "features": 64, "out_channels": [48, 96, 192, 384]},
    "vitb": {"encoder": "vitb", "features": 128, "out_channels": [96, 192, 384, 768]},
    "vitl": {"encoder": "vitl", "features": 256, "out_channels": [256, 512, 1024, 1024]},
}

model = DepthAnythingV2(**{**MODEL_CONFIGS[ENCODER], "max_depth": MAX_DEPTH})
# weights_only=True: the checkpoint is a plain state dict, and this prevents
# torch.load's pickle layer from executing arbitrary code (torch >= 1.13).
model.load_state_dict(
    torch.load(
        f"/app/checkpoints/depth_anything_v2_metric_{DATASET}_{ENCODER}.pth",
        map_location="cpu",
        weights_only=True,
    )
)
model = model.to(DEVICE).eval()


@app.get("/")
def root():
    """Health/info endpoint describing the loaded model configuration."""
    return {
        "message": "Depth Anything V2 Metric API running",
        "device": DEVICE,
        "encoder": ENCODER,
        "dataset": DATASET,
        "max_depth_meters": MAX_DEPTH,
    }


@app.post("/depth")
async def depth(file: UploadFile):
    """Estimate metric depth statistics (in meters) for an uploaded image.

    Returns min/max/mean depth over the whole frame plus the depth at the
    image center, along with the input image size.

    Raises:
        HTTPException(400): when the upload is empty or not a decodable
            image (previously these surfaced as unhandled 500 errors).
    """
    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Empty upload")
    try:
        pil_img = Image.open(io.BytesIO(contents)).convert("RGB")
    except UnidentifiedImageError:
        raise HTTPException(status_code=400, detail="File is not a valid image")

    # infer_image expects an OpenCV-style BGR uint8 array.
    img_np = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    with torch.inference_mode():
        depth_map = model.infer_image(img_np)  # (H, W) float array, meters

    h, w = depth_map.shape
    return {
        "unit": "meters",
        "closest_distance_m": round(float(np.min(depth_map)), 3),
        "farthest_distance_m": round(float(np.max(depth_map)), 3),
        "mean_distance_m": round(float(np.mean(depth_map)), 3),
        "center_distance_m": round(float(depth_map[h // 2, w // 2]), 3),
        "image_size": {"width": w, "height": h},
    }