File size: 1,763 Bytes
752261f
 
 
 
6a7137a
 
 
44880d3
6a7137a
44880d3
752261f
ead7363
 
 
44880d3
752261f
47daa43
 
 
752261f
efd8064
47daa43
752261f
47daa43
 
 
 
6f6cfaa
752261f
6f6cfaa
c2f34c3
6a7137a
 
47daa43
 
 
 
 
 
 
752261f
5a70d37
6a7137a
 
752261f
 
 
 
 
ead7363
752261f
 
6a7137a
 
47daa43
 
 
 
 
752261f
6a7137a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import io

import cv2
import numpy as np
import torch
from fastapi import FastAPI, HTTPException, UploadFile
from PIL import Image

from depth_anything_v2.dpt import DepthAnythingV2

# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ENCODER selects the entry in MODEL_CONFIGS and, together with DATASET,
# the checkpoint file name that is loaded below.
ENCODER = "vitl"
DATASET = "hypersim"
# Forwarded to the model constructor as ``max_depth``.
MAX_DEPTH = 20

# Constructor keyword arguments for each supported encoder variant.
MODEL_CONFIGS = {
    "vits": {
        "encoder": "vits",
        "features": 64,
        "out_channels": [48, 96, 192, 384],
    },
    "vitb": {
        "encoder": "vitb",
        "features": 128,
        "out_channels": [96, 192, 384, 768],
    },
    "vitl": {
        "encoder": "vitl",
        "features": 256,
        "out_channels": [256, 512, 1024, 1024],
    },
}

# Build the network for the selected encoder, restore its weights (the
# checkpoint is mapped onto the CPU while loading), then move the model to
# DEVICE and switch it to eval mode.
model = DepthAnythingV2(**MODEL_CONFIGS[ENCODER], max_depth=MAX_DEPTH)
model.load_state_dict(
    torch.load(
        f"/app/checkpoints/depth_anything_v2_metric_{DATASET}_{ENCODER}.pth",
        map_location="cpu",
    )
)
model = model.to(DEVICE)
model = model.eval()


@app.get("/")
def root():
    """Return a status message together with the active model settings."""
    settings = {
        "device": DEVICE,
        "encoder": ENCODER,
        "dataset": DATASET,
        "max_depth_meters": MAX_DEPTH,
    }
    # The message comes first so the key order of the response is unchanged.
    return {"message": "Depth Anything V2 Metric API running", **settings}


@app.post("/depth")
async def depth(file: UploadFile):
    """Run depth estimation on an uploaded image and summarize the result.

    Args:
        file: The uploaded image; it must be in a format Pillow can decode.

    Returns:
        A dict with the minimum, maximum, mean and center-pixel values of
        the predicted depth map (each rounded to three decimals and labelled
        as meters), plus the depth map's width and height.

    Raises:
        HTTPException: With status 400 when the upload is empty, corrupt or
            otherwise cannot be decoded as an image.
    """
    contents = await file.read()

    # Decoding untrusted bytes can fail in several ways (unknown format,
    # truncated data, oversized image). Report those as a client error
    # instead of letting them surface as an opaque HTTP 500.
    try:
        with Image.open(io.BytesIO(contents)) as uploaded:
            pil_img = uploaded.convert("RGB")
    except (OSError, Image.DecompressionBombError) as err:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image.",
        ) from err

    # Presumably infer_image expects OpenCV-style BGR input, hence the
    # channel swap -- TODO confirm against the model's documentation.
    img_np = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    # NOTE(review): inference runs synchronously inside this coroutine, so
    # the event loop is blocked until it finishes. Offloading it to a worker
    # thread would change how many inferences can run at once; left as-is.
    with torch.inference_mode():
        depth_map = model.infer_image(img_np)

    # Unpacking fixes the depth map as a 2-D array indexed [row, column].
    h, w = depth_map.shape

    return {
        "unit": "meters",
        "closest_distance_m": round(float(np.min(depth_map)), 3),
        "farthest_distance_m": round(float(np.max(depth_map)), 3),
        "mean_distance_m": round(float(np.mean(depth_map)), 3),
        "center_distance_m": round(float(depth_map[h // 2, w // 2]), 3),
        "image_size": {"width": w, "height": h},
    }