rezzzq committed on
Commit
e561725
·
1 Parent(s): b91aefd

Add custom inference handler for DA3METRIC-LARGE depth estimation

Browse files
Files changed (2) hide show
  1. handler.py +96 -0
  2. requirements.txt +4 -0
handler.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom handler for Hugging Face Inference Endpoints.
3
+ Serves the Depth Anything V3 Metric Large model for depth estimation.
4
+ """
5
+
6
+ import base64
7
+ import io
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+ import torch
12
+ from PIL import Image
13
+
14
+
15
class EndpointHandler:
    """Inference Endpoints handler serving Depth Anything V3 Metric Large.

    A request carries one or more base64-encoded images under the "inputs"
    key; the response carries the model outputs as base64-encoded ``.npy``
    payloads together with their shapes and the depth value range.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the depth estimation model.

        Args:
            path: Path to the model directory (provided by HF Inference
                Endpoints). It is not used here: the weights are always
                loaded from the "depth-anything/da3metric-large" repository.
        """
        # Imported here rather than at module level, so the dependency is
        # only required once a handler is actually constructed.
        from depth_anything_3.api import DepthAnything3

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = DepthAnything3.from_pretrained("depth-anything/da3metric-large")
        self.model = self.model.to(device=self.device)

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Process incoming requests for depth estimation.

        Args:
            data: Request payload. ``data["inputs"]`` may be a single base64
                string, a dict with an "image" key holding a base64 string,
                or a non-empty list of base64 strings.

        Returns:
            On success, a dictionary with base64-encoded float32 ``.npy``
            payloads under "depth", "confidence", "intrinsics" and
            "extrinsics" (``None`` for any optional field the model did not
            produce), their shapes under "shape", and the min/max depth
            under "depth_range". On invalid input, a dictionary with a single
            "error" key describing the problem.
        """
        inputs = data.get("inputs")

        # Normalise the three accepted request shapes to a list of payloads.
        if isinstance(inputs, str):
            payloads = [inputs]
        elif isinstance(inputs, dict) and "image" in inputs:
            payloads = [inputs["image"]]
        elif isinstance(inputs, list):
            payloads = inputs
        else:
            return {"error": "Invalid input format. Expected base64 encoded image(s)."}

        # An empty batch has nothing to run inference on; report it the same
        # way as any other malformed request instead of failing in the model.
        if not payloads:
            return {"error": "Invalid input format. Expected at least one base64 encoded image."}

        try:
            images = [self._decode_image(payload) for payload in payloads]
        except ValueError as exc:
            return {"error": str(exc)}

        # Run inference
        with torch.inference_mode():
            prediction = self.model.inference(images)

        # NOTE(review): depending on the depth_anything_3 release, prediction
        # fields may be torch tensors or NumPy arrays, and the confidence /
        # camera fields may be missing for this model - TODO confirm. The
        # conversion below accepts all of these.
        depth = self._to_numpy(prediction.depth)  # [N, H, W]
        if depth is None:
            return {"error": "Model returned no depth map."}

        arrays = {
            "depth": depth,
            "confidence": self._to_numpy(getattr(prediction, "conf", None)),  # [N, H, W]
            "intrinsics": self._to_numpy(getattr(prediction, "intrinsics", None)),  # [N, 3, 3]
            "extrinsics": self._to_numpy(getattr(prediction, "extrinsics", None)),  # [N, 3, 4]
        }

        # Return base64-encoded numpy arrays
        response: dict[str, Any] = {
            name: None if arr is None else self._encode_array(arr)
            for name, arr in arrays.items()
        }
        response["shape"] = {
            name: None if arr is None else list(arr.shape)
            for name, arr in arrays.items()
        }
        response["depth_range"] = {
            "min": float(depth.min()),
            "max": float(depth.max()),
        }
        return response

    @staticmethod
    def _decode_image(payload: Any) -> "Image.Image":
        """Decode one base64 payload into an RGB PIL image.

        Raises:
            ValueError: If the payload is not valid base64 data or the
                decoded bytes cannot be read as an image.
        """
        try:
            raw = base64.b64decode(payload)
        except (TypeError, ValueError) as exc:
            # binascii.Error (bad padding etc.) is a ValueError subclass;
            # TypeError covers payloads that are neither str nor bytes-like.
            raise ValueError(f"Invalid base64 image data: {exc}") from exc

        try:
            return Image.open(io.BytesIO(raw)).convert("RGB")
        except (OSError, ValueError) as exc:
            raise ValueError(f"Could not decode image: {exc}") from exc

    @staticmethod
    def _to_numpy(value: Any) -> "np.ndarray | None":
        """Convert a model output (tensor, array or None) to a NumPy array."""
        if value is None:
            return None
        if isinstance(value, torch.Tensor):
            value = value.detach().cpu().numpy()
        return np.asarray(value)

    def _encode_array(self, arr: np.ndarray) -> str:
        """Encode numpy array as base64 string.

        The array is cast to float32 and serialised in ``.npy`` format, so a
        client can restore it with ``np.load`` on the decoded bytes.
        """
        buffer = io.BytesIO()
        np.save(buffer, arr.astype(np.float32))
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ depth-anything-3
2
+ torch
3
+ pillow
4
+ numpy