rezzzq
/

da3-depth-api

Model card Files Files and versions

xet

Community

rezzzq committed on Dec 16, 2025

Commit

e561725

1 Parent(s): b91aefd

Add custom inference handler for DA3METRIC-LARGE depth estimation

Browse files

Files changed (2) hide show

handler.py +96 -0
requirements.txt +4 -0

handler.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""
+Custom handler for Hugging Face Inference Endpoints.
+Serves the Depth Anything V3 Metric Large model for depth estimation.
+"""
+import base64
+import io
+from typing import Any
+import numpy as np
+import torch
+from PIL import Image
+class EndpointHandler:
+    def __init__(self, path: str = ""):
+        """
+        Initialize the depth estimation model.
+        Args:
+            path: Path to the model directory (provided by HF Inference Endpoints)
+        """
+        from depth_anything_3.api import DepthAnything3
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = DepthAnything3.from_pretrained("depth-anything/da3metric-large")
+        self.model = self.model.to(device=self.device)
+    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
+        """
+        Process incoming requests for depth estimation.
+        Args:
+            data: Request payload with 'inputs' containing base64 image(s)
+        Returns:
+            Dictionary with depth map, confidence, intrinsics, extrinsics
+        """
+        inputs = data.get("inputs")
+        # Handle base64 encoded image input
+        if isinstance(inputs, str):
+            # Single base64 image
+            image_data = base64.b64decode(inputs)
+            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+            images = [image]
+        elif isinstance(inputs, dict) and "image" in inputs:
+            # Dict with image key
+            image_data = base64.b64decode(inputs["image"])
+            image = Image.open(io.BytesIO(image_data)).convert("RGB")
+            images = [image]
+        elif isinstance(inputs, list):
+            # List of base64 images
+            images = []
+            for img_b64 in inputs:
+                image_data = base64.b64decode(img_b64)
+                image = Image.open(io.BytesIO(image_data)).convert("RGB")
+                images.append(image)
+        else:
+            return {"error": "Invalid input format. Expected base64 encoded image(s)."}
+        # Run inference
+        with torch.inference_mode():
+            prediction = self.model.inference(images)
+        # Extract results
+        depth = prediction.depth.cpu().numpy()  # [N, H, W]
+        conf = prediction.conf.cpu().numpy()    # [N, H, W]
+        intrinsics = prediction.intrinsics.cpu().numpy()  # [N, 3, 3]
+        extrinsics = prediction.extrinsics.cpu().numpy()  # [N, 3, 4]
+        # Return base64-encoded numpy arrays
+        response = {
+            "depth": self._encode_array(depth),
+            "confidence": self._encode_array(conf),
+            "intrinsics": self._encode_array(intrinsics),
+            "extrinsics": self._encode_array(extrinsics),
+            "shape": {
+                "depth": list(depth.shape),
+                "confidence": list(conf.shape),
+                "intrinsics": list(intrinsics.shape),
+                "extrinsics": list(extrinsics.shape),
+            },
+            "depth_range": {
+                "min": float(depth.min()),
+                "max": float(depth.max()),
+            },
+        }
+        return response
+    def _encode_array(self, arr: np.ndarray) -> str:
+        """Encode numpy array as base64 string."""
+        buffer = io.BytesIO()
+        np.save(buffer, arr.astype(np.float32))
+        return base64.b64encode(buffer.getvalue()).decode("utf-8")

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+depth-anything-3
+torch
+pillow
+numpy