""" Custom handler for Hugging Face Inference Endpoints. Serves the Depth Anything V3 Metric Large model for depth estimation. """ import base64 import io from typing import Any import numpy as np import torch from PIL import Image class EndpointHandler: def __init__(self, path: str = ""): """ Initialize the depth estimation model. Args: path: Path to the model directory (provided by HF Inference Endpoints) """ from depth_anything_3.api import DepthAnything3 self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = DepthAnything3.from_pretrained("depth-anything/da3metric-large") self.model = self.model.to(device=self.device) def __call__(self, data: dict[str, Any]) -> dict[str, Any]: """ Process incoming requests for depth estimation. Args: data: Request payload with 'inputs' containing base64 image(s) Returns: Dictionary with depth map, confidence, intrinsics, extrinsics """ inputs = data.get("inputs") # Handle base64 encoded image input if isinstance(inputs, str): # Single base64 image image_data = base64.b64decode(inputs) image = Image.open(io.BytesIO(image_data)).convert("RGB") images = [image] elif isinstance(inputs, dict) and "image" in inputs: # Dict with image key image_data = base64.b64decode(inputs["image"]) image = Image.open(io.BytesIO(image_data)).convert("RGB") images = [image] elif isinstance(inputs, list): # List of base64 images images = [] for img_b64 in inputs: image_data = base64.b64decode(img_b64) image = Image.open(io.BytesIO(image_data)).convert("RGB") images.append(image) else: return {"error": "Invalid input format. Expected base64 encoded image(s)."} # Run inference with torch.inference_mode(): prediction = self.model.inference(images) # Extract results depth = prediction.depth.cpu().numpy() # [N, H, W] conf = prediction.conf.cpu().numpy() # [N, H, W] intrinsics = prediction.intrinsics.cpu().numpy() # [N, 3, 3] extrinsics = prediction.extrinsics.cpu().numpy() # [N, 3, 4] # Return base64-encoded numpy arrays response = { "depth": self._encode_array(depth), "confidence": self._encode_array(conf), "intrinsics": self._encode_array(intrinsics), "extrinsics": self._encode_array(extrinsics), "shape": { "depth": list(depth.shape), "confidence": list(conf.shape), "intrinsics": list(intrinsics.shape), "extrinsics": list(extrinsics.shape), }, "depth_range": { "min": float(depth.min()), "max": float(depth.max()), }, } return response def _encode_array(self, arr: np.ndarray) -> str: """Encode numpy array as base64 string.""" buffer = io.BytesIO() np.save(buffer, arr.astype(np.float32)) return base64.b64encode(buffer.getvalue()).decode("utf-8")