File size: 3,370 Bytes
e561725 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
"""
Custom handler for Hugging Face Inference Endpoints.
Serves the Depth Anything V3 Metric Large model for depth estimation.
"""
import base64
import io
from typing import Any
import numpy as np
import torch
from PIL import Image
class EndpointHandler:
    """Serve Depth Anything V3 Metric Large depth estimation on HF Inference Endpoints."""

    # Hub fallback when no local model path is supplied.
    DEFAULT_MODEL_ID = "depth-anything/da3metric-large"

    def __init__(self, path: str = ""):
        """
        Initialize the depth estimation model.

        Args:
            path: Path to the model directory (provided by HF Inference
                Endpoints). When empty, the model is downloaded from the Hub.
        """
        # Lazy import: keeps the module importable for inspection even when
        # the model package is not installed.
        from depth_anything_3.api import DepthAnything3

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Bug fix: the original ignored `path` and always re-downloaded from
        # the Hub. Inference Endpoints mounts the repo at `path`; prefer it.
        self.model = DepthAnything3.from_pretrained(path or self.DEFAULT_MODEL_ID)
        self.model = self.model.to(device=self.device)
        # inference_mode() only disables autograd; eval() also disables
        # dropout / batch-norm training behavior for deterministic output.
        self.model.eval()

    @staticmethod
    def _decode_image(img_b64: str) -> "Image.Image":
        """Decode one base64-encoded image payload into an RGB PIL image."""
        return Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")

    def _parse_inputs(self, inputs: Any) -> "list[Image.Image] | None":
        """Normalize the supported payload shapes to a list of RGB images.

        Returns:
            A list of PIL images, or None when the payload shape is not
            recognized. Raises on undecodable base64 / non-image bytes.
        """
        if isinstance(inputs, str):
            # Single base64 image.
            return [self._decode_image(inputs)]
        if isinstance(inputs, dict) and "image" in inputs:
            # Dict with an "image" key.
            return [self._decode_image(inputs["image"])]
        if isinstance(inputs, list):
            # List of base64 images.
            return [self._decode_image(img_b64) for img_b64 in inputs]
        return None

    def __call__(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Process an incoming depth-estimation request.

        Args:
            data: Request payload with 'inputs' containing base64 image(s)
                (a single string, a {"image": ...} dict, or a list of strings).

        Returns:
            Dictionary with base64-encoded depth map, confidence, intrinsics
            and extrinsics arrays, their shapes, and the depth value range —
            or an {"error": ...} dictionary on invalid input.
        """
        inputs = data.get("inputs")
        try:
            images = self._parse_inputs(inputs)
        except Exception as exc:
            # Bad base64 or bytes that are not a decodable image: report the
            # problem instead of surfacing an unhandled 500.
            return {"error": f"Failed to decode input image(s): {exc}"}
        if images is None:
            return {"error": "Invalid input format. Expected base64 encoded image(s)."}
        if not images:
            # Guard: an empty list would crash later on depth.min()/max().
            return {"error": "Empty image list."}

        # Run inference without autograd bookkeeping.
        with torch.inference_mode():
            prediction = self.model.inference(images)

        # Extract results (shapes per the model's prediction contract).
        depth = prediction.depth.cpu().numpy()            # [N, H, W]
        conf = prediction.conf.cpu().numpy()              # [N, H, W]
        intrinsics = prediction.intrinsics.cpu().numpy()  # [N, 3, 3]
        extrinsics = prediction.extrinsics.cpu().numpy()  # [N, 3, 4]

        # Arrays travel as base64-encoded .npy blobs; shapes are duplicated
        # in plain JSON so clients can sanity-check before decoding.
        return {
            "depth": self._encode_array(depth),
            "confidence": self._encode_array(conf),
            "intrinsics": self._encode_array(intrinsics),
            "extrinsics": self._encode_array(extrinsics),
            "shape": {
                "depth": list(depth.shape),
                "confidence": list(conf.shape),
                "intrinsics": list(intrinsics.shape),
                "extrinsics": list(extrinsics.shape),
            },
            "depth_range": {
                "min": float(depth.min()),
                "max": float(depth.max()),
            },
        }

    def _encode_array(self, arr: np.ndarray) -> str:
        """Encode a numpy array as a base64 string of .npy bytes (float32)."""
        buffer = io.BytesIO()
        np.save(buffer, arr.astype(np.float32))
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
|