Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) 2025 ByteDance Ltd. and/or its affiliates | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """ | |
| Output processor for Depth Anything 3. | |
| This module handles model output processing, including tensor-to-numpy conversion, | |
| batch dimension removal, and Prediction object creation. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| import torch | |
| from addict import Dict as AddictDict | |
| from depth_anything_3.specs import Prediction | |
class OutputProcessor:
    """
    Output processor for converting model outputs to Prediction objects.

    Converts the tensors found in the model output mapping to numpy arrays,
    drops a leading singleton (batch) dimension, and packs the results into a
    ``Prediction``. The processor holds no state.
    """

    def __init__(self) -> None:
        """Initialize the output processor (stateless, nothing to set up)."""

    def __call__(self, model_output: dict[str, torch.Tensor]) -> Prediction:
        """
        Convert model output to Prediction object.

        Args:
            model_output: Mapping holding the model results. ``"depth"`` is
                required; ``"depth_conf"``, ``"extrinsics"``, ``"intrinsics"``,
                ``"sky"``, ``"aux"``, ``"gaussians"``, ``"scale_factor"`` and
                ``"is_metric"`` are optional.
                NOTE(review): tensors are presumably laid out as
                depth/conf/sky (1, N, H, W) (depth may carry a trailing
                singleton channel), extrinsics (1, N, 4, 4) and intrinsics
                (1, N, 3, 3) -- confirm against the model heads.

        Returns:
            Prediction: Object built from the converted arrays. Optional
            entries that are absent are passed as ``None`` (``aux`` as an
            empty mapping, ``is_metric`` as ``0``).

        Raises:
            KeyError: If ``"depth"`` is missing from ``model_output``.
        """
        # Only a leading dimension of size 1 is removed; any other batch size
        # is left in place by ``squeeze(0)``.
        depth = self._extract_depth(model_output)
        conf = self._extract_conf(model_output)
        extrinsics = self._extract_extrinsics(model_output)
        intrinsics = self._extract_intrinsics(model_output)
        sky = self._extract_sky(model_output)
        aux = self._extract_aux(model_output)
        gaussians = model_output.get("gaussians", None)
        scale_factor = model_output.get("scale_factor", None)
        is_metric = self._extract_is_metric(model_output)
        return Prediction(
            depth=depth,
            sky=sky,
            conf=conf,
            extrinsics=extrinsics,
            intrinsics=intrinsics,
            is_metric=is_metric,
            gaussians=gaussians,
            aux=aux,
            scale_factor=scale_factor,
        )

    @staticmethod
    def _to_numpy(tensor: torch.Tensor, squeeze_last: bool = False) -> np.ndarray:
        """
        Drop the leading singleton dimension and convert a tensor to numpy.

        Args:
            tensor: Tensor to convert.
            squeeze_last: Also drop the last dimension when it has size 1.

        Returns:
            Numpy array holding the tensor data on the CPU.
        """
        # detach() first: numpy() raises on tensors that still require grad.
        squeezed = tensor.detach().squeeze(0)
        if squeeze_last:
            squeezed = squeezed.squeeze(-1)
        return squeezed.cpu().numpy()

    def _extract_is_metric(self, model_output: dict[str, torch.Tensor]):
        """
        Read the ``"is_metric"`` flag from the model output.

        The flag is looked up as a mapping key, like every other field.
        Attribute lookup (``getattr``) would always give the default on a
        plain dict and would never give the default on an attribute-style
        mapping that auto-creates missing entries.

        Args:
            model_output: Model output dictionary

        Returns:
            The stored ``"is_metric"`` value, or ``0`` when the key is absent.
        """
        return model_output.get("is_metric", 0)

    def _extract_depth(self, model_output: dict[str, torch.Tensor]) -> np.ndarray:
        """
        Extract depth tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Depth array with the leading and trailing singleton dimensions
            removed (presumably (N, H, W) -- confirm against the model head).

        Raises:
            KeyError: If ``"depth"`` is missing.
        """
        return self._to_numpy(model_output["depth"], squeeze_last=True)

    def _extract_conf(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract confidence tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Confidence array read from ``"depth_conf"`` with the leading
            singleton dimension removed, or None when the key is absent.
        """
        conf = model_output.get("depth_conf", None)
        return None if conf is None else self._to_numpy(conf)

    def _extract_extrinsics(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract extrinsics tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Extrinsics array with the leading singleton dimension removed
            (presumably (N, 4, 4) -- confirm), or None when absent.
        """
        extrinsics = model_output.get("extrinsics", None)
        return None if extrinsics is None else self._to_numpy(extrinsics)

    def _extract_intrinsics(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract intrinsics tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Intrinsics array with the leading singleton dimension removed
            (presumably (N, 3, 3) -- confirm), or None when absent.
        """
        intrinsics = model_output.get("intrinsics", None)
        return None if intrinsics is None else self._to_numpy(intrinsics)

    def _extract_sky(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract sky tensor from model output and convert to a boolean mask.

        Args:
            model_output: Model output dictionary

        Returns:
            Boolean array that is True where the sky value is >= 0.5, with the
            leading singleton dimension removed, or None when absent.
        """
        sky = model_output.get("sky", None)
        if sky is None:
            return None
        return self._to_numpy(sky) >= 0.5

    def _extract_aux(self, model_output: dict[str, torch.Tensor]) -> AddictDict:
        """
        Extract auxiliary data from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Dictionary containing auxiliary data. Tensor values are converted
            to numpy (leading singleton dimension removed); every other value
            is passed through unchanged. Empty when ``"aux"`` is absent.
        """
        ret = AddictDict()
        aux = model_output.get("aux", None)
        if aux is None:
            return ret
        for key, value in aux.items():
            ret[key] = self._to_numpy(value) if isinstance(value, torch.Tensor) else value
        return ret


# Backward compatibility alias
OutputAdapter = OutputProcessor