linhaotong
update
b9f87ab
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Output processor for Depth Anything 3.
This module handles model output processing, including tensor-to-numpy conversion,
batch dimension removal, and Prediction object creation.
"""
from __future__ import annotations
import numpy as np
import torch
from addict import Dict as AddictDict
from depth_anything_3.specs import Prediction
class OutputProcessor:
    """
    Output processor for converting model outputs to Prediction objects.

    Each tensor entry of the model output is detached from autograd, has its
    leading (batch) dimension squeezed away when that dimension has size 1,
    is moved to the CPU and is converted to a numpy array. The converted
    pieces are then bundled into a ``Prediction``.
    """

    def __init__(self) -> None:
        """Initialize the output processor (it keeps no state)."""

    def __call__(self, model_output: dict[str, torch.Tensor]) -> Prediction:
        """
        Convert model output to a Prediction object.

        Args:
            model_output: Mapping produced by the model. ``"depth"`` is
                required; ``"depth_conf"``, ``"extrinsics"``, ``"intrinsics"``,
                ``"sky"``, ``"aux"``, ``"gaussians"``, ``"scale_factor"`` and
                ``"is_metric"`` are optional.
                NOTE(review): the batch dimension is presumably always 1
                (``squeeze(0)`` is a no-op otherwise), and depth is presumably
                (1, N, H, W) or (1, N, H, W, 1) -- confirm against the model.

        Returns:
            Prediction built from the converted entries. Missing optional
            tensors are passed as ``None``, ``aux`` is always an (possibly
            empty) AddictDict, ``is_metric`` defaults to ``0``, and
            ``gaussians`` / ``scale_factor`` are forwarded unchanged.

        Raises:
            KeyError: If ``"depth"`` is missing from a plain-dict output.
        """
        depth = self._extract_depth(model_output)
        conf = self._extract_conf(model_output)
        extrinsics = self._extract_extrinsics(model_output)
        intrinsics = self._extract_intrinsics(model_output)
        sky = self._extract_sky(model_output)
        aux = self._extract_aux(model_output)

        return Prediction(
            depth=depth,
            sky=sky,
            conf=conf,
            extrinsics=extrinsics,
            intrinsics=intrinsics,
            is_metric=self._extract_is_metric(model_output),
            # These two are forwarded as-is, without any conversion.
            gaussians=model_output.get("gaussians", None),
            aux=aux,
            scale_factor=model_output.get("scale_factor", None),
        )

    @staticmethod
    def _optional_to_numpy(
        model_output: dict[str, torch.Tensor], key: str
    ) -> np.ndarray | None:
        """
        Convert the optional tensor stored under ``key`` to a numpy array.

        Args:
            model_output: Model output dictionary
            key: Entry to look up

        Returns:
            The array with its leading size-1 dimension removed, or None when
            the entry is missing or None.
        """
        tensor = model_output.get(key, None)
        if tensor is None:
            return None
        # detach() keeps the conversion working for tensors that still track
        # gradients; Tensor.numpy() refuses those otherwise.
        return tensor.detach().squeeze(0).cpu().numpy()

    @staticmethod
    def _extract_is_metric(model_output: dict[str, torch.Tensor]):
        """
        Read the ``is_metric`` flag from the model output.

        This is a key lookup, like every other field read by this class. An
        attribute lookup (``getattr``) never sees the key on a plain dict and
        would always fall back to the default.

        Args:
            model_output: Model output dictionary

        Returns:
            The value stored under ``"is_metric"``, or ``0`` when absent.
        """
        return model_output.get("is_metric", 0)

    def _extract_depth(self, model_output: dict[str, torch.Tensor]) -> np.ndarray:
        """
        Extract depth tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary; must contain ``"depth"``

        Returns:
            Depth array with the leading dimension and the trailing dimension
            each removed when they have size 1.
            NOTE(review): intended result is presumably (N, H, W) -- confirm.
        """
        depth = model_output["depth"].detach()
        # Drop the batch axis first, then a trailing singleton axis if any.
        return depth.squeeze(0).squeeze(-1).cpu().numpy()

    def _extract_conf(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract confidence tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Array from ``"depth_conf"`` with the leading size-1 dimension
            removed, or None when the entry is absent.
        """
        return self._optional_to_numpy(model_output, "depth_conf")

    def _extract_extrinsics(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract extrinsics tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Array from ``"extrinsics"`` with the leading size-1 dimension
            removed, or None when the entry is absent.
        """
        return self._optional_to_numpy(model_output, "extrinsics")

    def _extract_intrinsics(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract intrinsics tensor from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            Array from ``"intrinsics"`` with the leading size-1 dimension
            removed, or None when the entry is absent.
        """
        return self._optional_to_numpy(model_output, "intrinsics")

    def _extract_sky(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
        """
        Extract sky tensor from model output and convert to a boolean mask.

        Args:
            model_output: Model output dictionary

        Returns:
            Boolean array that is True where the ``"sky"`` value is >= 0.5,
            with the leading size-1 dimension removed, or None when the entry
            is absent.
        """
        sky = self._optional_to_numpy(model_output, "sky")
        if sky is None:
            return None
        return sky >= 0.5

    def _extract_aux(self, model_output: dict[str, torch.Tensor]) -> AddictDict:
        """
        Extract auxiliary data from model output and convert to numpy.

        Args:
            model_output: Model output dictionary

        Returns:
            AddictDict with the same keys as ``model_output["aux"]``: tensor
            values are converted like the other outputs, everything else is
            copied over unchanged. Empty when there is no ``"aux"`` entry.
        """
        ret = AddictDict()
        aux = model_output.get("aux", None)
        if aux is None:
            return ret
        for key, value in aux.items():
            if isinstance(value, torch.Tensor):
                ret[key] = value.detach().squeeze(0).cpu().numpy()
            else:
                ret[key] = value
        return ret
# Backward compatibility alias: ``OutputAdapter`` is bound to the very same
# class object as ``OutputProcessor``, so code using either name gets the same
# class (presumably ``OutputAdapter`` is the older public name -- confirm).
OutputAdapter = OutputProcessor