depth-anything-3

Running on Zero

depth-anything-3 / depth_anything_3 /utils /io /output_processor.py

linhaotong

update

b9f87ab 30 days ago

5.84 kB

	# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	Output processor for Depth Anything 3.

	This module handles model output processing, including tensor-to-numpy conversion,
	batch dimension removal, and Prediction object creation.
	"""

	from __future__ import annotations

	import numpy as np
	import torch
	from addict import Dict as AddictDict

	from depth_anything_3.specs import Prediction


	class OutputProcessor:
	    """
	    Convert raw model outputs into a ``Prediction`` object.

	    Every tensor entry has its leading axis squeezed (removed only when that
	    axis has size 1), is detached from autograd, moved to the CPU and converted
	    to a NumPy array. The converted pieces are then packed into a
	    ``Prediction``.

	    NOTE(review): shapes mentioned in the docstrings below are the documented
	    expectation only. The code never validates shapes; it merely squeezes
	    size-1 axes. In particular a depth of shape (B, N, 1, H, W) would keep its
	    singleton channel axis, because only axes 0 and -1 are squeezed — confirm
	    the real layout against the model head.
	    """

	    def __init__(self) -> None:
	        """Initialize the output processor (no state is stored)."""

	    def __call__(self, model_output: dict[str, torch.Tensor]) -> Prediction:
	        """
	        Convert a model output mapping to a ``Prediction`` object.

	        Args:
	            model_output: Mapping holding the model results. ``"depth"`` is
	                required; ``"depth_conf"``, ``"extrinsics"``, ``"intrinsics"``,
	                ``"sky"``, ``"aux"``, ``"gaussians"``, ``"scale_factor"`` and
	                ``"is_metric"`` are optional.
	                Presumed layouts (TODO confirm): depth/conf/sky batched as
	                (1, N, H, W), extrinsics (1, N, 4, 4), intrinsics (1, N, 3, 3).

	        Returns:
	            Prediction built from the converted entries. Optional entries that
	            are absent are passed through as ``None`` (``aux`` becomes an empty
	            ``AddictDict``, ``is_metric`` defaults to 0).

	        Raises:
	            KeyError: If ``model_output`` has no ``"depth"`` entry.
	        """
	        return Prediction(
	            depth=self._extract_depth(model_output),
	            sky=self._extract_sky(model_output),
	            conf=self._extract_conf(model_output),
	            extrinsics=self._extract_extrinsics(model_output),
	            intrinsics=self._extract_intrinsics(model_output),
	            is_metric=self._extract_is_metric(model_output),
	            # These two are forwarded untouched (no tensor conversion).
	            gaussians=model_output.get("gaussians", None),
	            aux=self._extract_aux(model_output),
	            scale_factor=model_output.get("scale_factor", None),
	        )

	    @staticmethod
	    def _to_numpy(tensor: torch.Tensor) -> np.ndarray:
	        """
	        Convert a tensor to a NumPy array on the CPU.

	        ``detach()`` comes first because ``Tensor.numpy()`` raises for tensors
	        that require grad; for tensors outside autograd it is a no-op.
	        """
	        return tensor.detach().cpu().numpy()

	    def _extract_optional(
	        self, model_output: dict[str, torch.Tensor], key: str
	    ) -> np.ndarray | None:
	        """
	        Fetch an optional tensor, squeeze its leading axis and convert it.

	        Args:
	            model_output: Model output mapping.
	            key: Name of the entry to read.

	        Returns:
	            The converted array, or ``None`` when the entry is missing or None.
	        """
	        tensor = model_output.get(key, None)
	        if tensor is None:
	            return None
	        # squeeze(0) only removes axis 0 when its size is 1 (single batch).
	        return self._to_numpy(tensor.squeeze(0))

	    def _extract_is_metric(self, model_output: dict[str, torch.Tensor]):
	        """
	        Read the ``is_metric`` flag from the model output.

	        A key lookup is tried first so that plain dictionaries work (attribute
	        lookup on a ``dict`` never sees its keys). If the key is absent the
	        previous behaviour is kept: attribute lookup with a default of 0.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            The stored flag, or 0 when neither a key nor an attribute exists.
	        """
	        if "is_metric" in model_output:
	            return model_output["is_metric"]
	        return getattr(model_output, "is_metric", 0)

	    def _extract_depth(self, model_output: dict[str, torch.Tensor]) -> np.ndarray:
	        """
	        Extract the depth tensor from the model output as a NumPy array.

	        Args:
	            model_output: Model output mapping; must contain ``"depth"``.

	        Returns:
	            Depth array with size-1 leading and trailing axes removed
	            (expected to be (N, H, W) — TODO confirm against the model head).

	        Raises:
	            KeyError: If ``"depth"`` is missing.
	        """
	        depth = model_output["depth"].squeeze(0).squeeze(-1)
	        return self._to_numpy(depth)

	    def _extract_conf(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
	        """
	        Extract the ``"depth_conf"`` tensor as a NumPy array.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            Confidence array (expected (N, H, W)), or ``None`` if absent.
	        """
	        return self._extract_optional(model_output, "depth_conf")

	    def _extract_extrinsics(
	        self, model_output: dict[str, torch.Tensor]
	    ) -> np.ndarray | None:
	        """
	        Extract the ``"extrinsics"`` tensor as a NumPy array.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            Extrinsics array (expected (N, 4, 4)), or ``None`` if absent.
	        """
	        return self._extract_optional(model_output, "extrinsics")

	    def _extract_intrinsics(
	        self, model_output: dict[str, torch.Tensor]
	    ) -> np.ndarray | None:
	        """
	        Extract the ``"intrinsics"`` tensor as a NumPy array.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            Intrinsics array (expected (N, 3, 3)), or ``None`` if absent.
	        """
	        return self._extract_optional(model_output, "intrinsics")

	    def _extract_sky(self, model_output: dict[str, torch.Tensor]) -> np.ndarray | None:
	        """
	        Extract the ``"sky"`` tensor and binarize it.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            Boolean array that is True where the sky value is >= 0.5
	            (expected (N, H, W)), or ``None`` if absent.
	        """
	        sky = self._extract_optional(model_output, "sky")
	        if sky is None:
	            return None
	        return sky >= 0.5

	    def _extract_aux(self, model_output: dict[str, torch.Tensor]) -> AddictDict:
	        """
	        Extract the auxiliary entries stored under ``"aux"``.

	        Tensor values get their leading axis squeezed and are converted to
	        NumPy; every other value is copied over unchanged.

	        Args:
	            model_output: Model output mapping.

	        Returns:
	            ``AddictDict`` with the converted entries (empty when ``"aux"`` is
	            missing or None).
	        """
	        converted = AddictDict()
	        aux = model_output.get("aux", None)
	        if aux is None:
	            return converted
	        for key, value in aux.items():
	            if isinstance(value, torch.Tensor):
	                converted[key] = self._to_numpy(value.squeeze(0))
	            else:
	                converted[key] = value
	        return converted


	# Backward-compatibility alias: ``OutputAdapter`` is bound to the very same
	# class object as ``OutputProcessor`` (it is not a subclass or a copy).
	# NOTE(review): presumably kept so code written against the older name keeps
	# working — confirm before removing.
	OutputAdapter = OutputProcessor