Spaces:

dimdimz
/

DimensioDepth

Sleeping

App Files Files Community

DimensioDepth/backend/utils/model_loader.py

wwieerrz

🎨 Launch DimensioDepth - Advanced AI Depth Estimation

463afdd about 1 month ago

raw

history blame contribute delete

6.43 kB

	import onnxruntime as ort
	import numpy as np
	from pathlib import Path
	from typing import Optional, Tuple
	import cv2


	class DepthAnythingV2:
	    """
	    Depth Anything V2 model wrapper for ONNX inference.

	    Loads an ONNX model through ONNX Runtime, converts images into the
	    normalized NCHW float32 tensor fed to the session, and turns the raw
	    session output into a depth map scaled to the 0-1 range.
	    """

	    # Spatial size used whenever the model declares a dynamic height/width.
	    DEFAULT_INPUT_SIZE = 518

	    # ImageNet per-channel statistics applied after scaling pixels to 0-1.
	    _MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
	    _STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

	    def __init__(
	        self,
	        model_path: str,
	        use_gpu: bool = True,
	        use_tensorrt: bool = False
	    ):
	        """
	        Initialize Depth Anything V2 model

	        Args:
	            model_path: Path to ONNX model file
	            use_gpu: Whether to use GPU acceleration
	            use_tensorrt: Whether to use TensorRT optimization

	        Raises:
	            FileNotFoundError: If ``model_path`` does not exist
	        """
	        self.model_path = Path(model_path)

	        if not self.model_path.exists():
	            raise FileNotFoundError(f"Model not found: {model_path}")

	        # Only hand the session providers that this onnxruntime build
	        # reports as available; if the intersection is somehow empty,
	        # pass the requested list through unchanged.
	        requested = self._get_providers(use_gpu, use_tensorrt)
	        available = set(ort.get_available_providers())
	        providers = [p for p in requested if p in available] or requested

	        session_options = ort.SessionOptions()
	        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

	        self.session = ort.InferenceSession(
	            str(self.model_path),
	            sess_options=session_options,
	            providers=providers
	        )

	        # Get input/output names (first input and first output only)
	        model_input = self.session.get_inputs()[0]
	        self.input_name = model_input.name
	        self.output_name = self.session.get_outputs()[0].name

	        # Height and width are resolved independently so that a shape with
	        # only one dynamic spatial dimension still yields two integers.
	        input_shape = model_input.shape
	        self.input_height, self.input_width = self._resolve_input_size(input_shape)

	        print(f"✓ Loaded model: {self.model_path.name}")
	        print(f" Input shape: {input_shape}")
	        # Report the providers the session really uses, not the wish list.
	        print(f" Providers: {self.session.get_providers()}")

	    @classmethod
	    def _resolve_input_size(cls, input_shape) -> Tuple[int, int]:
	        """
	        Pick the (height, width) to resize images to.

	        Args:
	            input_shape: Input shape reported by the session, e.g.
	                ``['batch_size', 3, 'height', 'width']`` or ``[1, 3, 518, 518]``

	        Returns:
	            Tuple of (height, width); any entry that is missing or is not a
	            positive integer (symbolic name, ``None``) is replaced by
	            ``DEFAULT_INPUT_SIZE``
	        """
	        def pick(index: int) -> int:
	            dim = input_shape[index] if len(input_shape) > index else None
	            if isinstance(dim, int) and dim > 0:
	                return dim
	            return cls.DEFAULT_INPUT_SIZE

	        return pick(2), pick(3)

	    def _get_providers(self, use_gpu: bool, use_tensorrt: bool) -> list:
	        """
	        Get ONNX Runtime execution providers

	        Returns the provider names in priority order: TensorRT (only when
	        both ``use_tensorrt`` and ``use_gpu`` are set), then CUDA (when
	        ``use_gpu`` is set), with the CPU provider always last.
	        """
	        providers = []

	        if use_tensorrt and use_gpu:
	            providers.append('TensorrtExecutionProvider')

	        if use_gpu:
	            providers.append('CUDAExecutionProvider')

	        providers.append('CPUExecutionProvider')

	        return providers

	    @staticmethod
	    def _ensure_rgb(image: np.ndarray) -> np.ndarray:
	        """
	        Coerce an image array to three channels (HxWx3).

	        Single-channel input (HxW or HxWx1) is replicated across three
	        channels and the fourth channel of HxWx4 input is dropped.

	        Raises:
	            ValueError: If the array has any other shape
	        """
	        image = np.asarray(image)

	        if image.ndim == 2:
	            return np.repeat(image[..., np.newaxis], 3, axis=2)

	        if image.ndim == 3:
	            channels = image.shape[2]
	            if channels == 3:
	                return image
	            if channels == 1:
	                return np.repeat(image, 3, axis=2)
	            if channels == 4:
	                # Copy so downstream code gets a contiguous HxWx3 buffer.
	                return np.ascontiguousarray(image[..., :3])

	        raise ValueError(
	            f"Expected an HxW, HxWx1, HxWx3 or HxWx4 image, got shape {image.shape}"
	        )

	    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
	        """
	        Preprocess image for model input

	        Args:
	            image: Input image (RGB, HxWx3); single-channel and 4-channel
	                arrays are converted to three channels first

	        Returns:
	            Tuple of (preprocessed_image, original_size) where
	            preprocessed_image is a float32 array of shape
	            (1, 3, input_height, input_width) and original_size is (h, w)

	        Raises:
	            ValueError: If the image shape is not supported
	        """
	        image = self._ensure_rgb(image)

	        h, w = image.shape[:2]
	        original_size = (h, w)

	        # Resize to model input size (nothing to do if it already matches)
	        if (h, w) != (self.input_height, self.input_width):
	            image = cv2.resize(
	                image,
	                (self.input_width, self.input_height),
	                interpolation=cv2.INTER_LINEAR
	            )

	        # Scale to 0-1, then apply ImageNet normalization
	        image = image.astype(np.float32) / 255.0
	        image = (image - self._MEAN) / self._STD

	        # Transpose HWC -> CHW and add the batch dimension (NCHW)
	        image = image.transpose(2, 0, 1)
	        image = np.expand_dims(image, axis=0)

	        return image, original_size

	    def postprocess(
	        self,
	        depth: np.ndarray,
	        original_size: Tuple[int, int]
	    ) -> np.ndarray:
	        """
	        Postprocess depth map output

	        Args:
	            depth: Raw depth output from model
	            original_size: Original image size (h, w)

	        Returns:
	            Depth map resized to original size and min-max normalized to
	            the 0-1 range
	        """
	        # Remove batch dimension
	        if depth.ndim == 4:
	            depth = depth[0]

	        # Remove channel dimension if present
	        if depth.ndim == 3:
	            depth = depth[0]

	        # Resize to original size (cv2 expects (width, height) order)
	        h, w = original_size
	        if depth.shape[:2] != (h, w):
	            depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_LINEAR)

	        # Normalize to 0-1 range; the epsilon keeps a constant map from
	        # dividing by zero.
	        lowest = depth.min()
	        depth = (depth - lowest) / (depth.max() - lowest + 1e-8)

	        return depth

	    def predict(
	        self,
	        image: np.ndarray,
	        resize_output: bool = True
	    ) -> np.ndarray:
	        """
	        Run depth estimation on image

	        Args:
	            image: Input image (RGB, HxWx3)
	            resize_output: Whether to postprocess the output (resize to the
	                original size and normalize to 0-1)

	        Returns:
	            Postprocessed depth map when resize_output=True; otherwise the
	            first session output exactly as returned by the runtime
	        """
	        # Preprocess
	        input_tensor, original_size = self.preprocess(image)

	        # Run inference
	        outputs = self.session.run(
	            [self.output_name],
	            {self.input_name: input_tensor}
	        )

	        depth = outputs[0]

	        # Postprocess
	        if resize_output:
	            depth = self.postprocess(depth, original_size)

	        return depth

	    def __call__(self, image: np.ndarray) -> np.ndarray:
	        """Convenience method for prediction"""
	        return self.predict(image)


	class ModelManager:
	    """
	    Manages multiple depth models and provides a unified interface

	    Models are stored by name in ``self.models`` and looked up again when
	    a prediction is requested.
	    """

	    def __init__(self):
	        # Maps model identifier -> loaded model instance.
	        self.models = {}

	    def load_model(
	        self,
	        name: str,
	        model_path: str,
	        use_gpu: bool = True,
	        use_tensorrt: bool = False
	    ) -> "DepthAnythingV2":
	        """
	        Load a depth model

	        A model already registered under the same name is replaced.

	        Args:
	            name: Model identifier (e.g., 'small', 'large')
	            model_path: Path to ONNX model
	            use_gpu: Whether to use GPU
	            use_tensorrt: Whether to use TensorRT

	        Returns:
	            Loaded model instance
	        """
	        model = DepthAnythingV2(model_path, use_gpu, use_tensorrt)
	        self.models[name] = model
	        return model

	    def get_model(self, name: str) -> Optional["DepthAnythingV2"]:
	        """Get a loaded model by name, or None if no such model is loaded"""
	        return self.models.get(name)

	    def predict(
	        self,
	        image: np.ndarray,
	        model_name: str = 'small',
	        resize_output: bool = True
	    ) -> np.ndarray:
	        """
	        Run prediction using specified model

	        Args:
	            image: Input image
	            model_name: Name of model to use
	            resize_output: Forwarded to the model's ``predict``

	        Returns:
	            Depth map

	        Raises:
	            ValueError: If no model is loaded under ``model_name``
	        """
	        model = self.get_model(model_name)
	        if model is None:
	            raise ValueError(f"Model '{model_name}' not loaded")

	        return model.predict(image, resize_output=resize_output)