Spaces:
Sleeping
Sleeping
| import onnxruntime as ort | |
| import numpy as np | |
| from pathlib import Path | |
| from typing import Optional, Tuple | |
| import cv2 | |
class DepthAnythingV2:
    """
    Depth Anything V2 model wrapper for ONNX inference

    Supports both small (25M) and large (1.3B) models.

    The wrapper owns one ONNX Runtime session and exposes the usual
    preprocess -> run -> postprocess pipeline through ``predict``.
    """

    def __init__(
        self,
        model_path: str,
        use_gpu: bool = True,
        use_tensorrt: bool = False
    ):
        """
        Initialize Depth Anything V2 model

        Args:
            model_path: Path to ONNX model file
            use_gpu: Whether to use GPU acceleration
            use_tensorrt: Whether to use TensorRT optimization

        Raises:
            FileNotFoundError: If ``model_path`` does not exist
        """
        self.model_path = Path(model_path)
        if not self.model_path.exists():
            raise FileNotFoundError(f"Model not found: {model_path}")

        # Setup ONNX Runtime session
        providers = self._get_providers(use_gpu, use_tensorrt)
        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        self.session = ort.InferenceSession(
            str(self.model_path),
            sess_options=session_options,
            providers=providers
        )

        # Get input/output names (only the first input and output are used)
        model_input = self.session.get_inputs()[0]
        self.input_name = model_input.name
        self.output_name = self.session.get_outputs()[0].name

        # Get expected input shape; dynamic dimensions are resolved per axis
        input_shape = model_input.shape
        self.input_height, self.input_width = self._resolve_input_size(input_shape)

        print(f"β Loaded model: {self.model_path.name}")
        print(f" Input shape: {input_shape}")
        # Report the providers the session actually registered, which may be
        # a subset of the requested ones when a backend is unavailable.
        print(f" Providers: {self.session.get_providers()}")

    @staticmethod
    def _resolve_input_size(input_shape, default: int = 518) -> Tuple[int, int]:
        """
        Derive the (height, width) fed to the model from its declared input shape

        Each spatial dimension is resolved independently: a positive integer is
        used as-is, anything else (symbolic name, ``None``, missing axis) falls
        back to ``default``.

        Args:
            input_shape: Declared input shape, e.g. ['batch_size', 3, 'height', 'width']
            default: Size used for dynamic dimensions (518 for Depth-Anything V2)

        Returns:
            Tuple of (input_height, input_width)
        """
        def _static_or_default(axis: int) -> int:
            value = input_shape[axis] if axis < len(input_shape) else None
            # bool is excluded explicitly because it is a subclass of int
            if isinstance(value, int) and not isinstance(value, bool) and value > 0:
                return value
            return default

        return _static_or_default(2), _static_or_default(3)

    def _get_providers(self, use_gpu: bool, use_tensorrt: bool) -> list:
        """
        Get ONNX Runtime execution providers

        Args:
            use_gpu: Whether to request the CUDA provider
            use_tensorrt: Whether to request the TensorRT provider (needs use_gpu)

        Returns:
            Provider names in priority order; the CPU provider is always last
        """
        providers = []
        if use_tensorrt and use_gpu:
            providers.append('TensorrtExecutionProvider')
        if use_gpu:
            providers.append('CUDAExecutionProvider')
        providers.append('CPUExecutionProvider')
        return providers

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
        """
        Preprocess image for model input

        Args:
            image: Input image (RGB, HxWx3); values are divided by 255, so an
                8-bit value range is assumed

        Returns:
            Tuple of (preprocessed_image, original_size) where the image is a
            float32 array in NCHW layout and original_size is (h, w)

        Raises:
            ValueError: If the image is not a non-empty HxWx3 array
        """
        # Fail early with a clear message instead of an opaque broadcasting or
        # OpenCV error further down.
        shape = getattr(image, "shape", None)
        if shape is None or len(shape) != 3 or shape[2] != 3:
            raise ValueError(
                f"Expected an RGB image of shape HxWx3, got shape {shape}"
            )
        if shape[0] == 0 or shape[1] == 0:
            raise ValueError(f"Input image is empty, got shape {shape}")

        h, w = shape[:2]
        original_size = (h, w)

        # Resize to model input size (cv2 expects (width, height))
        image = cv2.resize(
            image,
            (self.input_width, self.input_height),
            interpolation=cv2.INTER_LINEAR
        )

        # Normalize
        image = image.astype(np.float32) / 255.0

        # ImageNet normalization
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image = (image - mean) / std

        # Transpose to NCHW format
        image = image.transpose(2, 0, 1)
        image = np.expand_dims(image, axis=0)
        return image, original_size

    def postprocess(
        self,
        depth: np.ndarray,
        original_size: Tuple[int, int]
    ) -> np.ndarray:
        """
        Postprocess depth map output

        Args:
            depth: Raw depth output from model
            original_size: Original image size (h, w)

        Returns:
            Depth map resized to original size and min-max scaled to 0-1
        """
        # Remove batch dimension
        if len(depth.shape) == 4:
            depth = depth[0]
        # Remove channel dimension if present
        if len(depth.shape) == 3:
            depth = depth[0]

        # Resize to original size (cv2 expects (width, height))
        h, w = original_size
        depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_LINEAR)

        # Normalize to 0-1 range; the epsilon avoids division by zero on a
        # constant depth map
        depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
        return depth

    def predict(
        self,
        image: np.ndarray,
        resize_output: bool = True
    ) -> np.ndarray:
        """
        Run depth estimation on image

        Args:
            image: Input image (RGB, HxWx3)
            resize_output: Whether to postprocess the output (resize to the
                original size and normalize to 0-1)

        Returns:
            Depth map with the input's height and width if resize_output=True,
            otherwise the raw, unnormalized model output
        """
        # Preprocess
        input_tensor, original_size = self.preprocess(image)

        # Run inference
        outputs = self.session.run(
            [self.output_name],
            {self.input_name: input_tensor}
        )
        depth = outputs[0]

        # Postprocess
        if resize_output:
            depth = self.postprocess(depth, original_size)
        return depth

    def __call__(self, image: np.ndarray) -> np.ndarray:
        """Convenience method for prediction"""
        return self.predict(image)
class ModelManager:
    """
    Registry of named depth models behind one prediction entry point
    """

    def __init__(self):
        # Maps a model identifier to its loaded model instance
        self.models = {}

    def load_model(
        self,
        name: str,
        model_path: str,
        use_gpu: bool = True,
        use_tensorrt: bool = False
    ) -> DepthAnythingV2:
        """
        Create a depth model and register it under the given name

        An entry already stored under the same name is replaced.

        Args:
            name: Model identifier (e.g., 'small', 'large')
            model_path: Path to ONNX model
            use_gpu: Whether to use GPU
            use_tensorrt: Whether to use TensorRT

        Returns:
            The newly registered model instance
        """
        loaded = DepthAnythingV2(model_path, use_gpu, use_tensorrt)
        self.models[name] = loaded
        return loaded

    def get_model(self, name: str) -> Optional[DepthAnythingV2]:
        """Look up a registered model; returns None when the name is unknown"""
        return self.models.get(name, None)

    def predict(self, image: np.ndarray, model_name: str = 'small') -> np.ndarray:
        """
        Run depth prediction with one of the registered models

        Args:
            image: Input image
            model_name: Name of model to use

        Returns:
            Depth map

        Raises:
            ValueError: If no model is registered under ``model_name``
        """
        selected = self.get_model(model_name)
        if selected is not None:
            return selected.predict(image)
        raise ValueError(f"Model '{model_name}' not loaded")