File size: 6,428 Bytes
463afdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import onnxruntime as ort
import numpy as np
from pathlib import Path
from typing import Optional, Tuple
import cv2


class DepthAnythingV2:
    """
    Depth Anything V2 model wrapper for ONNX inference
    Supports both small (25M) and large (1.3B) models

    The wrapper resizes an RGB image to the model's input resolution,
    applies ImageNet normalization, runs the ONNX Runtime session and
    (optionally) resizes the prediction back to the source resolution,
    rescaled to the 0-1 range.
    """

    # Side length used for any spatial input dimension the model leaves dynamic
    DEFAULT_INPUT_SIZE = 518

    def __init__(
        self,
        model_path: str,
        use_gpu: bool = True,
        use_tensorrt: bool = False
    ):
        """
        Initialize Depth Anything V2 model

        Args:
            model_path: Path to ONNX model file
            use_gpu: Whether to use GPU acceleration
            use_tensorrt: Whether to use TensorRT optimization

        Raises:
            FileNotFoundError: If model_path does not exist
        """
        self.model_path = Path(model_path)

        if not self.model_path.exists():
            raise FileNotFoundError(f"Model not found: {model_path}")

        # Setup ONNX Runtime session
        providers = self._get_providers(use_gpu, use_tensorrt)

        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        self.session = ort.InferenceSession(
            str(self.model_path),
            sess_options=session_options,
            providers=providers
        )

        # Get input/output names (only the first input and output are used)
        model_input = self.session.get_inputs()[0]
        self.input_name = model_input.name
        self.output_name = self.session.get_outputs()[0].name

        # Get expected input shape. Height and width are resolved
        # independently so that a model with only one dynamic spatial
        # dimension still gets integer sizes for both.
        input_shape = model_input.shape
        self.input_height, self.input_width = self._resolve_input_size(input_shape)

        print(f"✓ Loaded model: {self.model_path.name}")
        print(f"  Input shape: {input_shape}")
        print(f"  Providers: {providers}")

    @classmethod
    def _resolve_input_size(cls, input_shape) -> Tuple[int, int]:
        """
        Derive the (height, width) to resize images to from a model input shape

        Dimensions at index 2 (height) and 3 (width) are used when they are
        positive integers. Anything else (a symbolic name such as 'height',
        None, a non-positive value, or a missing dimension) falls back to
        DEFAULT_INPUT_SIZE for that dimension only.

        Args:
            input_shape: Shape reported for the model input,
                e.g. ['batch_size', 3, 'height', 'width'] or [1, 3, 518, 518]

        Returns:
            Tuple of (height, width) as integers
        """
        dims = list(input_shape) if input_shape is not None else []

        def fixed_or_default(index: int) -> int:
            dim = dims[index] if index < len(dims) else None
            # bool is a subclass of int; it is never a meaningful dimension
            if isinstance(dim, int) and not isinstance(dim, bool) and dim > 0:
                return dim
            return cls.DEFAULT_INPUT_SIZE

        return fixed_or_default(2), fixed_or_default(3)

    def _get_providers(self, use_gpu: bool, use_tensorrt: bool) -> list:
        """
        Get ONNX Runtime execution providers

        Args:
            use_gpu: Request the CUDA provider
            use_tensorrt: Request the TensorRT provider (only honored
                together with use_gpu)

        Returns:
            Provider names in priority order; the CPU provider is always last
        """
        providers = []

        if use_tensorrt and use_gpu:
            providers.append('TensorrtExecutionProvider')

        if use_gpu:
            providers.append('CUDAExecutionProvider')

        providers.append('CPUExecutionProvider')

        return providers

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
        """
        Preprocess image for model input

        Args:
            image: Input image (RGB, HxWx3)

        Returns:
            Tuple of (preprocessed_image, original_size), where
            preprocessed_image is a float32 array laid out as
            1x3xinput_heightxinput_width and original_size is (h, w)

        Raises:
            ValueError: If image is not a 3-dimensional array with 3 channels
        """
        # Fail early with a readable message; otherwise the per-channel
        # normalization below fails with an opaque broadcasting error.
        if image.ndim != 3 or image.shape[2] != 3:
            raise ValueError(
                f"Expected an RGB image of shape (H, W, 3), got {image.shape}"
            )

        h, w = image.shape[:2]
        original_size = (h, w)

        # Resize to model input size (cv2 takes the target as (width, height))
        image = cv2.resize(
            image,
            (self.input_width, self.input_height),
            interpolation=cv2.INTER_LINEAR
        )

        # Scale to 0-1 (divides by 255, i.e. assumes 8-bit pixel values)
        image = image.astype(np.float32) / 255.0

        # ImageNet normalization
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image = (image - mean) / std

        # Transpose HWC -> CHW, then add the batch axis (NCHW)
        image = image.transpose(2, 0, 1)
        image = np.expand_dims(image, axis=0)

        return image, original_size

    def postprocess(
        self,
        depth: np.ndarray,
        original_size: Tuple[int, int]
    ) -> np.ndarray:
        """
        Postprocess depth map output

        Args:
            depth: Raw depth output from model
            original_size: Original image size (h, w)

        Returns:
            Depth map resized to original size and min-max scaled to 0-1
        """
        # Remove batch dimension
        if len(depth.shape) == 4:
            depth = depth[0]

        # Remove channel dimension if present (also covers a 3-D output
        # whose leading axis is the batch)
        if len(depth.shape) == 3:
            depth = depth[0]

        # Resize to original size (cv2 takes the target as (width, height))
        h, w = original_size
        depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_LINEAR)

        # Normalize to 0-1 range; the epsilon avoids division by zero
        # when the map is constant
        depth = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

        return depth

    def predict(
        self,
        image: np.ndarray,
        resize_output: bool = True
    ) -> np.ndarray:
        """
        Run depth estimation on image

        Args:
            image: Input image (RGB, HxWx3)
            resize_output: Whether to resize output to original size

        Returns:
            Depth map (same size as input and scaled to 0-1 if
            resize_output=True; otherwise the raw model output, unchanged)
        """
        # Preprocess
        input_tensor, original_size = self.preprocess(image)

        # Run inference
        outputs = self.session.run(
            [self.output_name],
            {self.input_name: input_tensor}
        )

        depth = outputs[0]

        # Postprocess
        if resize_output:
            depth = self.postprocess(depth, original_size)

        return depth

    def __call__(self, image: np.ndarray) -> np.ndarray:
        """Convenience method for prediction (equivalent to predict(image))"""
        return self.predict(image)


class ModelManager:
    """
    Registry of named depth models exposing a single prediction entry point
    """

    def __init__(self):
        # Maps a model identifier to its loaded DepthAnythingV2 instance
        self.models = {}

    def load_model(
        self,
        name: str,
        model_path: str,
        use_gpu: bool = True,
        use_tensorrt: bool = False
    ) -> DepthAnythingV2:
        """
        Create a depth model and register it under the given name

        An entry already stored under the same name is replaced.

        Args:
            name: Model identifier (e.g., 'small', 'large')
            model_path: Path to ONNX model
            use_gpu: Whether to use GPU
            use_tensorrt: Whether to use TensorRT

        Returns:
            The newly loaded model instance
        """
        loaded = DepthAnythingV2(model_path, use_gpu, use_tensorrt)
        self.models[name] = loaded
        return loaded

    def get_model(self, name: str) -> Optional[DepthAnythingV2]:
        """Look up a loaded model by name; returns None when it is not registered"""
        return self.models.get(name, None)

    def predict(self, image: np.ndarray, model_name: str = 'small') -> np.ndarray:
        """
        Run depth prediction with one of the registered models

        Args:
            image: Input image
            model_name: Name of model to use

        Returns:
            Depth map

        Raises:
            ValueError: If no model is registered under model_name
        """
        selected = self.get_model(model_name)
        if selected is not None:
            return selected.predict(image)

        raise ValueError(f"Model '{model_name}' not loaded")