toto10 committed
Commit 93fee81 · 1 Parent(s): f8965a7

Files changed (50)
  1. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py +234 -0
  2. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder +0 -0
  3. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE +21 -0
  4. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md +131 -0
  5. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh +5 -0
  6. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh +5 -0
  7. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh +34 -0
  8. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh +33 -0
  9. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh +16 -0
  10. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh +2 -0
  11. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt +189 -0
  12. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch +19 -0
  13. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch +23 -0
  14. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml +77 -0
  15. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py +61 -0
  16. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py +61 -0
  17. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py +53 -0
  18. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp +285 -0
  19. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh +16 -0
  20. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py +277 -0
  21. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md +147 -0
  22. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder +0 -0
  23. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py +112 -0
  24. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder +0 -0
  25. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py +119 -0
  26. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py +135 -0
  27. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py +234 -0
  28. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py +82 -0
  29. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py +199 -0
  30. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder +0 -0
  31. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/builder.py +51 -0
  32. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/depth_model.py +152 -0
  33. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/attractor.py +208 -0
  34. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/dist_layers.py +121 -0
  35. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/localbins_layers.py +169 -0
  36. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/patch_transformer.py +91 -0
  37. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/model_io.py +92 -0
  38. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/__init__.py +31 -0
  39. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json +58 -0
  40. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json +22 -0
  41. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py +250 -0
  42. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py +31 -0
  43. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json +67 -0
  44. microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py +333 -0
  45. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/__init__.py +24 -0
  46. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/arg_utils.py +33 -0
  47. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/config.py +437 -0
  48. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/easydict/__init__.py +158 -0
  49. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/geometry.py +98 -0
  50. microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/misc.py +368 -0
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py ADDED
@@ -0,0 +1,234 @@
+ import numpy as np
+ import cv2
+ import math
+
+
+ def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
+     """Resize the sample to ensure the given size. Keeps aspect ratio.
+
+     Args:
+         sample (dict): sample
+         size (tuple): image size
+
+     Returns:
+         tuple: new size
+     """
+     shape = list(sample["disparity"].shape)
+
+     if shape[0] >= size[0] and shape[1] >= size[1]:
+         return sample
+
+     scale = [0, 0]
+     scale[0] = size[0] / shape[0]
+     scale[1] = size[1] / shape[1]
+
+     scale = max(scale)
+
+     shape[0] = math.ceil(scale * shape[0])
+     shape[1] = math.ceil(scale * shape[1])
+
+     # resize
+     sample["image"] = cv2.resize(
+         sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
+     )
+
+     sample["disparity"] = cv2.resize(
+         sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
+     )
+     sample["mask"] = cv2.resize(
+         sample["mask"].astype(np.float32),
+         tuple(shape[::-1]),
+         interpolation=cv2.INTER_NEAREST,
+     )
+     sample["mask"] = sample["mask"].astype(bool)
+
+     return tuple(shape)
+
+
+ class Resize(object):
+     """Resize sample to given size (width, height).
+     """
+
+     def __init__(
+         self,
+         width,
+         height,
+         resize_target=True,
+         keep_aspect_ratio=False,
+         ensure_multiple_of=1,
+         resize_method="lower_bound",
+         image_interpolation_method=cv2.INTER_AREA,
+     ):
+         """Init.
+
+         Args:
+             width (int): desired output width
+             height (int): desired output height
+             resize_target (bool, optional):
+                 True: Resize the full sample (image, mask, target).
+                 False: Resize image only.
+                 Defaults to True.
+             keep_aspect_ratio (bool, optional):
+                 True: Keep the aspect ratio of the input sample.
+                 Output sample might not have the given width and height, and
+                 resize behaviour depends on the parameter 'resize_method'.
+                 Defaults to False.
+             ensure_multiple_of (int, optional):
+                 Output width and height is constrained to be multiple of this parameter.
+                 Defaults to 1.
+             resize_method (str, optional):
+                 "lower_bound": Output will be at least as large as the given size.
+                 "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
+                 "minimal": Scale as least as possible. (Output size might be smaller than given size.)
+                 Defaults to "lower_bound".
+         """
+         self.__width = width
+         self.__height = height
+
+         self.__resize_target = resize_target
+         self.__keep_aspect_ratio = keep_aspect_ratio
+         self.__multiple_of = ensure_multiple_of
+         self.__resize_method = resize_method
+         self.__image_interpolation_method = image_interpolation_method
+
+     def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
+         y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         if max_val is not None and y > max_val:
+             y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         if y < min_val:
+             y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+         return y
+
+     def get_size(self, width, height):
+         # determine new height and width
+         scale_height = self.__height / height
+         scale_width = self.__width / width
+
+         if self.__keep_aspect_ratio:
+             if self.__resize_method == "lower_bound":
+                 # scale such that output size is lower bound
+                 if scale_width > scale_height:
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             elif self.__resize_method == "upper_bound":
+                 # scale such that output size is upper bound
+                 if scale_width < scale_height:
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             elif self.__resize_method == "minimal":
+                 # scale as least as possible
+                 if abs(1 - scale_width) < abs(1 - scale_height):
+                     # fit width
+                     scale_height = scale_width
+                 else:
+                     # fit height
+                     scale_width = scale_height
+             else:
+                 raise ValueError(
+                     f"resize_method {self.__resize_method} not implemented"
+                 )
+
+         if self.__resize_method == "lower_bound":
+             new_height = self.constrain_to_multiple_of(
+                 scale_height * height, min_val=self.__height
+             )
+             new_width = self.constrain_to_multiple_of(
+                 scale_width * width, min_val=self.__width
+             )
+         elif self.__resize_method == "upper_bound":
+             new_height = self.constrain_to_multiple_of(
+                 scale_height * height, max_val=self.__height
+             )
+             new_width = self.constrain_to_multiple_of(
+                 scale_width * width, max_val=self.__width
+             )
+         elif self.__resize_method == "minimal":
+             new_height = self.constrain_to_multiple_of(scale_height * height)
+             new_width = self.constrain_to_multiple_of(scale_width * width)
+         else:
+             raise ValueError(f"resize_method {self.__resize_method} not implemented")
+
+         return (new_width, new_height)
+
+     def __call__(self, sample):
+         width, height = self.get_size(
+             sample["image"].shape[1], sample["image"].shape[0]
+         )
+
+         # resize sample
+         sample["image"] = cv2.resize(
+             sample["image"],
+             (width, height),
+             interpolation=self.__image_interpolation_method,
+         )
+
+         if self.__resize_target:
+             if "disparity" in sample:
+                 sample["disparity"] = cv2.resize(
+                     sample["disparity"],
+                     (width, height),
+                     interpolation=cv2.INTER_NEAREST,
+                 )
+
+             if "depth" in sample:
+                 sample["depth"] = cv2.resize(
+                     sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
+                 )
+
+             sample["mask"] = cv2.resize(
+                 sample["mask"].astype(np.float32),
+                 (width, height),
+                 interpolation=cv2.INTER_NEAREST,
+             )
+             sample["mask"] = sample["mask"].astype(bool)
+
+         return sample
+
+
+ class NormalizeImage(object):
+     """Normalize image by given mean and std.
+     """
+
+     def __init__(self, mean, std):
+         self.__mean = mean
+         self.__std = std
+
+     def __call__(self, sample):
+         sample["image"] = (sample["image"] - self.__mean) / self.__std
+
+         return sample
+
+
+ class PrepareForNet(object):
+     """Prepare sample for usage as network input.
+     """
+
+     def __init__(self):
+         pass
+
+     def __call__(self, sample):
+         image = np.transpose(sample["image"], (2, 0, 1))
+         sample["image"] = np.ascontiguousarray(image).astype(np.float32)
+
+         if "mask" in sample:
+             sample["mask"] = sample["mask"].astype(np.float32)
+             sample["mask"] = np.ascontiguousarray(sample["mask"])
+
+         if "disparity" in sample:
+             disparity = sample["disparity"].astype(np.float32)
+             sample["disparity"] = np.ascontiguousarray(disparity)
+
+         if "depth" in sample:
+             depth = sample["depth"].astype(np.float32)
+             sample["depth"] = np.ascontiguousarray(depth)
+
+         return sample
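For orientation, a minimal usage sketch of the three transforms above (not part of the commit; the `Compose` wiring and the parameter values are illustrative assumptions in the style of MiDaS preprocessing). Each transform is a plain callable over a sample dict, so they chain directly:

```python
# Hypothetical example of composing the transforms defined above.
import numpy as np
from torchvision.transforms import Compose

transform = Compose([
    # keep aspect ratio, snap sizes to a multiple of 32, never go below 256
    Resize(256, 256, resize_target=False, keep_aspect_ratio=True,
           ensure_multiple_of=32, resize_method="lower_bound"),
    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    PrepareForNet(),  # HWC -> CHW, contiguous float32
])

sample = {"image": np.random.rand(480, 640, 3).astype(np.float32)}  # HWC in [0, 1]
net_input = transform(sample)["image"]  # shape (3, 256, 352), ready for the network
```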
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder ADDED
File without changes
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2020 Alexey
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md ADDED
@@ -0,0 +1,131 @@
+ # MiDaS for ROS1 by using LibTorch in C++
+
+ ### Requirements
+
+ - Ubuntu 17.10 / 18.04 / 20.04, Debian Stretch
+ - ROS Melodic for Ubuntu (17.10 / 18.04) / Debian Stretch, ROS Noetic for Ubuntu 20.04
+ - C++11
+ - LibTorch >= 1.6
+
+ ## Quick Start with a MiDaS Example
+
+ MiDaS is a neural network to compute depth from a single image.
+
+ * input from `image_topic`: `sensor_msgs/Image` - `RGB8` image with any shape
+ * output to `midas_topic`: `sensor_msgs/Image` - `TYPE_32FC1` inverse relative depth maps in range [0 - 255] with original size and channels=1
+
+ ### Install Dependencies
+
+ * install ROS Melodic for Ubuntu 17.10 / 18.04:
+ ```bash
+ wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_melodic_ubuntu_17_18.sh
+ ./install_ros_melodic_ubuntu_17_18.sh
+ ```
+
+ or Noetic for Ubuntu 20.04:
+
+ ```bash
+ wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_noetic_ubuntu_20.sh
+ ./install_ros_noetic_ubuntu_20.sh
+ ```
+
+
+ * install LibTorch 1.7 with CUDA 11.0:
+
+ On **Jetson (ARM)**:
+ ```bash
+ wget https://nvidia.box.com/shared/static/wa34qwrwtk9njtyarwt5nvo6imenfy26.whl -O torch-1.7.0-cp36-cp36m-linux_aarch64.whl
+ sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
+ pip3 install Cython
+ pip3 install numpy torch-1.7.0-cp36-cp36m-linux_aarch64.whl
+ ```
+ Or compile LibTorch from source: https://github.com/pytorch/pytorch#from-source
+
+ On **Linux (x86_64)**:
+ ```bash
+ cd ~/
+ wget https://download.pytorch.org/libtorch/cu110/libtorch-cxx11-abi-shared-with-deps-1.7.0%2Bcu110.zip
+ unzip libtorch-cxx11-abi-shared-with-deps-1.7.0+cu110.zip
+ ```
+
+ * create symlink for OpenCV:
+
+ ```bash
+ sudo ln -s /usr/include/opencv4 /usr/include/opencv
+ ```
+
+ * download and install MiDaS:
+
+ ```bash
+ source ~/.bashrc
+ cd ~/
+ mkdir catkin_ws
+ cd catkin_ws
+ git clone https://github.com/isl-org/MiDaS
+ mkdir src
+ cp -r MiDaS/ros/* src
+
+ chmod +x src/additions/*.sh
+ chmod +x src/*.sh
+ chmod +x src/midas_cpp/scripts/*.py
+ cp src/additions/do_catkin_make.sh ./do_catkin_make.sh
+ ./do_catkin_make.sh
+ ./src/additions/downloads.sh
+ ```
+
+ ### Usage
+
+ * run only the `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh`
+
+ #### Test
+
+ * Test - capture video and show the result in a window:
+     * place any `test.mp4` video file in the directory `~/catkin_ws/src/`
+     * run the `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh`
+     * run the test nodes in another terminal: `cd ~/catkin_ws/src && ./run_talker_listener_test.sh` and wait 30 seconds
+
+     (to use Python 2, run the command `sed -i 's/python3/python2/' ~/catkin_ws/src/midas_cpp/scripts/*.py` )
+
+ ## Mobile version of MiDaS - Monocular Depth Estimation
+
+ ### Accuracy
+
+ * MiDaS v2 small - ResNet50 default-decoder 384x384
+ * MiDaS v2.1 small - EfficientNet-Lite3 small-decoder 256x256
+
+ **Zero-shot error** (the lower - the better):
+
+ | Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 |
+ |---|---|---|---|---|---|---|
+ | MiDaS v2 small 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 |
+ | MiDaS v2.1 small 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** |
+ | Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** |
+
+ None of the Train/Valid/Test subsets of these datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were involved in training or fine-tuning.
+
+ ### Inference speed (FPS) on NVIDIA GPUs
+
+ Inference speed excluding pre- and post-processing, batch=1, **Frames Per Second** (the higher - the better):
+
+ | Model | Jetson Nano, FPS | RTX 2080Ti, FPS |
+ |---|---|---|
+ | MiDaS v2 small 384x384 | 1.6 | 117 |
+ | MiDaS v2.1 small 256x256 | 8.1 | 232 |
+ | SpeedUp, X times | **5x** | **2x** |
+
+ ### Citation
+
+ This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3):
+
+ >Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer
+ René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun
+
+ Please cite our paper if you use this code or any of the models:
+ ```
+ @article{Ranftl2020,
+     author  = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun},
+     title   = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer},
+     journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
+     year    = {2020},
+ }
+ ```
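For orientation, a hypothetical minimal consumer of `midas_topic` (not part of this commit) that turns the `TYPE_32FC1` output described above into an 8-bit preview; the `listener.py` script added later in this commit does the same with optional video saving:

```python
# Hypothetical minimal subscriber for midas_topic (see listener.py below for the full node).
import cv2
import numpy as np
import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import Image

bridge = CvBridge()

def on_depth(msg):
    depth = bridge.imgmsg_to_cv2(msg)          # TYPE_32FC1, values already scaled to [0, 255]
    cv2.imshow("midas depth", depth.astype(np.uint8))  # quantize for display
    cv2.waitKey(1)

rospy.init_node("depth_preview")
rospy.Subscriber("midas_topic", Image, on_depth)
rospy.spin()
```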
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh ADDED
@@ -0,0 +1,5 @@
+ mkdir src
+ catkin_make
+ source devel/setup.bash
+ echo $ROS_PACKAGE_PATH
+ chmod +x ./devel/setup.bash
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh ADDED
@@ -0,0 +1,5 @@
+ mkdir ~/.ros
+ wget https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-traced.pt
+ cp ./model-small-traced.pt ~/.ros/model-small-traced.pt
+
+
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh ADDED
@@ -0,0 +1,34 @@
+ #@title { display-mode: "code" }
+
+ # from http://wiki.ros.org/indigo/Installation/Ubuntu
+
+ # 1.2 Setup sources.list
+ sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
+
+ # 1.3 Setup keys
+ sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
+ sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116
+
+ curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
+
+ # 1.4 Installation
+ sudo apt-get update
+ sudo apt-get upgrade
+
+ # Desktop-Full Install:
+ sudo apt-get install ros-melodic-desktop-full
+
+ printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc
+
+ # 1.5 Initialize rosdep
+ sudo rosdep init
+ rosdep update
+
+
+ # 1.7 Getting rosinstall (python)
+ sudo apt-get install python-rosinstall
+ sudo apt-get install python-catkin-tools
+ sudo apt-get install python-rospy
+ sudo apt-get install python-rosdep
+ sudo apt-get install python-roscd
+ sudo apt-get install python-pip
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh ADDED
@@ -0,0 +1,33 @@
+ #@title { display-mode: "code" }
+
+ # from http://wiki.ros.org/indigo/Installation/Ubuntu
+
+ # 1.2 Setup sources.list
+ sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
+
+ # 1.3 Setup keys
+ sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
+
+ curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
+
+ # 1.4 Installation
+ sudo apt-get update
+ sudo apt-get upgrade
+
+ # Desktop-Full Install:
+ sudo apt-get install ros-noetic-desktop-full
+
+ printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc
+
+ # 1.5 Initialize rosdep
+ sudo rosdep init
+ rosdep update
+
+
+ # 1.7 Getting rosinstall (python)
+ sudo apt-get install python3-rosinstall
+ sudo apt-get install python3-catkin-tools
+ sudo apt-get install python3-rospy
+ sudo apt-get install python3-rosdep
+ sudo apt-get install python3-roscd
+ sudo apt-get install python3-pip
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh ADDED
@@ -0,0 +1,16 @@
+ cd ~/catkin_ws/src
+ catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport
+ cd ~/catkin_ws
+ catkin_make
+
+ chmod +x ~/catkin_ws/devel/setup.bash
+ printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc
+ source ~/catkin_ws/devel/setup.bash
+
+
+ sudo rosdep init
+ rosdep update
+ #rospack depends1 midas_cpp
+ roscd midas_cpp
+ #cat package.xml
+ #rospack depends midas_cpp
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh ADDED
@@ -0,0 +1,2 @@
+ source ~/catkin_ws/devel/setup.bash
+ roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true"
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt ADDED
@@ -0,0 +1,189 @@
+ cmake_minimum_required(VERSION 3.0.2)
+ project(midas_cpp)
+
+ ## Compile as C++11, supported in ROS Kinetic and newer
+ # add_compile_options(-std=c++11)
+
+ ## Find catkin macros and libraries
+ ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
+ ## is used, also find other catkin packages
+ find_package(catkin REQUIRED COMPONENTS
+   cv_bridge
+   image_transport
+   roscpp
+   rospy
+   sensor_msgs
+   std_msgs
+ )
+
+ ## System dependencies are found with CMake's conventions
+ # find_package(Boost REQUIRED COMPONENTS system)
+
+ list(APPEND CMAKE_PREFIX_PATH "~/libtorch")
+ list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python3.6/dist-packages/torch/lib")
+ list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python2.7/dist-packages/torch/lib")
+
+ if(NOT EXISTS "~/libtorch")
+   if (EXISTS "/usr/local/lib/python3.6/dist-packages/torch")
+     include_directories(/usr/local/include)
+     include_directories(/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include)
+     include_directories(/usr/local/lib/python3.6/dist-packages/torch/include)
+
+     link_directories(/usr/local/lib)
+     link_directories(/usr/local/lib/python3.6/dist-packages/torch/lib)
+
+     set(CMAKE_PREFIX_PATH /usr/local/lib/python3.6/dist-packages/torch)
+     set(Boost_USE_MULTITHREADED ON)
+     set(Torch_DIR /usr/local/lib/python3.6/dist-packages/torch)
+
+   elseif (EXISTS "/usr/local/lib/python2.7/dist-packages/torch")
+
+     include_directories(/usr/local/include)
+     include_directories(/usr/local/lib/python2.7/dist-packages/torch/include/torch/csrc/api/include)
+     include_directories(/usr/local/lib/python2.7/dist-packages/torch/include)
+
+     link_directories(/usr/local/lib)
+     link_directories(/usr/local/lib/python2.7/dist-packages/torch/lib)
+
+     set(CMAKE_PREFIX_PATH /usr/local/lib/python2.7/dist-packages/torch)
+     set(Boost_USE_MULTITHREADED ON)
+     set(Torch_DIR /usr/local/lib/python2.7/dist-packages/torch)
+   endif()
+ endif()
+
+
+
+ find_package(Torch REQUIRED)
+ find_package(OpenCV REQUIRED)
+ include_directories( ${OpenCV_INCLUDE_DIRS} )
+
+ add_executable(midas_cpp src/main.cpp)
+ target_link_libraries(midas_cpp "${TORCH_LIBRARIES}" "${OpenCV_LIBS} ${catkin_LIBRARIES}")
+ set_property(TARGET midas_cpp PROPERTY CXX_STANDARD 14)
+
+
+
+ ###################################
+ ## catkin specific configuration ##
+ ###################################
+ ## The catkin_package macro generates cmake config files for your package
+ ## Declare things to be passed to dependent projects
+ ## INCLUDE_DIRS: uncomment this if your package contains header files
+ ## LIBRARIES: libraries you create in this project that dependent projects also need
+ ## CATKIN_DEPENDS: catkin_packages dependent projects also need
+ ## DEPENDS: system dependencies of this project that dependent projects also need
+ catkin_package(
+ # INCLUDE_DIRS include
+ # LIBRARIES midas_cpp
+ # CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs
+ # DEPENDS system_lib
+ )
+
+ ###########
+ ## Build ##
+ ###########
+
+ ## Specify additional locations of header files
+ ## Your package locations should be listed before other locations
+ include_directories(
+ # include
+   ${catkin_INCLUDE_DIRS}
+ )
+
+ ## Declare a C++ library
+ # add_library(${PROJECT_NAME}
+ #   src/${PROJECT_NAME}/midas_cpp.cpp
+ # )
+
+ ## Add cmake target dependencies of the library
+ ## as an example, code may need to be generated before libraries
+ ## either from message generation or dynamic reconfigure
+ # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+
+ ## Declare a C++ executable
+ ## With catkin_make all packages are built within a single CMake context
+ ## The recommended prefix ensures that target names across packages don't collide
+ # add_executable(${PROJECT_NAME}_node src/midas_cpp_node.cpp)
+
+ ## Rename C++ executable without prefix
+ ## The above recommended prefix causes long target names, the following renames the
+ ## target back to the shorter version for ease of user use
+ ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
+ # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
+
+ ## Add cmake target dependencies of the executable
+ ## same as for the library above
+ # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
+
+ ## Specify libraries to link a library or executable target against
+ # target_link_libraries(${PROJECT_NAME}_node
+ #   ${catkin_LIBRARIES}
+ # )
+
+ #############
+ ## Install ##
+ #############
+
+ # all install targets should use catkin DESTINATION variables
+ # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
+
+ ## Mark executable scripts (Python etc.) for installation
+ ## in contrast to setup.py, you can choose the destination
+ # catkin_install_python(PROGRAMS
+ #   scripts/my_python_script
+ #   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+ # )
+
+ ## Mark executables for installation
+ ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
+ # install(TARGETS ${PROJECT_NAME}_node
+ #   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+ # )
+
+ ## Mark libraries for installation
+ ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
+ # install(TARGETS ${PROJECT_NAME}
+ #   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+ #   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+ #   RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
+ # )
+
+ ## Mark cpp header files for installation
+ # install(DIRECTORY include/${PROJECT_NAME}/
+ #   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
+ #   FILES_MATCHING PATTERN "*.h"
+ #   PATTERN ".svn" EXCLUDE
+ # )
+
+ ## Mark other files for installation (e.g. launch and bag files, etc.)
+ # install(FILES
+ #   # myfile1
+ #   # myfile2
+ #   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+ # )
+
+ #############
+ ## Testing ##
+ #############
+
+ ## Add gtest based cpp test target and link libraries
+ # catkin_add_gtest(${PROJECT_NAME}-test test/test_midas_cpp.cpp)
+ # if(TARGET ${PROJECT_NAME}-test)
+ #   target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
+ # endif()
+
+ ## Add folders to be run by python nosetests
+ # catkin_add_nosetests(test)
+
+ install(TARGETS ${PROJECT_NAME}
+   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+ )
+
+ add_custom_command(
+   TARGET midas_cpp POST_BUILD
+   COMMAND ${CMAKE_COMMAND} -E copy
+     ${CMAKE_CURRENT_BINARY_DIR}/midas_cpp
+     ${CMAKE_SOURCE_DIR}/midas_cpp
+ )
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch ADDED
@@ -0,0 +1,19 @@
+ <launch>
+     <arg name="input_topic" default="image_topic"/>
+     <arg name="output_topic" default="midas_topic"/>
+     <arg name="model_name" default="model-small-traced.pt"/>
+     <arg name="out_orig_size" default="true"/>
+     <arg name="net_width" default="256"/>
+     <arg name="net_height" default="256"/>
+     <arg name="logging" default="false"/>
+
+     <node pkg="midas_cpp" type="midas_cpp" name="midas_cpp" output="log" respawn="true">
+         <param name="input_topic" value="$(arg input_topic)"/>
+         <param name="output_topic" value="$(arg output_topic)"/>
+         <param name="model_name" value="$(arg model_name)"/>
+         <param name="out_orig_size" value="$(arg out_orig_size)"/>
+         <param name="net_width" value="$(arg net_width)"/>
+         <param name="net_height" value="$(arg net_height)"/>
+         <param name="logging" value="$(arg logging)"/>
+     </node>
+ </launch>
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch ADDED
@@ -0,0 +1,23 @@
+ <launch>
+     <arg name="use_camera" default="false"/>
+     <arg name="input_video_file" default="test.mp4"/>
+
+     <arg name="show_output" default="true"/>
+     <arg name="save_output" default="false"/>
+     <arg name="output_video_file" default="result.mp4"/>
+
+     <node pkg="midas_cpp" type="talker.py" name="talker" output="log" respawn="true">
+         <param name="use_camera" value="$(arg use_camera)"/>
+         <param name="input_video_file" value="$(arg input_video_file)"/>
+     </node>
+
+     <node pkg="midas_cpp" type="listener.py" name="listener" output="log" respawn="true">
+         <param name="show_output" value="$(arg show_output)"/>
+         <param name="save_output" value="$(arg save_output)"/>
+         <param name="output_video_file" value="$(arg output_video_file)"/>
+     </node>
+
+     <node pkg="midas_cpp" type="listener_original.py" name="listener_original" output="log" respawn="true">
+         <param name="show_output" value="$(arg show_output)"/>
+     </node>
+ </launch>
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml ADDED
@@ -0,0 +1,77 @@
+ <?xml version="1.0"?>
+ <package format="2">
+   <name>midas_cpp</name>
+   <version>0.1.0</version>
+   <description>The midas_cpp package</description>
+
+   <maintainer email="alexeyab84@gmail.com">Alexey Bochkovskiy</maintainer>
+   <license>MIT</license>
+   <url type="website">https://github.com/isl-org/MiDaS/tree/master/ros</url>
+   <!-- <author email="alexeyab84@gmail.com">Alexey Bochkovskiy</author> -->
+
+
+   <!-- One license tag required, multiple allowed, one license per tag -->
+   <!-- Commonly used license strings: -->
+   <!--   BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
+   <license>TODO</license>
+
+
+   <!-- Url tags are optional, but multiple are allowed, one per tag -->
+   <!-- Optional attribute type can be: website, bugtracker, or repository -->
+   <!-- Example: -->
+   <!-- <url type="website">http://wiki.ros.org/midas_cpp</url> -->
+
+
+   <!-- Author tags are optional, multiple are allowed, one per tag -->
+   <!-- Authors do not have to be maintainers, but could be -->
+   <!-- Example: -->
+   <!-- <author email="jane.doe@example.com">Jane Doe</author> -->
+
+
+   <!-- The *depend tags are used to specify dependencies -->
+   <!-- Dependencies can be catkin packages or system dependencies -->
+   <!-- Examples: -->
+   <!-- Use depend as a shortcut for packages that are both build and exec dependencies -->
+   <!--   <depend>roscpp</depend> -->
+   <!--   Note that this is equivalent to the following: -->
+   <!--   <build_depend>roscpp</build_depend> -->
+   <!--   <exec_depend>roscpp</exec_depend> -->
+   <!-- Use build_depend for packages you need at compile time: -->
+   <!--   <build_depend>message_generation</build_depend> -->
+   <!-- Use build_export_depend for packages you need in order to build against this package: -->
+   <!--   <build_export_depend>message_generation</build_export_depend> -->
+   <!-- Use buildtool_depend for build tool packages: -->
+   <!--   <buildtool_depend>catkin</buildtool_depend> -->
+   <!-- Use exec_depend for packages you need at runtime: -->
+   <!--   <exec_depend>message_runtime</exec_depend> -->
+   <!-- Use test_depend for packages you need only for testing: -->
+   <!--   <test_depend>gtest</test_depend> -->
+   <!-- Use doc_depend for packages you need only for building documentation: -->
+   <!--   <doc_depend>doxygen</doc_depend> -->
+   <buildtool_depend>catkin</buildtool_depend>
+   <build_depend>cv_bridge</build_depend>
+   <build_depend>image_transport</build_depend>
+   <build_depend>roscpp</build_depend>
+   <build_depend>rospy</build_depend>
+   <build_depend>sensor_msgs</build_depend>
+   <build_depend>std_msgs</build_depend>
+   <build_export_depend>cv_bridge</build_export_depend>
+   <build_export_depend>image_transport</build_export_depend>
+   <build_export_depend>roscpp</build_export_depend>
+   <build_export_depend>rospy</build_export_depend>
+   <build_export_depend>sensor_msgs</build_export_depend>
+   <build_export_depend>std_msgs</build_export_depend>
+   <exec_depend>cv_bridge</exec_depend>
+   <exec_depend>image_transport</exec_depend>
+   <exec_depend>roscpp</exec_depend>
+   <exec_depend>rospy</exec_depend>
+   <exec_depend>sensor_msgs</exec_depend>
+   <exec_depend>std_msgs</exec_depend>
+
+
+   <!-- The export tag contains other, unspecified, tags -->
+   <export>
+     <!-- Other tools can request additional information be placed here -->
+
+   </export>
+ </package>
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py ADDED
@@ -0,0 +1,61 @@
+ #!/usr/bin/env python3
+ from __future__ import print_function
+
+ import roslib
+ #roslib.load_manifest('my_package')
+ import sys
+ import rospy
+ import cv2
+ import numpy as np
+ from std_msgs.msg import String
+ from sensor_msgs.msg import Image
+ from cv_bridge import CvBridge, CvBridgeError
+
+ class video_show:
+
+     def __init__(self):
+         self.show_output = rospy.get_param('~show_output', True)
+         self.save_output = rospy.get_param('~save_output', False)
+         self.output_video_file = rospy.get_param('~output_video_file', 'result.mp4')
+         # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")
+
+         self.video_writer_init = False  # video writer is created lazily on the first saved frame
+         self.bridge = CvBridge()
+         self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback)
+
+     def callback(self, data):
+         try:
+             cv_image = self.bridge.imgmsg_to_cv2(data)
+         except CvBridgeError as e:
+             print(e)
+             return
+
+         if cv_image.size == 0:
+             return
+
+         rospy.loginfo("Listener: Received new frame")
+         cv_image = cv_image.astype("uint8")
+
+         if self.show_output:
+             cv2.imshow("video_show", cv_image)
+             cv2.waitKey(10)
+
+         if self.save_output:
+             if not self.video_writer_init:
+                 fourcc = cv2.VideoWriter_fourcc(*'XVID')
+                 self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0]))
+                 self.video_writer_init = True
+
+             self.out.write(cv_image)
+
+
+
+ def main(args):
+     rospy.init_node('listener', anonymous=True)
+     ic = video_show()
+     try:
+         rospy.spin()
+     except KeyboardInterrupt:
+         print("Shutting down")
+     cv2.destroyAllWindows()
+
+ if __name__ == '__main__':
+     main(sys.argv)
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py ADDED
@@ -0,0 +1,61 @@
+ #!/usr/bin/env python3
+ from __future__ import print_function
+
+ import roslib
+ #roslib.load_manifest('my_package')
+ import sys
+ import rospy
+ import cv2
+ import numpy as np
+ from std_msgs.msg import String
+ from sensor_msgs.msg import Image
+ from cv_bridge import CvBridge, CvBridgeError
+
+ class video_show:
+
+     def __init__(self):
+         self.show_output = rospy.get_param('~show_output', True)
+         self.save_output = rospy.get_param('~save_output', False)
+         self.output_video_file = rospy.get_param('~output_video_file', 'result.mp4')
+         # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")
+
+         self.video_writer_init = False  # video writer is created lazily on the first saved frame
+         self.bridge = CvBridge()
+         self.image_sub = rospy.Subscriber("image_topic", Image, self.callback)
+
+     def callback(self, data):
+         try:
+             cv_image = self.bridge.imgmsg_to_cv2(data)
+         except CvBridgeError as e:
+             print(e)
+             return
+
+         if cv_image.size == 0:
+             return
+
+         rospy.loginfo("Listener_original: Received new frame")
+         cv_image = cv_image.astype("uint8")
+
+         if self.show_output:
+             cv2.imshow("video_show_orig", cv_image)
+             cv2.waitKey(10)
+
+         if self.save_output:
+             if not self.video_writer_init:
+                 fourcc = cv2.VideoWriter_fourcc(*'XVID')
+                 self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0]))
+                 self.video_writer_init = True
+
+             self.out.write(cv_image)
+
+
+
+ def main(args):
+     rospy.init_node('listener_original', anonymous=True)
+     ic = video_show()
+     try:
+         rospy.spin()
+     except KeyboardInterrupt:
+         print("Shutting down")
+     cv2.destroyAllWindows()
+
+ if __name__ == '__main__':
+     main(sys.argv)
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py ADDED
@@ -0,0 +1,53 @@
+ #!/usr/bin/env python3
+
+
+ import roslib
+ #roslib.load_manifest('my_package')
+ import sys
+ import rospy
+ import cv2
+ from std_msgs.msg import String
+ from sensor_msgs.msg import Image
+ from cv_bridge import CvBridge, CvBridgeError
+
+
+ def talker():
+     rospy.init_node('talker', anonymous=True)
+
+     use_camera = rospy.get_param('~use_camera', False)
+     input_video_file = rospy.get_param('~input_video_file', 'test.mp4')
+     # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}")
+
+     # rospy.loginfo("Talker: Trying to open a video stream")
+     if use_camera:
+         cap = cv2.VideoCapture(0)
+     else:
+         cap = cv2.VideoCapture(input_video_file)
+
+     pub = rospy.Publisher('image_topic', Image, queue_size=1)
+     rate = rospy.Rate(30)  # 30hz
+     bridge = CvBridge()
+
+     while not rospy.is_shutdown():
+         ret, cv_image = cap.read()
+         if not ret:
+             print("Talker: Video is over")
+             rospy.loginfo("Video is over")
+             return
+
+         try:
+             image = bridge.cv2_to_imgmsg(cv_image, "bgr8")
+         except CvBridgeError as e:
+             rospy.logerr("Talker: cv2image conversion failed: %s", e)
+             print(e)
+             continue
+
+         rospy.loginfo("Talker: Publishing frame")
+         pub.publish(image)
+         rate.sleep()
+
+ if __name__ == '__main__':
+     try:
+         talker()
+     except rospy.ROSInterruptException:
+         pass
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp ADDED
@@ -0,0 +1,285 @@
+ #include <ros/ros.h>
+ #include <image_transport/image_transport.h>
+ #include <cv_bridge/cv_bridge.h>
+ #include <sensor_msgs/image_encodings.h>
+
+ #include <initializer_list>
+
+ #include <torch/script.h> // One-stop header.
+
+ #include <opencv2/core/version.hpp>
+ #include <opencv2/imgproc/imgproc.hpp>
+ #include <opencv2/opencv.hpp>
+ #include <opencv2/opencv_modules.hpp>
+
+ #include <opencv2/highgui/highgui.hpp>
+ #include <opencv2/video/video.hpp>
+
+ // includes for OpenCV >= 3.x
+ #ifndef CV_VERSION_EPOCH
+ #include <opencv2/core/types.hpp>
+ #include <opencv2/videoio/videoio.hpp>
+ #include <opencv2/imgcodecs/imgcodecs.hpp>
+ #endif
+
+ // OpenCV includes for OpenCV 2.x
+ #ifdef CV_VERSION_EPOCH
+ #include <opencv2/highgui/highgui_c.h>
+ #include <opencv2/imgproc/imgproc_c.h>
+ #include <opencv2/core/types_c.h>
+ #include <opencv2/core/version.hpp>
+ #endif
+
+ static const std::string OPENCV_WINDOW = "Image window";
+
+ class Midas
+ {
+     ros::NodeHandle nh_;
+     image_transport::ImageTransport it_;
+     image_transport::Subscriber image_sub_;
+     image_transport::Publisher image_pub_;
+
+     torch::jit::script::Module module;
+     torch::Device device;
+
+     auto ToTensor(cv::Mat img, bool show_output = false, bool unsqueeze = false, int unsqueeze_dim = 0)
+     {
+         //std::cout << "image shape: " << img.size() << std::endl;
+         at::Tensor tensor_image = torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte);
+
+         if (unsqueeze)
+         {
+             tensor_image.unsqueeze_(unsqueeze_dim);
+             //std::cout << "tensor's new shape: " << tensor_image.sizes() << std::endl;
+         }
+
+         if (show_output)
+         {
+             std::cout << tensor_image.slice(2, 0, 1) << std::endl;
+         }
+         //std::cout << "tensor shape: " << tensor_image.sizes() << std::endl;
+         return tensor_image;
+     }
+
+     auto ToInput(at::Tensor tensor_image)
+     {
+         // Create a vector of inputs.
+         return std::vector<torch::jit::IValue>{tensor_image};
+     }
+
+     auto ToCvImage(at::Tensor tensor, int cv_type = CV_8UC3)
+     {
+         int width = tensor.sizes()[0];
+         int height = tensor.sizes()[1];
+         try
+         {
+             cv::Mat output_mat;
+             if (cv_type == CV_8UC4 || cv_type == CV_8UC3 || cv_type == CV_8UC2 || cv_type == CV_8UC1) {
+                 cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<uchar>());
+                 output_mat = cv_image;
+             }
+             else if (cv_type == CV_32FC4 || cv_type == CV_32FC3 || cv_type == CV_32FC2 || cv_type == CV_32FC1) {
+                 cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<float>());
+                 output_mat = cv_image;
+             }
+             else if (cv_type == CV_64FC4 || cv_type == CV_64FC3 || cv_type == CV_64FC2 || cv_type == CV_64FC1) {
+                 cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<double>());
+                 output_mat = cv_image;
+             }
+
+             //show_image(output_mat, "converted image from tensor");
+             return output_mat.clone();
+         }
+         catch (const c10::Error& e)
+         {
+             std::cout << "an error has occurred : " << e.msg() << std::endl;
+         }
+         return cv::Mat(height, width, CV_8UC3);
+     }
+
+     std::string input_topic, output_topic, model_name;
+     bool out_orig_size;
+     int net_width, net_height;
+     torch::NoGradGuard guard;
+     at::Tensor mean, std;
+     at::Tensor output, tensor;
+
+ public:
+     Midas()
+         : nh_(), it_(nh_), device(torch::Device(torch::kCPU))
+     {
+         ros::param::param<std::string>("~input_topic", input_topic, "image_topic");
+         ros::param::param<std::string>("~output_topic", output_topic, "midas_topic");
+         ros::param::param<std::string>("~model_name", model_name, "model-small-traced.pt");
+         ros::param::param<bool>("~out_orig_size", out_orig_size, true);
+         ros::param::param<int>("~net_width", net_width, 256);
+         ros::param::param<int>("~net_height", net_height, 256);
+
+         std::cout << "input_topic = " << input_topic <<
+             ", output_topic = " << output_topic <<
+             ", model_name = " << model_name <<
+             ", out_orig_size = " << out_orig_size <<
+             ", net_width = " << net_width <<
+             ", net_height = " << net_height <<
+             std::endl;
+
+         // Subscribe to input video feed and publish output video feed
+         image_sub_ = it_.subscribe(input_topic, 1, &Midas::imageCb, this);
+         image_pub_ = it_.advertise(output_topic, 1);
+
+         std::cout << "Try to load torchscript model \n";
+
+         try {
+             // Deserialize the ScriptModule from a file using torch::jit::load().
+             module = torch::jit::load(model_name);
+         }
+         catch (const c10::Error& e) {
+             std::cerr << "error loading the model\n";
+             exit(1);
+         }
+
+         std::cout << "ok\n";
+
+         try {
+             module.eval();
+             torch::jit::getProfilingMode() = false;
+             torch::jit::setGraphExecutorOptimize(true);
+
+             mean = torch::tensor({ 0.485, 0.456, 0.406 });
+             std = torch::tensor({ 0.229, 0.224, 0.225 });
+
+             if (torch::hasCUDA()) {
+                 std::cout << "cuda is available" << std::endl;
+                 at::globalContext().setBenchmarkCuDNN(true);
+                 device = torch::Device(torch::kCUDA);
+                 module.to(device);
+                 mean = mean.to(device);
+                 std = std.to(device);
+             }
+         }
+         catch (const c10::Error& e)
+         {
+             std::cerr << " module initialization: " << e.msg() << std::endl;
+         }
+     }
+
+     ~Midas()
+     {
+     }
+
+     void imageCb(const sensor_msgs::ImageConstPtr& msg)
+     {
+         cv_bridge::CvImagePtr cv_ptr;
+         try
+         {
+             // sensor_msgs::Image to cv::Mat
+             cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8);
+         }
+         catch (cv_bridge::Exception& e)
+         {
+             ROS_ERROR("cv_bridge exception: %s", e.what());
+             return;
+         }
+
+         // pre-processing
+         auto tensor_cpu = ToTensor(cv_ptr->image); // OpenCV-image -> Libtorch-tensor
+
+         try {
+             tensor = tensor_cpu.to(device); // move to device (CPU or GPU)
+
+             tensor = tensor.toType(c10::kFloat);
+             tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW
+             tensor = tensor.unsqueeze(0);
+             tensor = at::upsample_bilinear2d(tensor, { net_height, net_width }, true); // resize
+             tensor = tensor.squeeze(0);
+             tensor = tensor.permute({ 1, 2, 0 }); // CHW -> HWC
+
+             tensor = tensor.div(255).sub(mean).div(std); // normalization
+             tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW
+             tensor.unsqueeze_(0); // CHW -> NCHW
+         }
+         catch (const c10::Error& e)
+         {
+             std::cerr << " pre-processing exception: " << e.msg() << std::endl;
+             return;
+         }
+
+         auto input_to_net = ToInput(tensor); // input to the network
+
+         // inference
+         try {
+             output = module.forward(input_to_net).toTensor(); // run inference
+         }
+         catch (const c10::Error& e)
+         {
+             std::cerr << " module.forward() exception: " << e.msg() << std::endl;
+             return;
+         }
+
+         output = output.detach().to(torch::kF32);
+
+         // move to CPU temporarily
+         at::Tensor output_tmp = output;
+         output_tmp = output_tmp.to(torch::kCPU);
+
+         // normalization
+         float min_val = std::numeric_limits<float>::max();
+         float max_val = std::numeric_limits<float>::lowest(); // lowest(), not min(): min() is the smallest positive float
+
+         for (int i = 0; i < net_width * net_height; ++i) {
+             float val = output_tmp.data_ptr<float>()[i];
+             if (min_val > val) min_val = val;
+             if (max_val < val) max_val = val;
+         }
+         float range_val = max_val - min_val;
+
+         output = output.sub(min_val).div(range_val).mul(255.0F).clamp(0, 255).to(torch::kF32); // .to(torch::kU8);
+
+         // resize to the original size if required
+         if (out_orig_size) {
+             try {
+                 output = at::upsample_bilinear2d(output.unsqueeze(0), { cv_ptr->image.size().height, cv_ptr->image.size().width }, true);
+                 output = output.squeeze(0);
+             }
+             catch (const c10::Error& e)
+             {
+                 std::cout << " upsample_bilinear2d() exception: " << e.msg() << std::endl;
+                 return;
+             }
+         }
+         output = output.permute({ 1, 2, 0 }).to(torch::kCPU);
+
+         int cv_type = CV_32FC1; // CV_8UC1;
+         auto cv_img = ToCvImage(output, cv_type);
+
+         sensor_msgs::Image img_msg;
+
+         try {
+             // cv::Mat -> sensor_msgs::Image
+             std_msgs::Header header; // empty header
+             header.seq = 0; // user defined counter
+             header.stamp = ros::Time::now(); // time
+             //cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::MONO8, cv_img);
+             cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::TYPE_32FC1, cv_img);
+
+             img_bridge.toImageMsg(img_msg); // cv_bridge -> sensor_msgs::Image
+         }
+         catch (cv_bridge::Exception& e)
+         {
+             ROS_ERROR("cv_bridge exception: %s", e.what());
+             return;
+         }
+
+         // Output modified video stream
+         image_pub_.publish(img_msg);
+     }
+ };
+
+ int main(int argc, char** argv)
+ {
+     ros::init(argc, argv, "midas", ros::init_options::AnonymousName);
+     Midas ic;
+     ros::spin();
+     return 0;
+ }
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh ADDED
@@ -0,0 +1,16 @@
+ # place any test.mp4 file next to this file
+
+ # roscore
+ # rosnode kill -a
+
+ source ~/catkin_ws/devel/setup.bash
+
+ roscore &
+ P1=$!
+ rosrun midas_cpp talker.py &
+ P2=$!
+ rosrun midas_cpp listener_original.py &
+ P3=$!
+ rosrun midas_cpp listener.py &
+ P4=$!
+ wait $P1 $P2 $P3 $P4
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Compute depth maps for images in the input folder.
2
+ """
3
+ import os
4
+ import glob
5
+ import torch
6
+ import utils
7
+ import cv2
8
+ import argparse
9
+ import time
10
+
11
+ import numpy as np
12
+
13
+ from imutils.video import VideoStream
14
+ from midas.model_loader import default_models, load_model
15
+
16
+ first_execution = True
17
+ def process(device, model, model_type, image, input_size, target_size, optimize, use_camera):
18
+ """
19
+ Run the inference and interpolate.
20
+
21
+ Args:
22
+ device (torch.device): the torch device used
23
+ model: the model used for inference
24
+ model_type: the type of the model
25
+ image: the image fed into the neural network
26
+ input_size: the size (width, height) of the neural network input (for OpenVINO)
27
+ target_size: the size (width, height) the neural network output is interpolated to
28
+ optimize: optimize the model to half-floats on CUDA?
29
+ use_camera: is the camera used?
30
+
31
+ Returns:
32
+ the prediction
33
+ """
34
+ global first_execution
35
+
36
+ if "openvino" in model_type:
37
+ if first_execution or not use_camera:
38
+ print(f" Input resized to {input_size[0]}x{input_size[1]} before entering the encoder")
39
+ first_execution = False
40
+
41
+ sample = [np.reshape(image, (1, 3, *input_size))]
42
+ prediction = model(sample)[model.output(0)][0]
43
+ prediction = cv2.resize(prediction, dsize=target_size,
44
+ interpolation=cv2.INTER_CUBIC)
45
+ else:
46
+ sample = torch.from_numpy(image).to(device).unsqueeze(0)
47
+
48
+ if optimize and device == torch.device("cuda"):
49
+ if first_execution:
50
+ print(" Optimization to half-floats activated. Use with caution, because models like Swin require\n"
51
+ " float precision to work properly and may yield non-finite depth values when run with\n"
52
+ " half-floats.")
53
+ sample = sample.to(memory_format=torch.channels_last)
54
+ sample = sample.half()
55
+
56
+ if first_execution or not use_camera:
57
+ height, width = sample.shape[2:]
58
+ print(f" Input resized to {width}x{height} before entering the encoder")
59
+ first_execution = False
60
+
61
+ prediction = model.forward(sample)
62
+ prediction = (
63
+ torch.nn.functional.interpolate(
64
+ prediction.unsqueeze(1),
65
+ size=target_size[::-1],
66
+ mode="bicubic",
67
+ align_corners=False,
68
+ )
69
+ .squeeze()
70
+ .cpu()
71
+ .numpy()
72
+ )
73
+
74
+ return prediction
75
+
76
+
77
+ def create_side_by_side(image, depth, grayscale):
78
+ """
79
+ Take an RGB image and depth map and place them side by side. This includes a proper normalization of the depth map
80
+ for better visibility.
81
+
82
+ Args:
83
+ image: the RGB image
84
+ depth: the depth map
85
+ grayscale: use a grayscale colormap?
86
+
87
+ Returns:
88
+ the image and depth map placed side by side
89
+ """
90
+ depth_min = depth.min()
91
+ depth_max = depth.max()
92
+ normalized_depth = 255 * (depth - depth_min) / (depth_max - depth_min)
93
+ normalized_depth *= 3
94
+
95
+ right_side = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
96
+ if not grayscale:
97
+ right_side = cv2.applyColorMap(np.uint8(right_side), cv2.COLORMAP_INFERNO)
98
+
99
+ if image is None:
100
+ return right_side
101
+ else:
102
+ return np.concatenate((image, right_side), axis=1)
103
+
104
+
105
+ def run(input_path, output_path, model_path, model_type="dpt_beit_large_512", optimize=False, side=False, height=None,
106
+ square=False, grayscale=False):
107
+ """Run MonoDepthNN to compute depth maps.
108
+
109
+ Args:
110
+ input_path (str): path to input folder
111
+ output_path (str): path to output folder
112
+ model_path (str): path to saved model
113
+ model_type (str): the model type
114
+ optimize (bool): optimize the model to half-floats on CUDA?
115
+ side (bool): RGB and depth side by side in output images?
116
+ height (int): inference encoder image height
117
+ square (bool): resize to a square resolution?
118
+ grayscale (bool): use a grayscale colormap?
119
+ """
120
+ print("Initialize")
121
+
122
+ # select device
123
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
124
+ print("Device: %s" % device)
125
+
126
+ model, transform, net_w, net_h = load_model(device, model_path, model_type, optimize, height, square)
127
+
128
+ # get input
129
+ if input_path is not None:
130
+ image_names = glob.glob(os.path.join(input_path, "*"))
131
+ num_images = len(image_names)
132
+ else:
133
+ print("No input path specified. Grabbing images from camera.")
134
+
135
+ # create output folder
136
+ if output_path is not None:
137
+ os.makedirs(output_path, exist_ok=True)
138
+
139
+ print("Start processing")
140
+
141
+ if input_path is not None:
142
+ if output_path is None:
143
+ print("Warning: No output path specified. Images will be processed but not shown or stored anywhere.")
144
+ for index, image_name in enumerate(image_names):
145
+
146
+ print(" Processing {} ({}/{})".format(image_name, index + 1, num_images))
147
+
148
+ # input
149
+ original_image_rgb = utils.read_image(image_name) # in [0, 1]
150
+ image = transform({"image": original_image_rgb})["image"]
151
+
152
+ # compute
153
+ with torch.no_grad():
154
+ prediction = process(device, model, model_type, image, (net_w, net_h), original_image_rgb.shape[1::-1],
155
+ optimize, False)
156
+
157
+ # output
158
+ if output_path is not None:
159
+ filename = os.path.join(
160
+ output_path, os.path.splitext(os.path.basename(image_name))[0] + '-' + model_type
161
+ )
162
+ if not side:
163
+ utils.write_depth(filename, prediction, grayscale, bits=2)
164
+ else:
165
+ original_image_bgr = np.flip(original_image_rgb, 2)
166
+ content = create_side_by_side(original_image_bgr*255, prediction, grayscale)
167
+ cv2.imwrite(filename + ".png", content)
168
+ utils.write_pfm(filename + ".pfm", prediction.astype(np.float32))
169
+
170
+ else:
171
+ with torch.no_grad():
172
+ fps = 1
173
+ video = VideoStream(0).start()
174
+ time_start = time.time()
175
+ frame_index = 0
176
+ while True:
177
+ frame = video.read()
178
+ if frame is not None:
179
+ original_image_rgb = np.flip(frame, 2) # in [0, 255] (flip required to get RGB)
180
+ image = transform({"image": original_image_rgb/255})["image"]
181
+
182
+ prediction = process(device, model, model_type, image, (net_w, net_h),
183
+ original_image_rgb.shape[1::-1], optimize, True)
184
+
185
+ original_image_bgr = np.flip(original_image_rgb, 2) if side else None
186
+ content = create_side_by_side(original_image_bgr, prediction, grayscale)
187
+ cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', content/255)
188
+
189
+ if output_path is not None:
190
+ filename = os.path.join(output_path, 'Camera' + '-' + model_type + '_' + str(frame_index))
191
+ cv2.imwrite(filename + ".png", content)
192
+
193
+ alpha = 0.1
194
+ if time.time()-time_start > 0:
195
+ fps = (1 - alpha) * fps + alpha * 1 / (time.time()-time_start) # exponential moving average
196
+ time_start = time.time()
197
+ print(f"\rFPS: {round(fps,2)}", end="")
198
+
199
+ if cv2.waitKey(1) == 27: # Escape key
200
+ break
201
+
202
+ frame_index += 1
203
+ print()
204
+
205
+ print("Finished")
206
+
207
+
208
+ if __name__ == "__main__":
209
+ parser = argparse.ArgumentParser()
210
+
211
+ parser.add_argument('-i', '--input_path',
212
+ default=None,
213
+ help='Folder with input images (if no input path is specified, images are grabbed '
214
+ 'from the camera instead)'
215
+ )
216
+
217
+ parser.add_argument('-o', '--output_path',
218
+ default=None,
219
+ help='Folder for output images'
220
+ )
221
+
222
+ parser.add_argument('-m', '--model_weights',
223
+ default=None,
224
+ help='Path to the trained weights of model'
225
+ )
226
+
227
+ parser.add_argument('-t', '--model_type',
228
+ default='dpt_beit_large_512',
229
+ help='Model type: '
230
+ 'dpt_beit_large_512, dpt_beit_large_384, dpt_beit_base_384, dpt_swin2_large_384, '
231
+ 'dpt_swin2_base_384, dpt_swin2_tiny_256, dpt_swin_large_384, dpt_next_vit_large_384, '
232
+ 'dpt_levit_224, dpt_large_384, dpt_hybrid_384, midas_v21_384, midas_v21_small_256 or '
233
+ 'openvino_midas_v21_small_256'
234
+ )
235
+
236
+ parser.add_argument('-s', '--side',
237
+ action='store_true',
238
+ help='Output images contain RGB and depth images side by side'
239
+ )
240
+
241
+ parser.add_argument('--optimize', dest='optimize', action='store_true', help='Use half-float optimization')
242
+ parser.set_defaults(optimize=False)
243
+
244
+ parser.add_argument('--height',
245
+ type=int, default=None,
246
+ help='Preferred height of images fed into the encoder during inference. Note that the '
247
+ 'preferred height may differ from the actual height, because an alignment to multiples of '
248
+ '32 takes place. Many models support only the height chosen during training, which is '
249
+ 'used automatically if this parameter is not set.'
250
+ )
251
+ parser.add_argument('--square',
252
+ action='store_true',
253
+ help='Option to resize images to a square resolution by changing their widths when images are '
254
+ 'fed into the encoder during inference. If this parameter is not set, the aspect ratio of '
255
+ 'images is preserved if supported by the model.'
256
+ )
257
+ parser.add_argument('--grayscale',
258
+ action='store_true',
259
+ help='Use a grayscale colormap instead of the inferno one. Although the inferno colormap, '
260
+ 'which is used by default, is better for visibility, it does not allow storing 16-bit '
261
+ 'depth values in PNGs but only 8-bit ones due to the precision limitation of this '
262
+ 'colormap.'
263
+ )
264
+
265
+ args = parser.parse_args()
266
+
267
+
268
+ if args.model_weights is None:
269
+ args.model_weights = default_models[args.model_type]
270
+
271
+ # set torch options
272
+ torch.backends.cudnn.enabled = True
273
+ torch.backends.cudnn.benchmark = True
274
+
275
+ # compute depth maps
276
+ run(args.input_path, args.output_path, args.model_weights, args.model_type, args.optimize, args.side, args.height,
277
+ args.square, args.grayscale)
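For reference, `run()` can also be driven directly from Python rather than through the CLI; a minimal sketch, with illustrative paths (the weights location is an assumption):

```python
# Illustrative sketch: calling run() from run.py programmatically.
# The input/output folders and the checkpoint path are assumptions.
from run import run

run(
    input_path="input",                          # folder with input images
    output_path="output",                        # depth maps are written here
    model_path="weights/dpt_beit_large_512.pt",  # downloaded model weights
    model_type="dpt_beit_large_512",
    optimize=False,   # half-float optimization on CUDA
    side=True,        # write RGB and depth side by side
    grayscale=False,  # keep the inferno colormap
)
```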
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md ADDED
@@ -0,0 +1,147 @@
1
+ ## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer
2
+
3
+ ### TensorFlow inference using `.pb` and `.onnx` models
4
+
5
+ 1. [Run inference on TensorFlow-model by using TensorFlow](#run-inference-on-tensorflow-model-by-using-tensorflow)
6
+
7
+ 2. [Run inference on ONNX-model by using ONNX Runtime](#run-inference-on-onnx-model-by-using-onnx-runtime)
8
+
9
+ 3. [Make ONNX model from downloaded Pytorch model file](#make-onnx-model-from-downloaded-pytorch-model-file)
10
+
11
+
12
+ ### Run inference on TensorFlow-model by using TensorFlow
13
+
14
+ 1) Download the model weights [model-f6b98070.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pb)
15
+ and [model-small.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.pb) and place the
16
+ files in the `/tf/` folder.
17
+
18
+ 2) Set up dependencies:
19
+
20
+ ```shell
21
+ # install OpenCV
22
+ pip install --upgrade pip
23
+ pip install opencv-python
24
+
25
+ # install TensorFlow
26
+ pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0
27
+ ```
28
+
29
+ #### Usage
30
+
31
+ 1) Place one or more input images in the folder `tf/input`.
32
+
33
+ 2) Run the model:
34
+
35
+ ```shell
36
+ python tf/run_pb.py
37
+ ```
38
+
39
+ Or run the small model:
40
+
41
+ ```shell
42
+ python tf/run_pb.py --model_weights model-small.pb --model_type small
43
+ ```
44
+
45
+ 3) The resulting inverse depth maps are written to the `tf/output` folder.
46
+
47
+
48
+ ### Run inference on ONNX-model by using ONNX-Runtime
49
+
50
+ 1) Download the model weights [model-f6b98070.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.onnx)
51
+ and [model-small.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.onnx) and place the
52
+ files in the `/tf/` folder.
53
+
54
+ 2) Set up dependencies:
55
+
56
+ ```shell
57
+ # install OpenCV
58
+ pip install --upgrade pip
59
+ pip install opencv-python
60
+
61
+ # install ONNX
62
+ pip install onnx==1.7.0
63
+
64
+ # install ONNX Runtime
65
+ pip install onnxruntime==1.5.2
66
+ ```
67
+
68
+ #### Usage
69
+
70
+ 1) Place one or more input images in the folder `tf/input`.
71
+
72
+ 2) Run the model:
73
+
74
+ ```shell
75
+ python tf/run_onnx.py
76
+ ```
77
+
78
+ Or run the small model:
79
+
80
+ ```shell
81
+ python tf/run_onnx.py --model_weights model-small.onnx --model_type small
82
+ ```
83
+
84
+ 3) The resulting inverse depth maps are written to the `tf/output` folder.
85
+
86
+
87
+
88
+ ### Make ONNX model from downloaded Pytorch model file
89
+
90
+ 1) Download the model weights [model-f6b98070.pt](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pt) and place the
91
+ file in the root folder.
92
+
93
+ 2) Set up dependencies:
94
+
95
+ ```shell
96
+ # install OpenCV
97
+ pip install --upgrade pip
98
+ pip install opencv-python
99
+
100
+ # install PyTorch TorchVision
101
+ pip install -I torch==1.7.0 torchvision==0.8.0
102
+
103
+ # install TensorFlow
104
+ pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0
105
+
106
+ # install ONNX
107
+ pip install onnx==1.7.0
108
+
109
+ # install ONNX-TensorFlow
110
+ git clone https://github.com/onnx/onnx-tensorflow.git
111
+ cd onnx-tensorflow
112
+ git checkout 095b51b88e35c4001d70f15f80f31014b592b81e
113
+ pip install -e .
114
+ ```
115
+
116
+ #### Usage
117
+
118
+ 1) Run the converter:
119
+
120
+ ```shell
121
+ python tf/make_onnx_model.py
122
+ ```
123
+
124
+ 2) The resulting `model-f6b98070.onnx` file is written to the `/tf/` folder.
125
+
126
+
127
+ ### Requirements
128
+
129
+ The code was tested with Python 3.6.9, PyTorch 1.5.1, TensorFlow 2.2.0, TensorFlow-addons 0.8.3, ONNX 1.7.0, ONNX-TensorFlow (GitHub-master-17.07.2020) and OpenCV 4.3.0.
130
+
131
+ ### Citation
132
+
133
+ Please cite our paper if you use this code or any of the models:
134
+ ```
135
+ @article{Ranftl2019,
136
+ author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun},
137
+ title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer},
138
+ journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
139
+ year = {2020},
140
+ }
141
+ ```
142
+
143
+ ### License
144
+
145
+ MIT License
146
+
147
+
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder ADDED
File without changes
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py ADDED
@@ -0,0 +1,112 @@
1
+ """Compute depth maps for images in the input folder.
2
+ """
3
+ import os
4
+ import ntpath
5
+ import glob
6
+ import torch
7
+ import utils
8
+ import cv2
9
+ import numpy as np
10
+ from torchvision.transforms import Compose, Normalize
11
+ from torchvision import transforms
12
+
13
+ from shutil import copyfile
14
+ import fileinput
15
+ import sys
16
+ sys.path.append(os.getcwd() + '/..')
17
+
18
+ def modify_file():
19
+ modify_filename = '../midas/blocks.py'
20
+ copyfile(modify_filename, modify_filename+'.bak')
21
+
22
+ with open(modify_filename, 'r') as file :
23
+ filedata = file.read()
24
+
25
+ filedata = filedata.replace('align_corners=True', 'align_corners=False')
26
+ filedata = filedata.replace('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models')
27
+ filedata = filedata.replace('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()')
28
+
29
+ with open(modify_filename, 'w') as file:
30
+ file.write(filedata)
31
+
32
+ def restore_file():
33
+ modify_filename = '../midas/blocks.py'
34
+ copyfile(modify_filename+'.bak', modify_filename)
35
+
36
+ modify_file()
37
+
38
+ from midas.midas_net import MidasNet
39
+ from midas.transforms import Resize, NormalizeImage, PrepareForNet
40
+
41
+ restore_file()
42
+
43
+
44
+ class MidasNet_preprocessing(MidasNet):
45
+ """Network for monocular depth estimation.
46
+ """
47
+ def forward(self, x):
48
+ """Forward pass.
49
+
50
+ Args:
51
+ x (tensor): input data (image)
52
+
53
+ Returns:
54
+ tensor: depth
55
+ """
56
+
57
+ mean = torch.tensor([0.485, 0.456, 0.406])
58
+ std = torch.tensor([0.229, 0.224, 0.225])
59
+ x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
60
+
61
+ return MidasNet.forward(self, x)
62
+
63
+
64
+ def run(model_path):
65
+ """Run MonoDepthNN to compute depth maps.
66
+
67
+ Args:
68
+ model_path (str): path to saved model
69
+ """
70
+ print("initialize")
71
+
72
+ # select device
73
+
74
+ # load network
75
+ #model = MidasNet(model_path, non_negative=True)
76
+ model = MidasNet_preprocessing(model_path, non_negative=True)
77
+
78
+ model.eval()
79
+
80
+ print("start processing")
81
+
82
+ # input
83
+ img_input = np.zeros((3, 384, 384), np.float32)
84
+
85
+ # compute
86
+ with torch.no_grad():
87
+ sample = torch.from_numpy(img_input).unsqueeze(0)
88
+ prediction = model.forward(sample)
89
+ prediction = (
90
+ torch.nn.functional.interpolate(
91
+ prediction.unsqueeze(1),
92
+ size=img_input.shape[1:],
93
+ mode="bicubic",
94
+ align_corners=False,
95
+ )
96
+ .squeeze()
97
+ .cpu()
98
+ .numpy()
99
+ )
100
+
101
+ torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9)
102
+
103
+ print("finished")
104
+
105
+
106
+ if __name__ == "__main__":
107
+ # set paths
108
+ # MODEL_PATH = "model.pt"
109
+ MODEL_PATH = "../model-f6b98070.pt"
110
+
111
+ # compute depth maps
112
+ run(MODEL_PATH)
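After conversion, the exported file can be sanity-checked before use; a short sketch using the `onnx` and `onnxruntime` packages listed in the README (file name as produced by the script above):

```python
# Illustrative sketch: validate the exported ONNX graph and run a dummy inference.
import numpy as np
import onnx
import onnxruntime as rt

onnx.checker.check_model(onnx.load("model-f6b98070.onnx"))  # raises if malformed

sess = rt.InferenceSession("model-f6b98070.onnx")
input_name = sess.get_inputs()[0].name
dummy = np.zeros((1, 3, 384, 384), dtype=np.float32)  # matches the export sample above
out = sess.run(None, {input_name: dummy})[0]
print("output shape:", out.shape)
```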
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder ADDED
File without changes
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py ADDED
@@ -0,0 +1,119 @@
1
+ """Compute depth maps for images in the input folder.
2
+ """
3
+ import os
4
+ import glob
5
+ import utils
6
+ import cv2
7
+ import sys
8
+ import numpy as np
9
+ import argparse
10
+
11
+ import onnx
12
+ import onnxruntime as rt
13
+
14
+ from transforms import Resize, NormalizeImage, PrepareForNet
15
+
16
+
17
+ def run(input_path, output_path, model_path, model_type="large"):
18
+ """Run MonoDepthNN to compute depth maps.
19
+
20
+ Args:
21
+ input_path (str): path to input folder
22
+ output_path (str): path to output folder
23
+ model_path (str): path to saved model
24
+ """
25
+ print("initialize")
26
+
27
+ # select device
28
+ device = "CUDA:0"
29
+ #device = "CPU"
30
+ print("device: %s" % device)
31
+
32
+ # network resolution
33
+ if model_type == "large":
34
+ net_w, net_h = 384, 384
35
+ elif model_type == "small":
36
+ net_w, net_h = 256, 256
37
+ else:
38
+ print(f"model_type '{model_type}' not implemented, use: --model_type large")
39
+ sys.exit(1)
40
+
41
+ # load network
42
+ print("loading model...")
43
+ model = rt.InferenceSession(model_path)
44
+ input_name = model.get_inputs()[0].name
45
+ output_name = model.get_outputs()[0].name
46
+
47
+ resize_image = Resize(
48
+ net_w,
49
+ net_h,
50
+ resize_target=None,
51
+ keep_aspect_ratio=False,
52
+ ensure_multiple_of=32,
53
+ resize_method="upper_bound",
54
+ image_interpolation_method=cv2.INTER_CUBIC,
55
+ )
56
+
57
+ def compose2(f1, f2):
58
+ return lambda x: f2(f1(x))
59
+
60
+ transform = compose2(resize_image, PrepareForNet())
61
+
62
+ # get input
63
+ img_names = glob.glob(os.path.join(input_path, "*"))
64
+ num_images = len(img_names)
65
+
66
+ # create output folder
67
+ os.makedirs(output_path, exist_ok=True)
68
+
69
+ print("start processing")
70
+
71
+ for ind, img_name in enumerate(img_names):
72
+
73
+ print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))
74
+
75
+ # input
76
+ img = utils.read_image(img_name)
77
+ img_input = transform({"image": img})["image"]
78
+
79
+ # compute
80
+ output = model.run([output_name], {input_name: img_input.reshape(1, 3, net_h, net_w).astype(np.float32)})[0]
81
+ prediction = np.array(output).reshape(net_h, net_w)
82
+ prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
83
+
84
+ # output
85
+ filename = os.path.join(
86
+ output_path, os.path.splitext(os.path.basename(img_name))[0]
87
+ )
88
+ utils.write_depth(filename, prediction, bits=2)
89
+
90
+ print("finished")
91
+
92
+
93
+ if __name__ == "__main__":
94
+ parser = argparse.ArgumentParser()
95
+
96
+ parser.add_argument('-i', '--input_path',
97
+ default='input',
98
+ help='folder with input images'
99
+ )
100
+
101
+ parser.add_argument('-o', '--output_path',
102
+ default='output',
103
+ help='folder for output images'
104
+ )
105
+
106
+ parser.add_argument('-m', '--model_weights',
107
+ default='model-f6b98070.onnx',
108
+ help='path to the trained weights of model'
109
+ )
110
+
111
+ parser.add_argument('-t', '--model_type',
112
+ default='large',
113
+ help='model type: large or small'
114
+ )
115
+
116
+ args = parser.parse_args()
117
+
118
+ # compute depth maps
119
+ run(args.input_path, args.output_path, args.model_weights, args.model_type)
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py ADDED
@@ -0,0 +1,135 @@
1
+ """Compute depth maps for images in the input folder.
2
+ """
3
+ import os
4
+ import glob
5
+ import utils
6
+ import cv2
7
+ import argparse
+ import sys
8
+
9
+ import tensorflow as tf
10
+
11
+ from transforms import Resize, NormalizeImage, PrepareForNet
12
+
13
+ def run(input_path, output_path, model_path, model_type="large"):
14
+ """Run MonoDepthNN to compute depth maps.
15
+
16
+ Args:
17
+ input_path (str): path to input folder
18
+ output_path (str): path to output folder
19
+ model_path (str): path to saved model
20
+ """
21
+ print("initialize")
22
+
23
+ # do not let the runtime allocate all device memory at initialization, to avoid running out of GPU memory
24
+ gpus = tf.config.experimental.list_physical_devices('GPU')
25
+ if gpus:
26
+ try:
27
+ for gpu in gpus:
28
+ #tf.config.experimental.set_memory_growth(gpu, True)
29
+ tf.config.experimental.set_virtual_device_configuration(gpu,
30
+ [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])
31
+ except RuntimeError as e:
32
+ print(e)
33
+
34
+ # network resolution
35
+ if model_type == "large":
36
+ net_w, net_h = 384, 384
37
+ elif model_type == "small":
38
+ net_w, net_h = 256, 256
39
+ else:
40
+ print(f"model_type '{model_type}' not implemented, use: --model_type large")
41
+ sys.exit(1)
42
+
43
+ # load network
44
+ graph_def = tf.compat.v1.GraphDef()
45
+ with tf.io.gfile.GFile(model_path, 'rb') as f:
46
+ graph_def.ParseFromString(f.read())
47
+ tf.import_graph_def(graph_def, name='')
48
+
49
+
50
+ model_operations = tf.compat.v1.get_default_graph().get_operations()
51
+ input_node = '0:0'
52
+ output_layer = model_operations[len(model_operations) - 1].name + ':0'
53
+ print("Last layer name: ", output_layer)
54
+
55
+ resize_image = Resize(
56
+ net_w,
57
+ net_h,
58
+ resize_target=None,
59
+ keep_aspect_ratio=False,
60
+ ensure_multiple_of=32,
61
+ resize_method="upper_bound",
62
+ image_interpolation_method=cv2.INTER_CUBIC,
63
+ )
64
+
65
+ def compose2(f1, f2):
66
+ return lambda x: f2(f1(x))
67
+
68
+ transform = compose2(resize_image, PrepareForNet())
69
+
70
+ # get input
71
+ img_names = glob.glob(os.path.join(input_path, "*"))
72
+ num_images = len(img_names)
73
+
74
+ # create output folder
75
+ os.makedirs(output_path, exist_ok=True)
76
+
77
+ print("start processing")
78
+
79
+ with tf.compat.v1.Session() as sess:
80
+ try:
81
+ # load images
82
+ for ind, img_name in enumerate(img_names):
83
+
84
+ print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))
85
+
86
+ # input
87
+ img = utils.read_image(img_name)
88
+ img_input = transform({"image": img})["image"]
89
+
90
+ # compute
91
+ prob_tensor = sess.graph.get_tensor_by_name(output_layer)
92
+ prediction, = sess.run(prob_tensor, {input_node: [img_input] })
93
+ prediction = prediction.reshape(net_h, net_w)
94
+ prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
95
+
96
+ # output
97
+ filename = os.path.join(
98
+ output_path, os.path.splitext(os.path.basename(img_name))[0]
99
+ )
100
+ utils.write_depth(filename, prediction, bits=2)
101
+
102
+ except KeyError:
103
+ print("Couldn't find input node: " + input_node + " or output layer: " + output_layer + ".")
104
+ sys.exit(1)
105
+
106
+ print("finished")
107
+
108
+
109
+ if __name__ == "__main__":
110
+ parser = argparse.ArgumentParser()
111
+
112
+ parser.add_argument('-i', '--input_path',
113
+ default='input',
114
+ help='folder with input images'
115
+ )
116
+
117
+ parser.add_argument('-o', '--output_path',
118
+ default='output',
119
+ help='folder for output images'
120
+ )
121
+
122
+ parser.add_argument('-m', '--model_weights',
123
+ default='model-f6b98070.pb',
124
+ help='path to the trained weights of model'
125
+ )
126
+
127
+ parser.add_argument('-t', '--model_type',
128
+ default='large',
129
+ help='model type: large or small'
130
+ )
131
+
132
+ args = parser.parse_args()
133
+
134
+ # compute depth maps
135
+ run(args.input_path, args.output_path, args.model_weights, args.model_type)
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py ADDED
@@ -0,0 +1,234 @@
1
+ import numpy as np
2
+ import cv2
3
+ import math
4
+
5
+
6
+ def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
7
+ """Resize the sample to ensure the given size. Keeps aspect ratio.
8
+
9
+ Args:
10
+ sample (dict): sample
11
+ size (tuple): image size
12
+
13
+ Returns:
14
+ tuple: new size
15
+ """
16
+ shape = list(sample["disparity"].shape)
17
+
18
+ if shape[0] >= size[0] and shape[1] >= size[1]:
19
+ return sample
20
+
21
+ scale = [0, 0]
22
+ scale[0] = size[0] / shape[0]
23
+ scale[1] = size[1] / shape[1]
24
+
25
+ scale = max(scale)
26
+
27
+ shape[0] = math.ceil(scale * shape[0])
28
+ shape[1] = math.ceil(scale * shape[1])
29
+
30
+ # resize
31
+ sample["image"] = cv2.resize(
32
+ sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
33
+ )
34
+
35
+ sample["disparity"] = cv2.resize(
36
+ sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
37
+ )
38
+ sample["mask"] = cv2.resize(
39
+ sample["mask"].astype(np.float32),
40
+ tuple(shape[::-1]),
41
+ interpolation=cv2.INTER_NEAREST,
42
+ )
43
+ sample["mask"] = sample["mask"].astype(bool)
44
+
45
+ return tuple(shape)
46
+
47
+
48
+ class Resize(object):
49
+ """Resize sample to given size (width, height).
50
+ """
51
+
52
+ def __init__(
53
+ self,
54
+ width,
55
+ height,
56
+ resize_target=True,
57
+ keep_aspect_ratio=False,
58
+ ensure_multiple_of=1,
59
+ resize_method="lower_bound",
60
+ image_interpolation_method=cv2.INTER_AREA,
61
+ ):
62
+ """Init.
63
+
64
+ Args:
65
+ width (int): desired output width
66
+ height (int): desired output height
67
+ resize_target (bool, optional):
68
+ True: Resize the full sample (image, mask, target).
69
+ False: Resize image only.
70
+ Defaults to True.
71
+ keep_aspect_ratio (bool, optional):
72
+ True: Keep the aspect ratio of the input sample.
73
+ Output sample might not have the given width and height, and
74
+ resize behaviour depends on the parameter 'resize_method'.
75
+ Defaults to False.
76
+ ensure_multiple_of (int, optional):
77
+ Output width and height is constrained to be multiple of this parameter.
78
+ Defaults to 1.
79
+ resize_method (str, optional):
80
+ "lower_bound": Output will be at least as large as the given size.
81
+ "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
82
+ "minimal": Scale as little as possible. (Output size might be smaller than given size.)
83
+ Defaults to "lower_bound".
84
+ """
85
+ self.__width = width
86
+ self.__height = height
87
+
88
+ self.__resize_target = resize_target
89
+ self.__keep_aspect_ratio = keep_aspect_ratio
90
+ self.__multiple_of = ensure_multiple_of
91
+ self.__resize_method = resize_method
92
+ self.__image_interpolation_method = image_interpolation_method
93
+
94
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
95
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
96
+
97
+ if max_val is not None and y > max_val:
98
+ y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
99
+
100
+ if y < min_val:
101
+ y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
102
+
103
+ return y
104
+
105
+ def get_size(self, width, height):
106
+ # determine new height and width
107
+ scale_height = self.__height / height
108
+ scale_width = self.__width / width
109
+
110
+ if self.__keep_aspect_ratio:
111
+ if self.__resize_method == "lower_bound":
112
+ # scale such that output size is lower bound
113
+ if scale_width > scale_height:
114
+ # fit width
115
+ scale_height = scale_width
116
+ else:
117
+ # fit height
118
+ scale_width = scale_height
119
+ elif self.__resize_method == "upper_bound":
120
+ # scale such that output size is upper bound
121
+ if scale_width < scale_height:
122
+ # fit width
123
+ scale_height = scale_width
124
+ else:
125
+ # fit height
126
+ scale_width = scale_height
127
+ elif self.__resize_method == "minimal":
128
+ # scale as little as possible
129
+ if abs(1 - scale_width) < abs(1 - scale_height):
130
+ # fit width
131
+ scale_height = scale_width
132
+ else:
133
+ # fit height
134
+ scale_width = scale_height
135
+ else:
136
+ raise ValueError(
137
+ f"resize_method {self.__resize_method} not implemented"
138
+ )
139
+
140
+ if self.__resize_method == "lower_bound":
141
+ new_height = self.constrain_to_multiple_of(
142
+ scale_height * height, min_val=self.__height
143
+ )
144
+ new_width = self.constrain_to_multiple_of(
145
+ scale_width * width, min_val=self.__width
146
+ )
147
+ elif self.__resize_method == "upper_bound":
148
+ new_height = self.constrain_to_multiple_of(
149
+ scale_height * height, max_val=self.__height
150
+ )
151
+ new_width = self.constrain_to_multiple_of(
152
+ scale_width * width, max_val=self.__width
153
+ )
154
+ elif self.__resize_method == "minimal":
155
+ new_height = self.constrain_to_multiple_of(scale_height * height)
156
+ new_width = self.constrain_to_multiple_of(scale_width * width)
157
+ else:
158
+ raise ValueError(f"resize_method {self.__resize_method} not implemented")
159
+
160
+ return (new_width, new_height)
161
+
162
+ def __call__(self, sample):
163
+ width, height = self.get_size(
164
+ sample["image"].shape[1], sample["image"].shape[0]
165
+ )
166
+
167
+ # resize sample
168
+ sample["image"] = cv2.resize(
169
+ sample["image"],
170
+ (width, height),
171
+ interpolation=self.__image_interpolation_method,
172
+ )
173
+
174
+ if self.__resize_target:
175
+ if "disparity" in sample:
176
+ sample["disparity"] = cv2.resize(
177
+ sample["disparity"],
178
+ (width, height),
179
+ interpolation=cv2.INTER_NEAREST,
180
+ )
181
+
182
+ if "depth" in sample:
183
+ sample["depth"] = cv2.resize(
184
+ sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
185
+ )
186
+
187
+ sample["mask"] = cv2.resize(
188
+ sample["mask"].astype(np.float32),
189
+ (width, height),
190
+ interpolation=cv2.INTER_NEAREST,
191
+ )
192
+ sample["mask"] = sample["mask"].astype(bool)
193
+
194
+ return sample
195
+
196
+
197
+ class NormalizeImage(object):
198
+ """Normalize image by the given mean and std.
199
+ """
200
+
201
+ def __init__(self, mean, std):
202
+ self.__mean = mean
203
+ self.__std = std
204
+
205
+ def __call__(self, sample):
206
+ sample["image"] = (sample["image"] - self.__mean) / self.__std
207
+
208
+ return sample
209
+
210
+
211
+ class PrepareForNet(object):
212
+ """Prepare sample for usage as network input.
213
+ """
214
+
215
+ def __init__(self):
216
+ pass
217
+
218
+ def __call__(self, sample):
219
+ image = np.transpose(sample["image"], (2, 0, 1))
220
+ sample["image"] = np.ascontiguousarray(image).astype(np.float32)
221
+
222
+ if "mask" in sample:
223
+ sample["mask"] = sample["mask"].astype(np.float32)
224
+ sample["mask"] = np.ascontiguousarray(sample["mask"])
225
+
226
+ if "disparity" in sample:
227
+ disparity = sample["disparity"].astype(np.float32)
228
+ sample["disparity"] = np.ascontiguousarray(disparity)
229
+
230
+ if "depth" in sample:
231
+ depth = sample["depth"].astype(np.float32)
232
+ sample["depth"] = np.ascontiguousarray(depth)
233
+
234
+ return sample
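These transforms are composed by the run scripts with a small `compose2` helper; the same pipeline can be exercised on a dummy image, e.g.:

```python
# Illustrative sketch: Resize + PrepareForNet on a dummy image, mirroring the
# transform pipeline used by tf/run_pb.py and tf/run_onnx.py.
import numpy as np
import cv2
from transforms import Resize, PrepareForNet

resize_image = Resize(
    384, 384,
    resize_target=None,
    keep_aspect_ratio=False,
    ensure_multiple_of=32,
    resize_method="upper_bound",
    image_interpolation_method=cv2.INTER_CUBIC,
)
prepare = PrepareForNet()

img = np.random.rand(480, 640, 3).astype(np.float32)  # H x W x 3 in [0, 1]
sample = prepare(resize_image({"image": img}))
print(sample["image"].shape)  # (3, 384, 384), float32, channels first
```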
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py ADDED
@@ -0,0 +1,82 @@
1
+ import numpy as np
2
+ import sys
3
+ import cv2
4
+
5
+
6
+ def write_pfm(path, image, scale=1):
7
+ """Write pfm file.
8
+ Args:
9
+ path (str): path to file
10
+ image (array): data
11
+ scale (int, optional): Scale. Defaults to 1.
12
+ """
13
+
14
+ with open(path, "wb") as file:
15
+ color = None
16
+
17
+ if image.dtype.name != "float32":
18
+ raise Exception("Image dtype must be float32.")
19
+
20
+ image = np.flipud(image)
21
+
22
+ if len(image.shape) == 3 and image.shape[2] == 3: # color image
23
+ color = True
24
+ elif (
25
+ len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
26
+ ): # greyscale
27
+ color = False
28
+ else:
29
+ raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
30
+
31
+ file.write(("PF\n" if color else "Pf\n").encode())
32
+ file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
33
+
34
+ endian = image.dtype.byteorder
35
+
36
+ if endian == "<" or (endian == "=" and sys.byteorder == "little"):
37
+ scale = -scale
38
+
39
+ file.write("%f\n".encode() % scale)
40
+
41
+ image.tofile(file)
42
+
43
+ def read_image(path):
44
+ """Read image and output RGB image (0-1).
45
+ Args:
46
+ path (str): path to file
47
+ Returns:
48
+ array: RGB image (0-1)
49
+ """
50
+ img = cv2.imread(path)
51
+
52
+ if img.ndim == 2:
53
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
54
+
55
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
56
+
57
+ return img
58
+
59
+ def write_depth(path, depth, bits=1):
60
+ """Write depth map to pfm and png file.
61
+ Args:
62
+ path (str): filepath without extension
63
+ depth (array): depth
64
+ """
65
+ write_pfm(path + ".pfm", depth.astype(np.float32))
66
+
67
+ depth_min = depth.min()
68
+ depth_max = depth.max()
69
+
70
+ max_val = (2**(8*bits))-1
71
+
72
+ if depth_max - depth_min > np.finfo("float").eps:
73
+ out = max_val * (depth - depth_min) / (depth_max - depth_min)
74
+ else:
75
+ out = 0
76
+
77
+ if bits == 1:
78
+ cv2.imwrite(path + ".png", out.astype("uint8"))
79
+ elif bits == 2:
80
+ cv2.imwrite(path + ".png", out.astype("uint16"))
81
+
82
+ return
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py ADDED
@@ -0,0 +1,199 @@
1
+ """Utils for monoDepth.
2
+ """
3
+ import sys
4
+ import re
5
+ import numpy as np
6
+ import cv2
7
+ import torch
8
+
9
+
10
+ def read_pfm(path):
11
+ """Read pfm file.
12
+
13
+ Args:
14
+ path (str): path to file
15
+
16
+ Returns:
17
+ tuple: (data, scale)
18
+ """
19
+ with open(path, "rb") as file:
20
+
21
+ color = None
22
+ width = None
23
+ height = None
24
+ scale = None
25
+ endian = None
26
+
27
+ header = file.readline().rstrip()
28
+ if header.decode("ascii") == "PF":
29
+ color = True
30
+ elif header.decode("ascii") == "Pf":
31
+ color = False
32
+ else:
33
+ raise Exception("Not a PFM file: " + path)
34
+
35
+ dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii"))
36
+ if dim_match:
37
+ width, height = list(map(int, dim_match.groups()))
38
+ else:
39
+ raise Exception("Malformed PFM header.")
40
+
41
+ scale = float(file.readline().decode("ascii").rstrip())
42
+ if scale < 0:
43
+ # little-endian
44
+ endian = "<"
45
+ scale = -scale
46
+ else:
47
+ # big-endian
48
+ endian = ">"
49
+
50
+ data = np.fromfile(file, endian + "f")
51
+ shape = (height, width, 3) if color else (height, width)
52
+
53
+ data = np.reshape(data, shape)
54
+ data = np.flipud(data)
55
+
56
+ return data, scale
57
+
58
+
59
+ def write_pfm(path, image, scale=1):
60
+ """Write pfm file.
61
+
62
+ Args:
63
+ path (str): path to file
64
+ image (array): data
65
+ scale (int, optional): Scale. Defaults to 1.
66
+ """
67
+
68
+ with open(path, "wb") as file:
69
+ color = None
70
+
71
+ if image.dtype.name != "float32":
72
+ raise Exception("Image dtype must be float32.")
73
+
74
+ image = np.flipud(image)
75
+
76
+ if len(image.shape) == 3 and image.shape[2] == 3: # color image
77
+ color = True
78
+ elif (
79
+ len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
80
+ ): # greyscale
81
+ color = False
82
+ else:
83
+ raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
84
+
85
+ file.write(("PF\n" if color else "Pf\n").encode())
86
+ file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
87
+
88
+ endian = image.dtype.byteorder
89
+
90
+ if endian == "<" or (endian == "=" and sys.byteorder == "little"):
91
+ scale = -scale
92
+
93
+ file.write("%f\n".encode() % scale)
94
+
95
+ image.tofile(file)
96
+
97
+
98
+ def read_image(path):
99
+ """Read image and output RGB image (0-1).
100
+
101
+ Args:
102
+ path (str): path to file
103
+
104
+ Returns:
105
+ array: RGB image (0-1)
106
+ """
107
+ img = cv2.imread(path)
108
+
109
+ if img.ndim == 2:
110
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
111
+
112
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
113
+
114
+ return img
115
+
116
+
117
+ def resize_image(img):
118
+ """Resize image and make it fit for network.
119
+
120
+ Args:
121
+ img (array): image
122
+
123
+ Returns:
124
+ tensor: data ready for network
125
+ """
126
+ height_orig = img.shape[0]
127
+ width_orig = img.shape[1]
128
+
129
+ if width_orig > height_orig:
130
+ scale = width_orig / 384
131
+ else:
132
+ scale = height_orig / 384
133
+
134
+ height = (np.ceil(height_orig / scale / 32) * 32).astype(int)
135
+ width = (np.ceil(width_orig / scale / 32) * 32).astype(int)
136
+
137
+ img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
138
+
139
+ img_resized = (
140
+ torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float()
141
+ )
142
+ img_resized = img_resized.unsqueeze(0)
143
+
144
+ return img_resized
145
+
146
+
147
+ def resize_depth(depth, width, height):
148
+ """Resize depth map and bring to CPU (numpy).
149
+
150
+ Args:
151
+ depth (tensor): depth
152
+ width (int): image width
153
+ height (int): image height
154
+
155
+ Returns:
156
+ array: processed depth
157
+ """
158
+ depth = torch.squeeze(depth[0, :, :, :]).to("cpu")
159
+
160
+ depth_resized = cv2.resize(
161
+ depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC
162
+ )
163
+
164
+ return depth_resized
165
+
166
+ def write_depth(path, depth, grayscale, bits=1):
167
+ """Write depth map to png file.
168
+
169
+ Args:
170
+ path (str): filepath without extension
171
+ depth (array): depth
172
+ grayscale (bool): use a grayscale colormap?
173
+ """
174
+ if not grayscale:
175
+ bits = 1
176
+
177
+ if not np.isfinite(depth).all():
178
+ depth=np.nan_to_num(depth, nan=0.0, posinf=0.0, neginf=0.0)
179
+ print("WARNING: Non-finite depth values present")
180
+
181
+ depth_min = depth.min()
182
+ depth_max = depth.max()
183
+
184
+ max_val = (2**(8*bits))-1
185
+
186
+ if depth_max - depth_min > np.finfo("float").eps:
187
+ out = max_val * (depth - depth_min) / (depth_max - depth_min)
188
+ else:
189
+ out = np.zeros(depth.shape, dtype=depth.dtype)
190
+
191
+ if not grayscale:
192
+ out = cv2.applyColorMap(np.uint8(out), cv2.COLORMAP_INFERNO)
193
+
194
+ if bits == 1:
195
+ cv2.imwrite(path + ".png", out.astype("uint8"))
196
+ elif bits == 2:
197
+ cv2.imwrite(path + ".png", out.astype("uint16"))
198
+
199
+ return
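A quick way to confirm the PFM conventions used here is a round trip through `write_pfm` and `read_pfm`, which should reproduce the original float32 data:

```python
# Illustrative sketch: PFM round trip with the helpers above (file name arbitrary).
import numpy as np
from utils import read_pfm, write_pfm

depth = np.random.rand(4, 5).astype(np.float32)  # H x W greyscale data
write_pfm("roundtrip.pfm", depth)

data, scale = read_pfm("roundtrip.pfm")
assert data.shape == depth.shape
assert np.allclose(data, depth)
```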
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder ADDED
File without changes
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/builder.py ADDED
@@ -0,0 +1,51 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from importlib import import_module
26
+ from .depth_model import DepthModel
27
+
28
+ def build_model(config) -> DepthModel:
29
+ """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
30
+ This function should be used to construct models for training and evaluation.
31
+
32
+ Args:
33
+ config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.
34
+
35
+ Returns:
36
+ torch.nn.Module: Model corresponding to name and version as specified in config
37
+ """
38
+ module_name = f"zoedepth.models.{config.model}"
39
+ try:
40
+ module = import_module(module_name)
41
+ except ModuleNotFoundError as e:
42
+ # print the original error message
43
+ print(e)
44
+ raise ValueError(
45
+ f"Model {config.model} not found. Refer above error for details.") from e
46
+ try:
47
+ get_version = getattr(module, "get_version")
48
+ except AttributeError as e:
49
+ raise ValueError(
50
+ f"Model {config.model} has no get_version function.") from e
51
+ return get_version(config.version_name).build_from_config(config)
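`build_model` only needs a config object whose `model` and `version_name` fields name a model package exposing `get_version`; a hedged sketch of typical use (the config helper and its arguments are assumptions based on the docstring above):

```python
# Illustrative sketch: the config is assumed to come from zoedepth's config
# utilities mentioned in the docstring; the argument values are assumptions.
from zoedepth.models.builder import build_model
from zoedepth.utils.config import get_config  # assumed helper, per the docstring

config = get_config("zoedepth", "infer")  # assumed model name / mode
model = build_model(config)  # resolves zoedepth.models.<config.model> and builds it
model.eval()
```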
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/depth_model.py ADDED
@@ -0,0 +1,152 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+ import torch
27
+ import torch.nn as nn
28
+ import torch.nn.functional as F
29
+ from torchvision import transforms
30
+ import PIL.Image
31
+ from PIL import Image
32
+ from typing import Union
33
+
34
+
35
+ class DepthModel(nn.Module):
36
+ def __init__(self):
37
+ super().__init__()
38
+ self.device = 'cpu'
39
+
40
+ def to(self, device) -> nn.Module:
41
+ self.device = device
42
+ return super().to(device)
43
+
44
+ def forward(self, x, *args, **kwargs):
45
+ raise NotImplementedError
46
+
47
+ def _infer(self, x: torch.Tensor):
48
+ """
49
+ Inference interface for the model
50
+ Args:
51
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
52
+ Returns:
53
+ torch.Tensor: output tensor of shape (b, 1, h, w)
54
+ """
55
+ return self(x)['metric_depth']
56
+
57
+ def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
58
+ """
59
+ Inference interface for the model with padding augmentation
60
+ Padding augmentation fixes the boundary artifacts in the output depth map.
61
+ Boundary artifacts are sometimes caused by the fact that the model is trained on the NYU raw dataset, which has a black or white border around the image.
62
+ This augmentation pads the input image and crops the prediction back to the original size / view.
63
+
64
+ Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
65
+ Args:
66
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
67
+ pad_input (bool, optional): whether to pad the input or not. Defaults to True.
68
+ fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
69
+ fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
70
+ upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
71
+ padding_mode (str, optional): padding mode. Defaults to "reflect".
72
+ Returns:
73
+ torch.Tensor: output tensor of shape (b, 1, h, w)
74
+ """
75
+ # assert x is nchw and c = 3
76
+ assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
77
+ assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])
78
+
79
+ if pad_input:
80
+ assert fh > 0 or fw > 0, "at least one of fh and fw must be greater than 0"
81
+ pad_h = int(np.sqrt(x.shape[2]/2) * fh)
82
+ pad_w = int(np.sqrt(x.shape[3]/2) * fw)
83
+ padding = [pad_w, pad_w]
84
+ if pad_h > 0:
85
+ padding += [pad_h, pad_h]
86
+
87
+ x = F.pad(x, padding, mode=padding_mode, **kwargs)
88
+ out = self._infer(x)
89
+ if out.shape[-2:] != x.shape[-2:]:
90
+ out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
91
+ if pad_input:
92
+ # crop to the original size, handling the case where pad_h or pad_w is 0
93
+ if pad_h > 0:
94
+ out = out[:, :, pad_h:-pad_h,:]
95
+ if pad_w > 0:
96
+ out = out[:, :, :, pad_w:-pad_w]
97
+ return out
98
+
99
+ def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
100
+ """
101
+ Inference interface for the model with horizontal flip augmentation
102
+ Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
103
+ Args:
104
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
105
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
106
+ Returns:
107
+ torch.Tensor: output tensor of shape (b, 1, h, w)
108
+ """
109
+ # infer with horizontal flip and average
110
+ out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
111
+ out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
112
+ out = (out + torch.flip(out_flip, dims=[3])) / 2
113
+ return out
114
+
115
+ def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
116
+ """
117
+ Inference interface for the model
118
+ Args:
119
+ x (torch.Tensor): input tensor of shape (b, c, h, w)
120
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
121
+ with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
122
+ Returns:
123
+ torch.Tensor: output tensor of shape (b, 1, h, w)
124
+ """
125
+ if with_flip_aug:
126
+ return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
127
+ else:
128
+ return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
129
+
130
+ @torch.no_grad()
131
+ def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
132
+ """
133
+ Inference interface for the model for PIL image
134
+ Args:
135
+ pil_img (PIL.Image.Image): input PIL image
136
+ pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
137
+ with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
138
+ output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
139
+ """
140
+ x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
141
+ out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
142
+ if output_type == "numpy":
143
+ return out_tensor.squeeze().cpu().numpy()
144
+ elif output_type == "pil":
145
+ # uint16 is required for depth pil image
146
+ out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
147
+ return Image.fromarray(out_16bit_numpy)
148
+ elif output_type == "tensor":
149
+ return out_tensor.squeeze().cpu()
150
+ else:
151
+ raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
152
+
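In practice `infer_pil` is the most convenient entry point; a minimal sketch, assuming `model` is a `DepthModel` subclass instance loaded elsewhere and with an illustrative image path:

```python
# Illustrative sketch: PIL-image inference with the DepthModel interface above.
# "model" and "example.jpg" are assumptions for demonstration.
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()

img = Image.open("example.jpg").convert("RGB")
depth = model.infer_pil(img, pad_input=True, with_flip_aug=True)  # numpy, H x W
print(depth.shape, depth.min(), depth.max())
```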
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/attractor.py ADDED
@@ -0,0 +1,208 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ @torch.jit.script
30
+ def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
31
+ """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx, where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center.
32
+
33
+ Args:
34
+ dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
35
+ alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
36
+ gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
37
+
38
+ Returns:
39
+ torch.Tensor : Delta shifts - dc; New bin centers = Old bin centers + dc
40
+ """
41
+ return torch.exp(-alpha*(torch.abs(dx)**gamma)) * (dx)
42
+
43
+
44
+ @torch.jit.script
45
+ def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
46
+ """Inverse attractor: dc = dx / (1 + alpha*dx^gamma), where dx = a - c, a = attractor point, c = bin center, dc = shift in bin center
47
+ This is the default one according to the accompanying paper.
48
+
49
+ Args:
50
+ dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
51
+ alpha (float, optional): Proportional Attractor strength. Determines the absolute strength. Lower alpha = greater attraction. Defaults to 300.
52
+ gamma (int, optional): Exponential Attractor strength. Determines the "region of influence" and indirectly number of bin centers affected. Lower gamma = farther reach. Defaults to 2.
53
+
54
+ Returns:
55
+ torch.Tensor: Delta shifts - dc; New bin centers = Old bin centers + dc
56
+ """
57
+ return dx.div(1+alpha*dx.pow(gamma))
58
+
59
+
60
+ class AttractorLayer(nn.Module):
61
+ def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
62
+ alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
63
+ """
64
+ Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)
65
+ """
66
+ super().__init__()
67
+
68
+ self.n_attractors = n_attractors
69
+ self.n_bins = n_bins
70
+ self.min_depth = min_depth
71
+ self.max_depth = max_depth
72
+ self.alpha = alpha
73
+ self.gamma = gamma
74
+ self.kind = kind
75
+ self.attractor_type = attractor_type
76
+ self.memory_efficient = memory_efficient
77
+
78
+ self._net = nn.Sequential(
79
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
80
+ nn.ReLU(inplace=True),
81
+ nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0), # x2 for linear norm
82
+ nn.ReLU(inplace=True)
83
+ )
84
+
85
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
86
+ """
87
+ Args:
88
+ x (torch.Tensor) : feature block; shape - n, c, h, w
89
+ b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
90
+
91
+ Returns:
92
+ tuple(torch.Tensor,torch.Tensor) : new bin centers normed and scaled; shape - n, nbins, h, w
93
+ """
94
+ if prev_b_embedding is not None:
95
+ if interpolate:
96
+ prev_b_embedding = nn.functional.interpolate(
97
+ prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
98
+ x = x + prev_b_embedding
99
+
100
+ A = self._net(x)
101
+ eps = 1e-3
102
+ A = A + eps
103
+ n, c, h, w = A.shape
104
+ A = A.view(n, self.n_attractors, 2, h, w)
105
+ A_normed = A / A.sum(dim=2, keepdim=True) # n, a, 2, h, w
106
+ A_normed = A[:, :, 0, ...] # n, na, h, w
107
+
108
+ b_prev = nn.functional.interpolate(
109
+ b_prev, (h, w), mode='bilinear', align_corners=True)
110
+ b_centers = b_prev
111
+
112
+ if self.attractor_type == 'exp':
113
+ dist = exp_attractor
114
+ else:
115
+ dist = inv_attractor
116
+
117
+ if not self.memory_efficient:
118
+ func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
119
+ # .shape N, nbins, h, w
120
+ delta_c = func(dist(A_normed.unsqueeze(
121
+ 2) - b_centers.unsqueeze(1)), dim=1)
122
+ else:
123
+ delta_c = torch.zeros_like(b_centers, device=b_centers.device)
124
+ for i in range(self.n_attractors):
125
+ # .shape N, nbins, h, w
126
+ delta_c += dist(A_normed[:, i, ...].unsqueeze(1) - b_centers)
127
+
128
+ if self.kind == 'mean':
129
+ delta_c = delta_c / self.n_attractors
130
+
131
+ b_new_centers = b_centers + delta_c
132
+ B_centers = (self.max_depth - self.min_depth) * \
133
+ b_new_centers + self.min_depth
134
+ B_centers, _ = torch.sort(B_centers, dim=1)
135
+ B_centers = torch.clip(B_centers, self.min_depth, self.max_depth)
136
+ return b_new_centers, B_centers
137
+
138
+
139
+ class AttractorLayerUnnormed(nn.Module):
140
+ def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
141
+ alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
142
+ """
143
+ Attractor layer for bin centers. Bin centers are unbounded
144
+ """
145
+ super().__init__()
146
+
147
+ self.n_attractors = n_attractors
148
+ self.n_bins = n_bins
149
+ self.min_depth = min_depth
150
+ self.max_depth = max_depth
151
+ self.alpha = alpha
152
+ self.gamma = gamma
153
+ self.kind = kind
154
+ self.attractor_type = attractor_type
155
+ self.memory_efficient = memory_efficient
156
+
157
+ self._net = nn.Sequential(
158
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
159
+ nn.ReLU(inplace=True),
160
+ nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
161
+ nn.Softplus()
162
+ )
163
+
164
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
165
+ """
166
+ Args:
167
+ x (torch.Tensor) : feature block; shape - n, c, h, w
168
+ b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
169
+
170
+ Returns:
171
+ tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w. Two outputs just to keep the API consistent with the normed version
172
+ """
173
+ if prev_b_embedding is not None:
174
+ if interpolate:
175
+ prev_b_embedding = nn.functional.interpolate(
176
+ prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
177
+ x = x + prev_b_embedding
178
+
179
+ A = self._net(x)
180
+ n, c, h, w = A.shape
181
+
182
+ b_prev = nn.functional.interpolate(
183
+ b_prev, (h, w), mode='bilinear', align_corners=True)
184
+ b_centers = b_prev
185
+
186
+ if self.attractor_type == 'exp':
187
+ dist = exp_attractor
188
+ else:
189
+ dist = inv_attractor
190
+
191
+ if not self.memory_efficient:
192
+ func = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
193
+ # .shape N, nbins, h, w
194
+ delta_c = func(
195
+ dist(A.unsqueeze(2) - b_centers.unsqueeze(1)), dim=1)
196
+ else:
197
+ delta_c = torch.zeros_like(b_centers, device=b_centers.device)
198
+ for i in range(self.n_attractors):
199
+ delta_c += dist(A[:, i, ...].unsqueeze(1) -
200
+ b_centers) # .shape N, nbins, h, w
201
+
202
+ if self.kind == 'mean':
203
+ delta_c = delta_c / self.n_attractors
204
+
205
+ b_new_centers = b_centers + delta_c
206
+ B_centers = b_new_centers
207
+
208
+ return b_new_centers, B_centers
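A standalone sketch (assuming only torch, not part of the file above) of how the inverse attractor pulls normalized bin centers toward attractor points, using the same kind='sum' aggregation as AttractorLayer.forward:

    import torch

    def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
        # dc = dx / (1 + alpha * dx**gamma), with dx = attractor - bin center
        return dx.div(1 + alpha * dx.pow(gamma))

    b_centers = torch.linspace(0.1, 0.9, 8).view(1, 8, 1, 1)    # n, nbins, h, w
    attractors = torch.tensor([0.25, 0.75]).view(1, 2, 1, 1)    # n, n_attractors, h, w

    # sum the pull of every attractor on every bin center (kind='sum')
    delta_c = inv_attractor(attractors.unsqueeze(2) - b_centers.unsqueeze(1)).sum(dim=1)
    b_new_centers = b_centers + delta_c    # centers move toward the attractor points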
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/dist_layers.py ADDED
@@ -0,0 +1,121 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ def log_binom(n, k, eps=1e-7):
30
+ """ log(nCk) using stirling approximation """
31
+ n = n + eps
32
+ k = k + eps
33
+ return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps)
34
+
35
+
36
+ class LogBinomial(nn.Module):
37
+ def __init__(self, n_classes=256, act=torch.softmax):
38
+ """Compute log binomial distribution for n_classes
39
+
40
+ Args:
41
+ n_classes (int, optional): number of output classes. Defaults to 256.
42
+ """
43
+ super().__init__()
44
+ self.K = n_classes
45
+ self.act = act
46
+ self.register_buffer('k_idx', torch.arange(
47
+ 0, n_classes).view(1, -1, 1, 1))
48
+ self.register_buffer('K_minus_1', torch.Tensor(
49
+ [self.K-1]).view(1, -1, 1, 1))
50
+
51
+ def forward(self, x, t=1., eps=1e-4):
52
+ """Compute log binomial distribution for x
53
+
54
+ Args:
55
+ x (torch.Tensor - NCHW): probabilities
56
+ t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1..
57
+ eps (float, optional): Small number for numerical stability. Defaults to 1e-4.
58
+
59
+ Returns:
60
+ torch.Tensor -NCHW: log binomial distribution logbinomial(p;t)
61
+ """
62
+ if x.ndim == 3:
63
+ x = x.unsqueeze(1) # make it nchw
64
+
65
+ one_minus_x = torch.clamp(1 - x, eps, 1)
66
+ x = torch.clamp(x, eps, 1)
67
+ y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \
68
+ torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x)
69
+ return self.act(y/t, dim=1)
70
+
71
+
72
+ class ConditionalLogBinomial(nn.Module):
73
+ def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
74
+ """Conditional Log Binomial distribution
75
+
76
+ Args:
77
+ in_features (int): number of input channels in main feature
78
+ condition_dim (int): number of input channels in condition feature
79
+ n_classes (int, optional): Number of classes. Defaults to 256.
80
+ bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
81
+ p_eps (float, optional): small eps value. Defaults to 1e-4.
82
+ max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
83
+ min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
84
+ """
85
+ super().__init__()
86
+ self.p_eps = p_eps
87
+ self.max_temp = max_temp
88
+ self.min_temp = min_temp
89
+ self.log_binomial_transform = LogBinomial(n_classes, act=act)
90
+ bottleneck = (in_features + condition_dim) // bottleneck_factor
91
+ self.mlp = nn.Sequential(
92
+ nn.Conv2d(in_features + condition_dim, bottleneck,
93
+ kernel_size=1, stride=1, padding=0),
94
+ nn.GELU(),
95
+ # 2 for p linear norm, 2 for t linear norm
96
+ nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
97
+ nn.Softplus()
98
+ )
99
+
100
+ def forward(self, x, cond):
101
+ """Forward pass
102
+
103
+ Args:
104
+ x (torch.Tensor - NCHW): Main feature
105
+ cond (torch.Tensor - NCHW): condition feature
106
+
107
+ Returns:
108
+ torch.Tensor: Output log binomial distribution
109
+ """
110
+ pt = self.mlp(torch.concat((x, cond), dim=1))
111
+ p, t = pt[:, :2, ...], pt[:, 2:, ...]
112
+
113
+ p = p + self.p_eps
114
+ p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...])
115
+
116
+ t = t + self.p_eps
117
+ t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...])
118
+ t = t.unsqueeze(1)
119
+ t = (self.max_temp - self.min_temp) * t + self.min_temp
120
+
121
+ return self.log_binomial_transform(p, t)
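A standalone sketch (assuming only torch) of the per-pixel distribution that LogBinomial evaluates above, with a small class count for readability; the peak lands near p*(K-1):

    import torch

    def log_binom(n, k, eps=1e-7):
        # log(nCk) via Stirling's approximation, matching the helper above
        n = n + eps
        k = k + eps
        return n * torch.log(n) - k * torch.log(k) - (n - k) * torch.log(n - k + eps)

    K = 16                                      # number of classes (256 in the module)
    k_idx = torch.arange(K, dtype=torch.float)
    p = torch.tensor(0.3)                       # per-pixel probability parameter
    logits = log_binom(torch.tensor(K - 1.0), k_idx) + k_idx * torch.log(p) \
        + (K - 1 - k_idx) * torch.log(1 - p)
    probs = torch.softmax(logits / 1.0, dim=0)  # temperature t = 1; sums to 1, peaks near p*(K-1)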
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/localbins_layers.py ADDED
@@ -0,0 +1,169 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ class SeedBinRegressor(nn.Module):
30
+ def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
31
+ """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval.
32
+
33
+ Args:
34
+ in_features (int): input channels
35
+ n_bins (int, optional): Number of bin centers. Defaults to 16.
36
+ mlp_dim (int, optional): Hidden dimension. Defaults to 256.
37
+ min_depth (float, optional): Min depth value. Defaults to 1e-3.
38
+ max_depth (float, optional): Max depth value. Defaults to 10.
39
+ """
40
+ super().__init__()
41
+ self.version = "1_1"
42
+ self.min_depth = min_depth
43
+ self.max_depth = max_depth
44
+
45
+ self._net = nn.Sequential(
46
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
47
+ nn.ReLU(inplace=True),
48
+ nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
49
+ nn.ReLU(inplace=True)
50
+ )
51
+
52
+ def forward(self, x):
53
+ """
54
+ Returns tensor of bin_width vectors (centers). One vector b for every pixel
55
+ """
56
+ B = self._net(x)
57
+ eps = 1e-3
58
+ B = B + eps
59
+ B_widths_normed = B / B.sum(dim=1, keepdim=True)
60
+ B_widths = (self.max_depth - self.min_depth) * \
61
+ B_widths_normed # .shape NCHW
62
+ # pad has the form (left, right, top, bottom, front, back)
63
+ B_widths = nn.functional.pad(
64
+ B_widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
65
+ B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW
66
+
67
+ B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:, 1:, ...])
68
+ return B_widths_normed, B_centers
69
+
70
+
71
+ class SeedBinRegressorUnnormed(nn.Module):
72
+ def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
73
+ """Bin center regressor network. Bin centers are unbounded
74
+
75
+ Args:
76
+ in_features (int): input channels
77
+ n_bins (int, optional): Number of bin centers. Defaults to 16.
78
+ mlp_dim (int, optional): Hidden dimension. Defaults to 256.
79
+ min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
80
+ max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
81
+ """
82
+ super().__init__()
83
+ self.version = "1_1"
84
+ self._net = nn.Sequential(
85
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
86
+ nn.ReLU(inplace=True),
87
+ nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
88
+ nn.Softplus()
89
+ )
90
+
91
+ def forward(self, x):
92
+ """
93
+ Returns tensor of bin_width vectors (centers). One vector b for every pixel
94
+ """
95
+ B_centers = self._net(x)
96
+ return B_centers, B_centers
97
+
98
+
99
+ class Projector(nn.Module):
100
+ def __init__(self, in_features, out_features, mlp_dim=128):
101
+ """Projector MLP
102
+
103
+ Args:
104
+ in_features (int): input channels
105
+ out_features (int): output channels
106
+ mlp_dim (int, optional): hidden dimension. Defaults to 128.
107
+ """
108
+ super().__init__()
109
+
110
+ self._net = nn.Sequential(
111
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
112
+ nn.ReLU(inplace=True),
113
+ nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
114
+ )
115
+
116
+ def forward(self, x):
117
+ return self._net(x)
118
+
119
+
120
+
121
+ class LinearSplitter(nn.Module):
122
+ def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
123
+ super().__init__()
124
+
125
+ self.prev_nbins = prev_nbins
126
+ self.split_factor = split_factor
127
+ self.min_depth = min_depth
128
+ self.max_depth = max_depth
129
+
130
+ self._net = nn.Sequential(
131
+ nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
132
+ nn.GELU(),
133
+ nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
134
+ nn.ReLU()
135
+ )
136
+
137
+ def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
138
+ """
139
+ x : feature block; shape - n, c, h, w
140
+ b_prev : previous bin widths normed; shape - n, prev_nbins, h, w
141
+ """
142
+ if prev_b_embedding is not None:
143
+ if interpolate:
144
+ prev_b_embedding = nn.functional.interpolate(prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
145
+ x = x + prev_b_embedding
146
+ S = self._net(x)
147
+ eps = 1e-3
148
+ S = S + eps
149
+ n, c, h, w = S.shape
150
+ S = S.view(n, self.prev_nbins, self.split_factor, h, w)
151
+ S_normed = S / S.sum(dim=2, keepdim=True) # fractional splits
152
+
153
+ b_prev = nn.functional.interpolate(b_prev, (h,w), mode='bilinear', align_corners=True)
154
+
155
+
156
+ b_prev = b_prev / b_prev.sum(dim=1, keepdim=True) # renormalize for guarantees
157
+ # print(b_prev.shape, S_normed.shape)
158
+ # if is_for_query:(1).expand(-1, b_prev.size(0)//n, -1, -1, -1, -1).flatten(0,1) # TODO ? can replace all this with a single torch.repeat?
159
+ b = b_prev.unsqueeze(2) * S_normed
160
+ b = b.flatten(1,2) # .shape n, prev_nbins * split_factor, h, w
161
+
162
+ # calculate bin centers for loss calculation
163
+ B_widths = (self.max_depth - self.min_depth) * b # .shape N, nprev * splitfactor, H, W
164
+ # pad has the form (left, right, top, bottom, front, back)
165
+ B_widths = nn.functional.pad(B_widths, (0,0,0,0,1,0), mode='constant', value=self.min_depth)
166
+ B_edges = torch.cumsum(B_widths, dim=1) # .shape NCHW
167
+
168
+ B_centers = 0.5 * (B_edges[:, :-1, ...] + B_edges[:,1:,...])
169
+ return b, B_centers
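A standalone sketch (assuming only torch) of the widths-to-centers conversion that SeedBinRegressor.forward performs above, written out for a single pixel:

    import torch

    min_depth, max_depth = 1e-3, 10.0
    w = torch.rand(8)
    w_normed = w / w.sum()                       # normalized widths sum to 1
    widths = (max_depth - min_depth) * w_normed  # metric bin widths
    edges = torch.cumsum(torch.cat([torch.tensor([min_depth]), widths]), dim=0)
    centers = 0.5 * (edges[:-1] + edges[1:])     # centers lie inside (min_depth, max_depth)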
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/patch_transformer.py ADDED
@@ -0,0 +1,91 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+
29
+ class PatchTransformerEncoder(nn.Module):
30
+ def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
31
+ """ViT-like transformer block
32
+
33
+ Args:
34
+ in_channels (int): Input channels
35
+ patch_size (int, optional): patch size. Defaults to 10.
36
+ embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
37
+ num_heads (int, optional): number of attention heads. Defaults to 4.
38
+ use_class_token (bool, optional): Whether to prepend an extra token at the start for global accumulation (the so-called "class token"). Defaults to False.
39
+ """
40
+ super(PatchTransformerEncoder, self).__init__()
41
+ self.use_class_token = use_class_token
42
+ encoder_layers = nn.TransformerEncoderLayer(
43
+ embedding_dim, num_heads, dim_feedforward=1024)
44
+ self.transformer_encoder = nn.TransformerEncoder(
45
+ encoder_layers, num_layers=4) # takes shape S,N,E
46
+
47
+ self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
48
+ kernel_size=patch_size, stride=patch_size, padding=0)
49
+
50
+ def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
51
+ """Generate positional encodings
52
+
53
+ Args:
54
+ sequence_length (int): Sequence length
55
+ embedding_dim (int): Embedding dimension
56
+
57
+ Returns:
58
+ torch.Tensor SBE: Positional encodings
59
+ """
60
+ position = torch.arange(
61
+ 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
62
+ index = torch.arange(
63
+ 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
64
+ div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
65
+ pos_encoding = position * div_term
66
+ pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
67
+ pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
68
+ return pos_encoding
69
+
70
+
71
+ def forward(self, x):
72
+ """Forward pass
73
+
74
+ Args:
75
+ x (torch.Tensor - NCHW): Input feature tensor
76
+
77
+ Returns:
78
+ torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
79
+ """
80
+ embeddings = self.embedding_convPxP(x).flatten(
81
+ 2) # .shape = n,c,s = n, embedding_dim, s
82
+ if self.use_class_token:
83
+ # extra special token at start ?
84
+ embeddings = nn.functional.pad(embeddings, (1, 0))
85
+
86
+ # change to S,N,E format required by transformer
87
+ embeddings = embeddings.permute(2, 0, 1)
88
+ S, N, E = embeddings.shape
89
+ embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
90
+ x = self.transformer_encoder(embeddings) # .shape = S, N, E
91
+ return x
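A shape sketch (assuming only torch) of how the patch embedding convolution above turns an NCHW feature map into S,N,E tokens for the transformer; the numbers are illustrative:

    import torch
    import torch.nn as nn

    n, c, h, w = 2, 256, 24, 32
    patch_size, embedding_dim = 10, 128
    embed = nn.Conv2d(c, embedding_dim, kernel_size=patch_size, stride=patch_size)

    tokens = embed(torch.randn(n, c, h, w)).flatten(2).permute(2, 0, 1)
    print(tokens.shape)    # torch.Size([6, 2, 128]) -> S = (h // 10) * (w // 10) = 6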
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/model_io.py ADDED
@@ -0,0 +1,92 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import torch
26
+
27
+ def load_state_dict(model, state_dict):
28
+ """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.
29
+
30
+ DataParallel prefixes state_dict keys with 'module.' when saving.
31
+ If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
32
+ If the model is a DataParallel model but the state_dict is not, then prefixes are added.
33
+ """
34
+ state_dict = state_dict.get('model', state_dict)
35
+ # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'
36
+
37
+ do_prefix = isinstance(
38
+ model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
39
+ state = {}
40
+ for k, v in state_dict.items():
41
+ if k.startswith('module.') and not do_prefix:
42
+ k = k[7:]
43
+
44
+ if not k.startswith('module.') and do_prefix:
45
+ k = 'module.' + k
46
+
47
+ state[k] = v
48
+
49
+ model.load_state_dict(state)
50
+ print("Loaded successfully")
51
+ return model
52
+
53
+
54
+ def load_wts(model, checkpoint_path):
55
+ ckpt = torch.load(checkpoint_path, map_location='cpu')
56
+ return load_state_dict(model, ckpt)
57
+
58
+
59
+ def load_state_dict_from_url(model, url, **kwargs):
60
+ state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
61
+ return load_state_dict(model, state_dict)
62
+
63
+
64
+ def load_state_from_resource(model, resource: str):
65
+ """Loads weights to the model from a given resource. A resource can be of following types:
66
+ 1. URL. Prefixed with "url::"
67
+ e.g. url::http(s)://url.resource.com/ckpt.pt
68
+
69
+ 2. Local path. Prefixed with "local::"
70
+ e.g. local::/path/to/ckpt.pt
71
+
72
+
73
+ Args:
74
+ model (torch.nn.Module): Model
75
+ resource (str): resource string
76
+
77
+ Returns:
78
+ torch.nn.Module: Model with loaded weights
79
+ """
80
+ print(f"Using pretrained resource {resource}")
81
+
82
+ if resource.startswith('url::'):
83
+ url = resource.split('url::')[1]
84
+ return load_state_dict_from_url(model, url, progress=True)
85
+
86
+ elif resource.startswith('local::'):
87
+ path = resource.split('local::')[1]
88
+ return load_wts(model, path)
89
+
90
+ else:
91
+ raise ValueError("Invalid resource type, only url:: and local:: are supported")
92
+
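A standalone sketch (assuming only torch) of the 'module.' prefix handling that load_state_dict performs above when a DataParallel checkpoint is loaded into a plain model:

    import torch.nn as nn

    model = nn.Linear(4, 2)    # plain model, not wrapped in DataParallel
    dp_ckpt = {"module." + k: v for k, v in model.state_dict().items()}  # keys as saved from DataParallel

    stripped = {k[len("module."):] if k.startswith("module.") else k: v
                for k, v in dp_ckpt.items()}
    model.load_state_dict(stripped)    # loads cleanly: keys are 'weight' and 'bias' again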
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from .zoedepth_v1 import ZoeDepth
26
+
27
+ all_versions = {
28
+ "v1": ZoeDepth,
29
+ }
30
+
31
+ get_version = lambda v : all_versions[v]
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "model": {
3
+ "name": "ZoeDepth",
4
+ "version_name": "v1",
5
+ "n_bins": 64,
6
+ "bin_embedding_dim": 128,
7
+ "bin_centers_type": "softplus",
8
+ "n_attractors":[16, 8, 4, 1],
9
+ "attractor_alpha": 1000,
10
+ "attractor_gamma": 2,
11
+ "attractor_kind" : "mean",
12
+ "attractor_type" : "inv",
13
+ "midas_model_type" : "DPT_BEiT_L_384",
14
+ "min_temp": 0.0212,
15
+ "max_temp": 50.0,
16
+ "output_distribution": "logbinomial",
17
+ "memory_efficient": true,
18
+ "inverse_midas": false,
19
+ "img_size": [384, 512]
20
+ },
21
+
22
+ "train": {
23
+ "train_midas": true,
24
+ "use_pretrained_midas": true,
25
+ "trainer": "zoedepth",
26
+ "epochs": 5,
27
+ "bs": 16,
28
+ "optim_kwargs": {"lr": 0.000161, "wd": 0.01},
29
+ "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
30
+ "same_lr": false,
31
+ "w_si": 1,
32
+ "w_domain": 0.2,
33
+ "w_reg": 0,
34
+ "w_grad": 0,
35
+ "avoid_boundary": false,
36
+ "random_crop": false,
37
+ "input_width": 640,
38
+ "input_height": 480,
39
+ "midas_lr_factor": 1,
40
+ "encoder_lr_factor":10,
41
+ "pos_enc_lr_factor":10,
42
+ "freeze_midas_bn": true
43
+
44
+ },
45
+
46
+ "infer":{
47
+ "train_midas": false,
48
+ "use_pretrained_midas": false,
49
+ "pretrained_resource" : null,
50
+ "force_keep_ar": true
51
+ },
52
+
53
+ "eval":{
54
+ "train_midas": false,
55
+ "use_pretrained_midas": false,
56
+ "pretrained_resource" : null
57
+ }
58
+ }
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "model": {
3
+ "bin_centers_type": "normed",
4
+ "img_size": [384, 768]
5
+ },
6
+
7
+ "train": {
8
+ },
9
+
10
+ "infer":{
11
+ "train_midas": false,
12
+ "use_pretrained_midas": false,
13
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
14
+ "force_keep_ar": true
15
+ },
16
+
17
+ "eval":{
18
+ "train_midas": false,
19
+ "use_pretrained_midas": false,
20
+ "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
21
+ }
22
+ }
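A minimal sketch of how the KITTI override above layers on top of the base config_zoedepth.json model block, assuming a plain dict update over the two files shown in this commit (paths shortened to the file names; the project's actual config loader may merge differently):

    import json

    base = json.load(open("config_zoedepth.json"))["model"]
    kitti = json.load(open("config_zoedepth_kitti.json"))["model"]
    merged = {**base, **kitti}    # KITTI overrides bin_centers_type and img_size only
    print(merged["bin_centers_type"], merged["img_size"])    # normed [384, 768]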
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py ADDED
@@ -0,0 +1,250 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import itertools
26
+
27
+ import torch
28
+ import torch.nn as nn
29
+ from ..depth_model import DepthModel
30
+ from ..base_models.midas import MidasCore
31
+ from ..layers.attractor import AttractorLayer, AttractorLayerUnnormed
32
+ from ..layers.dist_layers import ConditionalLogBinomial
33
+ from ..layers.localbins_layers import (Projector, SeedBinRegressor,
34
+ SeedBinRegressorUnnormed)
35
+ from ..model_io import load_state_from_resource
36
+
37
+
38
+ class ZoeDepth(DepthModel):
39
+ def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
40
+ n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
41
+ midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
42
+ """ZoeDepth model. This is the version of ZoeDepth that has a single metric head
43
+
44
+ Args:
45
+ core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
46
+ n_bins (int, optional): Number of bin centers. Defaults to 64.
47
+ bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
48
+ For "softplus", softplus activation is used and thus are unbounded. Defaults to "softplus".
49
+ bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
50
+ min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
51
+ max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
52
+ n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
53
+ attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
54
+ attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
55
+ attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
56
+ attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
57
+ min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
58
+ max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
59
+ train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
60
+ midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
61
+ encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
62
+ pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
63
+ """
64
+ super().__init__()
65
+
66
+ self.core = core
67
+ self.max_depth = max_depth
68
+ self.min_depth = min_depth
69
+ self.min_temp = min_temp
70
+ self.bin_centers_type = bin_centers_type
71
+
72
+ self.midas_lr_factor = midas_lr_factor
73
+ self.encoder_lr_factor = encoder_lr_factor
74
+ self.pos_enc_lr_factor = pos_enc_lr_factor
75
+ self.train_midas = train_midas
76
+ self.inverse_midas = inverse_midas
77
+
78
+ if self.encoder_lr_factor <= 0:
79
+ self.core.freeze_encoder(
80
+ freeze_rel_pos=self.pos_enc_lr_factor <= 0)
81
+
82
+ N_MIDAS_OUT = 32
83
+ btlnck_features = self.core.output_channels[0]
84
+ num_out_features = self.core.output_channels[1:]
85
+
86
+ self.conv2 = nn.Conv2d(btlnck_features, btlnck_features,
87
+ kernel_size=1, stride=1, padding=0) # btlnck conv
88
+
89
+ if bin_centers_type == "normed":
90
+ SeedBinRegressorLayer = SeedBinRegressor
91
+ Attractor = AttractorLayer
92
+ elif bin_centers_type == "softplus":
93
+ SeedBinRegressorLayer = SeedBinRegressorUnnormed
94
+ Attractor = AttractorLayerUnnormed
95
+ elif bin_centers_type == "hybrid1":
96
+ SeedBinRegressorLayer = SeedBinRegressor
97
+ Attractor = AttractorLayerUnnormed
98
+ elif bin_centers_type == "hybrid2":
99
+ SeedBinRegressorLayer = SeedBinRegressorUnnormed
100
+ Attractor = AttractorLayer
101
+ else:
102
+ raise ValueError(
103
+ "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
104
+
105
+ self.seed_bin_regressor = SeedBinRegressorLayer(
106
+ btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
107
+ self.seed_projector = Projector(btlnck_features, bin_embedding_dim)
108
+ self.projectors = nn.ModuleList([
109
+ Projector(num_out, bin_embedding_dim)
110
+ for num_out in num_out_features
111
+ ])
112
+ self.attractors = nn.ModuleList([
113
+ Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth,
114
+ alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type)
115
+ for i in range(len(num_out_features))
116
+ ])
117
+
118
+ last_in = N_MIDAS_OUT + 1 # +1 for relative depth
119
+
120
+ # use log binomial instead of softmax
121
+ self.conditional_log_binomial = ConditionalLogBinomial(
122
+ last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)
123
+
124
+ def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
125
+ """
126
+ Args:
127
+ x (torch.Tensor): Input image tensor of shape (B, C, H, W)
128
+ return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False.
129
+ denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False.
130
+ return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False.
131
+
132
+ Returns:
133
+ dict: Dictionary containing the following keys:
134
+ - rel_depth (torch.Tensor): Relative depth map of shape (B, H, W)
135
+ - metric_depth (torch.Tensor): Metric depth map of shape (B, 1, H, W)
136
+ - bin_centers (torch.Tensor): Bin centers of shape (B, n_bins, H, W). Present only if return_final_centers is True
137
+ - probs (torch.Tensor): Output probability distribution of shape (B, n_bins, H, W). Present only if return_probs is True
138
+
139
+ """
140
+ b, c, h, w = x.shape
141
+ # print("input shape ", x.shape)
142
+ self.orig_input_width = w
143
+ self.orig_input_height = h
144
+ rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)
145
+ # print("output shapes", rel_depth.shape, out.shape)
146
+
147
+ outconv_activation = out[0]
148
+ btlnck = out[1]
149
+ x_blocks = out[2:]
150
+
151
+ x_d0 = self.conv2(btlnck)
152
+ x = x_d0
153
+ _, seed_b_centers = self.seed_bin_regressor(x)
154
+
155
+ if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
156
+ b_prev = (seed_b_centers - self.min_depth) / \
157
+ (self.max_depth - self.min_depth)
158
+ else:
159
+ b_prev = seed_b_centers
160
+
161
+ prev_b_embedding = self.seed_projector(x)
162
+
163
+ # unroll this loop for better performance
164
+ for projector, attractor, x in zip(self.projectors, self.attractors, x_blocks):
165
+ b_embedding = projector(x)
166
+ b, b_centers = attractor(
167
+ b_embedding, b_prev, prev_b_embedding, interpolate=True)
168
+ b_prev = b.clone()
169
+ prev_b_embedding = b_embedding.clone()
170
+
171
+ last = outconv_activation
172
+
173
+ if self.inverse_midas:
174
+ # invert depth followed by normalization
175
+ rel_depth = 1.0 / (rel_depth + 1e-6)
176
+ rel_depth = (rel_depth - rel_depth.min()) / \
177
+ (rel_depth.max() - rel_depth.min())
178
+ # concat rel depth with last. First interpolate rel depth to last size
179
+ rel_cond = rel_depth.unsqueeze(1)
180
+ rel_cond = nn.functional.interpolate(
181
+ rel_cond, size=last.shape[2:], mode='bilinear', align_corners=True)
182
+ last = torch.cat([last, rel_cond], dim=1)
183
+
184
+ b_embedding = nn.functional.interpolate(
185
+ b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
186
+ x = self.conditional_log_binomial(last, b_embedding)
187
+
188
+ # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
189
+ # print(x.shape, b_centers.shape)
190
+ b_centers = nn.functional.interpolate(
191
+ b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
192
+ out = torch.sum(x * b_centers, dim=1, keepdim=True)
193
+
194
+ # Structure output dict
195
+ output = dict(metric_depth=out)
196
+ if return_final_centers or return_probs:
197
+ output['bin_centers'] = b_centers
198
+
199
+ if return_probs:
200
+ output['probs'] = x
201
+
202
+ return output
203
+
204
+ def get_lr_params(self, lr):
205
+ """
206
+ Learning rate configuration for different layers of the model
207
+ Args:
208
+ lr (float) : Base learning rate
209
+ Returns:
210
+ list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
211
+ """
212
+ param_conf = []
213
+ if self.train_midas:
214
+ if self.encoder_lr_factor > 0:
215
+ param_conf.append({'params': self.core.get_enc_params_except_rel_pos(
216
+ ), 'lr': lr / self.encoder_lr_factor})
217
+
218
+ if self.pos_enc_lr_factor > 0:
219
+ param_conf.append(
220
+ {'params': self.core.get_rel_pos_params(), 'lr': lr / self.pos_enc_lr_factor})
221
+
222
+ midas_params = self.core.core.scratch.parameters()
223
+ midas_lr_factor = self.midas_lr_factor
224
+ param_conf.append(
225
+ {'params': midas_params, 'lr': lr / midas_lr_factor})
226
+
227
+ remaining_modules = []
228
+ for name, child in self.named_children():
229
+ if name != 'core':
230
+ remaining_modules.append(child)
231
+ remaining_params = itertools.chain(
232
+ *[child.parameters() for child in remaining_modules])
233
+
234
+ param_conf.append({'params': remaining_params, 'lr': lr})
235
+
236
+ return param_conf
237
+
238
+ @staticmethod
239
+ def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
240
+ core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
241
+ train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
242
+ model = ZoeDepth(core, **kwargs)
243
+ if pretrained_resource:
244
+ assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
245
+ model = load_state_from_resource(model, pretrained_resource)
246
+ return model
247
+
248
+ @staticmethod
249
+ def build_from_config(config):
250
+ return ZoeDepth.build(**config)
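A usage sketch of the single-head model above; the import path assumes the vendored package layout shown in this commit, and the build step fetches the MiDaS backbone via torch.hub. Without a pretrained_resource the metric head is untrained, so the numbers are only illustrative:

    import torch
    from annotator.zoe.zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth

    model = ZoeDepth.build(midas_model_type="DPT_BEiT_L_384", use_pretrained_midas=True).eval()
    with torch.no_grad():
        out = model(torch.rand(1, 3, 384, 512))    # img_size from config_zoedepth.json
    print(out["metric_depth"].shape)                # (B, 1, h, w) metric depth, per the docstring above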
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ from .zoedepth_nk_v1 import ZoeDepthNK
26
+
27
+ all_versions = {
28
+ "v1": ZoeDepthNK,
29
+ }
30
+
31
+ get_version = lambda v : all_versions[v]
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "model": {
3
+ "name": "ZoeDepthNK",
4
+ "version_name": "v1",
5
+ "bin_conf" : [
6
+ {
7
+ "name": "nyu",
8
+ "n_bins": 64,
9
+ "min_depth": 1e-3,
10
+ "max_depth": 10.0
11
+ },
12
+ {
13
+ "name": "kitti",
14
+ "n_bins": 64,
15
+ "min_depth": 1e-3,
16
+ "max_depth": 80.0
17
+ }
18
+ ],
19
+ "bin_embedding_dim": 128,
20
+ "bin_centers_type": "softplus",
21
+ "n_attractors":[16, 8, 4, 1],
22
+ "attractor_alpha": 1000,
23
+ "attractor_gamma": 2,
24
+ "attractor_kind" : "mean",
25
+ "attractor_type" : "inv",
26
+ "min_temp": 0.0212,
27
+ "max_temp": 50.0,
28
+ "memory_efficient": true,
29
+ "midas_model_type" : "DPT_BEiT_L_384",
30
+ "img_size": [384, 512]
31
+ },
32
+
33
+ "train": {
34
+ "train_midas": true,
35
+ "use_pretrained_midas": true,
36
+ "trainer": "zoedepth_nk",
37
+ "epochs": 5,
38
+ "bs": 16,
39
+ "optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
40
+ "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
41
+ "same_lr": false,
42
+ "w_si": 1,
43
+ "w_domain": 100,
44
+ "avoid_boundary": false,
45
+ "random_crop": false,
46
+ "input_width": 640,
47
+ "input_height": 480,
48
+ "w_grad": 0,
49
+ "w_reg": 0,
50
+ "midas_lr_factor": 10,
51
+ "encoder_lr_factor":10,
52
+ "pos_enc_lr_factor":10
53
+ },
54
+
55
+ "infer": {
56
+ "train_midas": false,
57
+ "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
58
+ "use_pretrained_midas": false,
59
+ "force_keep_ar": true
60
+ },
61
+
62
+ "eval": {
63
+ "train_midas": false,
64
+ "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
65
+ "use_pretrained_midas": false
66
+ }
67
+ }
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py ADDED
@@ -0,0 +1,333 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import itertools
26
+
27
+ import torch
28
+ import torch.nn as nn
29
+
30
+ from zoedepth.models.depth_model import DepthModel
31
+ from zoedepth.models.base_models.midas import MidasCore
32
+ from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
33
+ from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
34
+ from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
35
+ SeedBinRegressorUnnormed)
36
+ from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder
37
+ from zoedepth.models.model_io import load_state_from_resource
38
+
39
+
40
+ class ZoeDepthNK(DepthModel):
41
+ def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128,
42
+ n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp',
43
+ min_temp=5, max_temp=50,
44
+ memory_efficient=False, train_midas=True,
45
+ is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
46
+ """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.
47
+
48
+ Args:
49
+ core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
50
+
51
+ bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
52
+ "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)
53
+
54
+ The length of this list determines the number of metric heads.
55
+ bin_centers_type (str, optional): "normed" or "softplus". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
56
+ For "softplus", softplus activation is used and thus are unbounded. Defaults to "normed".
57
+ bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
58
+
59
+ n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1].
60
+ attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
61
+ attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
62
+ attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
63
+ attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
64
+
65
+ min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
66
+ max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
67
+
68
+ memory_efficient (bool, optional): Whether to use the memory efficient version of attractor layers. The memory efficient version is slower but is recommended in the case of multiple metric heads, in order to save GPU memory. Defaults to False.
69
+
70
+ train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
71
+ is_midas_pretrained (bool, optional): Is "core" pretrained? Defaults to True.
72
+ midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
73
+ encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
74
+ pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
75
+
76
+ """
77
+
78
+ super().__init__()
79
+
80
+ self.core = core
81
+ self.bin_conf = bin_conf
82
+ self.min_temp = min_temp
83
+ self.max_temp = max_temp
84
+ self.memory_efficient = memory_efficient
85
+ self.train_midas = train_midas
86
+ self.is_midas_pretrained = is_midas_pretrained
87
+ self.midas_lr_factor = midas_lr_factor
88
+ self.encoder_lr_factor = encoder_lr_factor
89
+ self.pos_enc_lr_factor = pos_enc_lr_factor
90
+ self.inverse_midas = inverse_midas
91
+
92
+ N_MIDAS_OUT = 32
93
+ btlnck_features = self.core.output_channels[0]
94
+ num_out_features = self.core.output_channels[1:]
95
+ # self.scales = [16, 8, 4, 2] # spatial scale factors
96
+
97
+ self.conv2 = nn.Conv2d(
98
+ btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0)
99
+
100
+ # Transformer classifier on the bottleneck
101
+ self.patch_transformer = PatchTransformerEncoder(
102
+ btlnck_features, 1, 128, use_class_token=True)
103
+ self.mlp_classifier = nn.Sequential(
104
+ nn.Linear(128, 128),
105
+ nn.ReLU(),
106
+ nn.Linear(128, 2)
107
+ )
108
+
109
+ if bin_centers_type == "normed":
110
+ SeedBinRegressorLayer = SeedBinRegressor
111
+ Attractor = AttractorLayer
112
+ elif bin_centers_type == "softplus":
113
+ SeedBinRegressorLayer = SeedBinRegressorUnnormed
114
+ Attractor = AttractorLayerUnnormed
115
+ elif bin_centers_type == "hybrid1":
116
+ SeedBinRegressorLayer = SeedBinRegressor
117
+ Attractor = AttractorLayerUnnormed
118
+ elif bin_centers_type == "hybrid2":
119
+ SeedBinRegressorLayer = SeedBinRegressorUnnormed
120
+ Attractor = AttractorLayer
121
+ else:
122
+ raise ValueError(
123
+ "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
124
+ self.bin_centers_type = bin_centers_type
125
+ # We have bins for each bin conf.
126
+ # Create a map (ModuleDict) of 'name' -> seed_bin_regressor
127
+ self.seed_bin_regressors = nn.ModuleDict(
128
+ {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"])
129
+ for conf in bin_conf}
130
+ )
131
+
132
+ self.seed_projector = Projector(
133
+ btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
134
+ self.projectors = nn.ModuleList([
135
+ Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
136
+ for num_out in num_out_features
137
+ ])
138
+
139
+ # Create a map (ModuleDict) of 'name' -> attractors (ModuleList)
140
+ self.attractors = nn.ModuleDict(
141
+ {conf['name']: nn.ModuleList([
142
+ Attractor(bin_embedding_dim, n_attractors[i],
143
+ mlp_dim=bin_embedding_dim, alpha=attractor_alpha,
144
+ gamma=attractor_gamma, kind=attractor_kind,
145
+ attractor_type=attractor_type, memory_efficient=memory_efficient,
146
+ min_depth=conf["min_depth"], max_depth=conf["max_depth"])
147
+ for i in range(len(n_attractors))
148
+ ])
149
+ for conf in bin_conf}
150
+ )
151
+
152
+ last_in = N_MIDAS_OUT
153
+ # conditional log binomial for each bin conf
154
+ self.conditional_log_binomial = nn.ModuleDict(
155
+ {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp)
156
+ for conf in bin_conf}
157
+ )
158
+
159
+ def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
160
+ """
161
+ Args:
162
+ x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain.
163
+ return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False.
164
+ denorm (bool, optional): Whether to denormalize the input image. Defaults to False.
165
+ return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False.
166
+
167
+ Returns:
168
+ dict: Dictionary of outputs with keys:
169
+ - "rel_depth": Relative depth map of shape (B, 1, H, W)
170
+ - "metric_depth": Metric depth map of shape (B, 1, H, W)
171
+ - "domain_logits": Domain logits of shape (B, 2)
172
+ - "bin_centers": Bin centers of shape (B, N, H, W). Present only if return_final_centers is True
173
+ - "probs": Bin probabilities of shape (B, N, H, W). Present only if return_probs is True
174
+ """
175
+ b, c, h, w = x.shape
176
+ self.orig_input_width = w
177
+ self.orig_input_height = h
178
+ rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)
179
+
180
+ outconv_activation = out[0]
181
+ btlnck = out[1]
182
+ x_blocks = out[2:]
183
+
184
+ x_d0 = self.conv2(btlnck)
185
+ x = x_d0
186
+
187
+ # Predict which path to take
188
+ embedding = self.patch_transformer(x)[0] # N, E
189
+ domain_logits = self.mlp_classifier(embedding) # N, 2
190
+ domain_vote = torch.softmax(domain_logits.sum(
191
+ dim=0, keepdim=True), dim=-1) # 1, 2
192
+
193
+ # Get the path
194
+ bin_conf_name = ["nyu", "kitti"][torch.argmax(
195
+ domain_vote, dim=-1).squeeze().item()]
196
+
197
+ try:
198
+ conf = [c for c in self.bin_conf if c.name == bin_conf_name][0]
199
+ except IndexError:
200
+ raise ValueError(
201
+ f"bin_conf_name {bin_conf_name} not found in bin_confs")
202
+
203
+ min_depth = conf['min_depth']
204
+ max_depth = conf['max_depth']
205
+
206
+ seed_bin_regressor = self.seed_bin_regressors[bin_conf_name]
207
+ _, seed_b_centers = seed_bin_regressor(x)
208
+ if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
209
+ b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth)
210
+ else:
211
+ b_prev = seed_b_centers
212
+ prev_b_embedding = self.seed_projector(x)
213
+
214
+ attractors = self.attractors[bin_conf_name]
215
+ for projector, attractor, x in zip(self.projectors, attractors, x_blocks):
216
+ b_embedding = projector(x)
217
+ b, b_centers = attractor(
218
+ b_embedding, b_prev, prev_b_embedding, interpolate=True)
219
+ b_prev = b
220
+ prev_b_embedding = b_embedding
221
+
222
+ last = outconv_activation
223
+
224
+ b_centers = nn.functional.interpolate(
225
+ b_centers, last.shape[-2:], mode='bilinear', align_corners=True)
226
+ b_embedding = nn.functional.interpolate(
227
+ b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
228
+
229
+ clb = self.conditional_log_binomial[bin_conf_name]
230
+ x = clb(last, b_embedding)
231
+
232
+ # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
233
+ # print(x.shape, b_centers.shape)
234
+ # b_centers = nn.functional.interpolate(b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
235
+ out = torch.sum(x * b_centers, dim=1, keepdim=True)
236
+
237
+ output = dict(domain_logits=domain_logits, metric_depth=out)
238
+ if return_final_centers or return_probs:
239
+ output['bin_centers'] = b_centers
240
+
241
+ if return_probs:
242
+ output['probs'] = x
243
+ return output
244
+
245
+ def get_lr_params(self, lr):
246
+ """
247
+ Learning rate configuration for different layers of the model
248
+
249
+ Args:
250
+ lr (float) : Base learning rate
251
+ Returns:
252
+ list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
253
+ """
254
+ param_conf = []
255
+ if self.train_midas:
256
+ def get_rel_pos_params():
257
+ for name, p in self.core.core.pretrained.named_parameters():
258
+ if "relative_position" in name:
259
+ yield p
260
+
261
+ def get_enc_params_except_rel_pos():
262
+ for name, p in self.core.core.pretrained.named_parameters():
263
+ if "relative_position" not in name:
264
+ yield p
265
+
266
+ encoder_params = get_enc_params_except_rel_pos()
267
+ rel_pos_params = get_rel_pos_params()
268
+ midas_params = self.core.core.scratch.parameters()
269
+ midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0
270
+ param_conf.extend([
271
+ {'params': encoder_params, 'lr': lr / self.encoder_lr_factor},
272
+ {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor},
273
+ {'params': midas_params, 'lr': lr / midas_lr_factor}
274
+ ])
275
+
276
+ remaining_modules = []
277
+ for name, child in self.named_children():
278
+ if name != 'core':
279
+ remaining_modules.append(child)
280
+ remaining_params = itertools.chain(
281
+ *[child.parameters() for child in remaining_modules])
282
+ param_conf.append({'params': remaining_params, 'lr': lr})
283
+ return param_conf
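The parameter groups returned here plug directly into a torch optimizer, e.g. `torch.optim.AdamW(model.get_lr_params(base_lr), lr=base_lr)` for a built model instance (a usage note, not part of the original file); the MiDaS encoder, its relative-position tables, and the new metric head then train with their own scaled learning rates.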
284
+
285
+ def get_conf_parameters(self, conf_name):
286
+ """
287
+ Returns parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
288
+ """
289
+ params = []
290
+ for name, child in self.named_children():
291
+ if isinstance(child, nn.ModuleDict):
292
+ for bin_conf_name, module in child.items():
293
+ if bin_conf_name == conf_name:
294
+ params += list(module.parameters())
295
+ return params
296
+
297
+ def freeze_conf(self, conf_name):
298
+ """
299
+ Freezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
300
+ """
301
+ for p in self.get_conf_parameters(conf_name):
302
+ p.requires_grad = False
303
+
304
+ def unfreeze_conf(self, conf_name):
305
+ """
306
+ Unfreezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
307
+ """
308
+ for p in self.get_conf_parameters(conf_name):
309
+ p.requires_grad = True
310
+
311
+ def freeze_all_confs(self):
312
+ """
313
+ Freezes all the parameters of all the ModuleDicts children
314
+ """
315
+ for name, child in self.named_children():
316
+ if isinstance(child, nn.ModuleDict):
317
+ for bin_conf_name, module in child.items():
318
+ for p in module.parameters():
319
+ p.requires_grad = False
320
+
321
+ @staticmethod
322
+ def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
323
+ core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
324
+ train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
325
+ model = ZoeDepthNK(core, **kwargs)
326
+ if pretrained_resource:
327
+ assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
328
+ model = load_state_from_resource(model, pretrained_resource)
329
+ return model
330
+
331
+ @staticmethod
332
+ def build_from_config(config):
333
+ return ZoeDepthNK.build(**config)
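A minimal end-to-end sketch of assumed usage for the class above (not part of the diff): it presumes the per-model JSON configs ship with the repo and that `get_config` from `utils/config.py` (added later in this commit) is importable under the shown path.

import torch
from zoedepth.utils.config import get_config   # assumed import path for this repo layout

config = get_config("zoedepth_nk", "infer")     # see utils/config.py below
model = ZoeDepthNK.build_from_config(config)    # ZoeDepthNK defined above
model.eval()

x = torch.rand(1, 3, 384, 512)                  # normalized RGB batch (B, 3, H, W)
with torch.no_grad():
    out = model(x, return_final_centers=True, return_probs=True)

print(out["metric_depth"].shape)                # (B, 1, h, w) metric depth map
print(out["domain_logits"].shape)               # (B, 2) nyu/kitti routing logits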
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/arg_utils.py ADDED
@@ -0,0 +1,33 @@
1
+
2
+
3
+ def infer_type(x): # hacky way to infer type from string args
4
+ if not isinstance(x, str):
5
+ return x
6
+
7
+ try:
8
+ x = int(x)
9
+ return x
10
+ except ValueError:
11
+ pass
12
+
13
+ try:
14
+ x = float(x)
15
+ return x
16
+ except ValueError:
17
+ pass
18
+
19
+ return x
20
+
21
+
22
+ def parse_unknown(unknown_args):
23
+ clean = []
24
+ for a in unknown_args:
25
+ if "=" in a:
26
+ k, v = a.split("=", 1)  # split only on the first '=' so values may themselves contain '='
27
+ clean.extend([k, v])
28
+ else:
29
+ clean.append(a)
30
+
31
+ keys = clean[::2]
32
+ values = clean[1::2]
33
+ return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)}
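A usage sketch (assumed, not part of the diff) showing how these helpers pair with argparse to turn leftover command-line flags into typed override kwargs:

import argparse
# assumed import path: from zoedepth.utils.arg_utils import parse_unknown

parser = argparse.ArgumentParser()
parser.add_argument("--model", default="zoedepth_nk")
args, unknown = parser.parse_known_args(
    ["--model", "zoedepth_nk", "--n_bins=64", "--lr", "0.0002"])

overrides = parse_unknown(unknown)
print(overrides)   # {'n_bins': 64, 'lr': 0.0002} -- infer_type casts numeric strings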
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/config.py ADDED
@@ -0,0 +1,437 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import json
26
+ import os
27
+
28
+ from .easydict import EasyDict as edict
29
+ from .arg_utils import infer_type
30
+
31
+ import pathlib
32
+ import platform
33
+
34
+ ROOT = pathlib.Path(__file__).parent.parent.resolve()
35
+
36
+ HOME_DIR = os.path.expanduser("~")
37
+
38
+ COMMON_CONFIG = {
39
+ "save_dir": os.path.expanduser("~/shortcuts/monodepth3_checkpoints"),
40
+ "project": "ZoeDepth",
41
+ "tags": '',
42
+ "notes": "",
43
+ "gpu": None,
44
+ "root": ".",
45
+ "uid": None,
46
+ "print_losses": False
47
+ }
48
+
49
+ DATASETS_CONFIG = {
50
+ "kitti": {
51
+ "dataset": "kitti",
52
+ "min_depth": 0.001,
53
+ "max_depth": 80,
54
+ "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
55
+ "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
56
+ "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt",
57
+ "input_height": 352,
58
+ "input_width": 1216, # 704
59
+ "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
60
+ "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
61
+ "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt",
62
+
63
+ "min_depth_eval": 1e-3,
64
+ "max_depth_eval": 80,
65
+
66
+ "do_random_rotate": True,
67
+ "degree": 1.0,
68
+ "do_kb_crop": True,
69
+ "garg_crop": True,
70
+ "eigen_crop": False,
71
+ "use_right": False
72
+ },
73
+ "kitti_test": {
74
+ "dataset": "kitti",
75
+ "min_depth": 0.001,
76
+ "max_depth": 80,
77
+ "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
78
+ "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
79
+ "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt",
80
+ "input_height": 352,
81
+ "input_width": 1216,
82
+ "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
83
+ "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
84
+ "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt",
85
+
86
+ "min_depth_eval": 1e-3,
87
+ "max_depth_eval": 80,
88
+
89
+ "do_random_rotate": False,
90
+ "degree": 1.0,
91
+ "do_kb_crop": True,
92
+ "garg_crop": True,
93
+ "eigen_crop": False,
94
+ "use_right": False
95
+ },
96
+ "nyu": {
97
+ "dataset": "nyu",
98
+ "avoid_boundary": False,
99
+ "min_depth": 1e-3, # originally 0.1
100
+ "max_depth": 10,
101
+ "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"),
102
+ "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"),
103
+ "filenames_file": "./train_test_inputs/nyudepthv2_train_files_with_gt.txt",
104
+ "input_height": 480,
105
+ "input_width": 640,
106
+ "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"),
107
+ "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"),
108
+ "filenames_file_eval": "./train_test_inputs/nyudepthv2_test_files_with_gt.txt",
109
+ "min_depth_eval": 1e-3,
110
+ "max_depth_eval": 10,
111
+ "min_depth_diff": -10,
112
+ "max_depth_diff": 10,
113
+
114
+ "do_random_rotate": True,
115
+ "degree": 1.0,
116
+ "do_kb_crop": False,
117
+ "garg_crop": False,
118
+ "eigen_crop": True
119
+ },
120
+ "ibims": {
121
+ "dataset": "ibims",
122
+ "ibims_root": os.path.join(HOME_DIR, "shortcuts/datasets/ibims/ibims1_core_raw/"),
123
+ "eigen_crop": True,
124
+ "garg_crop": False,
125
+ "do_kb_crop": False,
126
+ "min_depth_eval": 0,
127
+ "max_depth_eval": 10,
128
+ "min_depth": 1e-3,
129
+ "max_depth": 10
130
+ },
131
+ "sunrgbd": {
132
+ "dataset": "sunrgbd",
133
+ "sunrgbd_root": os.path.join(HOME_DIR, "shortcuts/datasets/SUNRGBD/test/"),
134
+ "eigen_crop": True,
135
+ "garg_crop": False,
136
+ "do_kb_crop": False,
137
+ "min_depth_eval": 0,
138
+ "max_depth_eval": 8,
139
+ "min_depth": 1e-3,
140
+ "max_depth": 10
141
+ },
142
+ "diml_indoor": {
143
+ "dataset": "diml_indoor",
144
+ "diml_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_indoor_test/"),
145
+ "eigen_crop": True,
146
+ "garg_crop": False,
147
+ "do_kb_crop": False,
148
+ "min_depth_eval": 0,
149
+ "max_depth_eval": 10,
150
+ "min_depth": 1e-3,
151
+ "max_depth": 10
152
+ },
153
+ "diml_outdoor": {
154
+ "dataset": "diml_outdoor",
155
+ "diml_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_outdoor_test/"),
156
+ "eigen_crop": False,
157
+ "garg_crop": True,
158
+ "do_kb_crop": False,
159
+ "min_depth_eval": 2,
160
+ "max_depth_eval": 80,
161
+ "min_depth": 1e-3,
162
+ "max_depth": 80
163
+ },
164
+ "diode_indoor": {
165
+ "dataset": "diode_indoor",
166
+ "diode_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_indoor/"),
167
+ "eigen_crop": True,
168
+ "garg_crop": False,
169
+ "do_kb_crop": False,
170
+ "min_depth_eval": 1e-3,
171
+ "max_depth_eval": 10,
172
+ "min_depth": 1e-3,
173
+ "max_depth": 10
174
+ },
175
+ "diode_outdoor": {
176
+ "dataset": "diode_outdoor",
177
+ "diode_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_outdoor/"),
178
+ "eigen_crop": False,
179
+ "garg_crop": True,
180
+ "do_kb_crop": False,
181
+ "min_depth_eval": 1e-3,
182
+ "max_depth_eval": 80,
183
+ "min_depth": 1e-3,
184
+ "max_depth": 80
185
+ },
186
+ "hypersim_test": {
187
+ "dataset": "hypersim_test",
188
+ "hypersim_test_root": os.path.join(HOME_DIR, "shortcuts/datasets/hypersim_test/"),
189
+ "eigen_crop": True,
190
+ "garg_crop": False,
191
+ "do_kb_crop": False,
192
+ "min_depth_eval": 1e-3,
193
+ "max_depth_eval": 80,
194
+ "min_depth": 1e-3,
195
+ "max_depth": 10
196
+ },
197
+ "vkitti": {
198
+ "dataset": "vkitti",
199
+ "vkitti_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti_test/"),
200
+ "eigen_crop": False,
201
+ "garg_crop": True,
202
+ "do_kb_crop": True,
203
+ "min_depth_eval": 1e-3,
204
+ "max_depth_eval": 80,
205
+ "min_depth": 1e-3,
206
+ "max_depth": 80
207
+ },
208
+ "vkitti2": {
209
+ "dataset": "vkitti2",
210
+ "vkitti2_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti2/"),
211
+ "eigen_crop": False,
212
+ "garg_crop": True,
213
+ "do_kb_crop": True,
214
+ "min_depth_eval": 1e-3,
215
+ "max_depth_eval": 80,
216
+ "min_depth": 1e-3,
217
+ "max_depth": 80,
218
+ },
219
+ "ddad": {
220
+ "dataset": "ddad",
221
+ "ddad_root": os.path.join(HOME_DIR, "shortcuts/datasets/ddad/ddad_val/"),
222
+ "eigen_crop": False,
223
+ "garg_crop": True,
224
+ "do_kb_crop": True,
225
+ "min_depth_eval": 1e-3,
226
+ "max_depth_eval": 80,
227
+ "min_depth": 1e-3,
228
+ "max_depth": 80,
229
+ },
230
+ }
231
+
232
+ ALL_INDOOR = ["nyu", "ibims", "sunrgbd", "diode_indoor", "hypersim_test"]
233
+ ALL_OUTDOOR = ["kitti", "diml_outdoor", "diode_outdoor", "vkitti2", "ddad"]
234
+ ALL_EVAL_DATASETS = ALL_INDOOR + ALL_OUTDOOR
235
+
236
+ COMMON_TRAINING_CONFIG = {
237
+ "dataset": "nyu",
238
+ "distributed": True,
239
+ "workers": 16,
240
+ "clip_grad": 0.1,
241
+ "use_shared_dict": False,
242
+ "shared_dict": None,
243
+ "use_amp": False,
244
+
245
+ "aug": True,
246
+ "random_crop": False,
247
+ "random_translate": False,
248
+ "translate_prob": 0.2,
249
+ "max_translation": 100,
250
+
251
+ "validate_every": 0.25,
252
+ "log_images_every": 0.1,
253
+ "prefetch": False,
254
+ }
255
+
256
+
257
+ def flatten(config, except_keys=('bin_conf',)):  # tuple, not a bare string, so the membership test below is exact
258
+ def recurse(inp):
259
+ if isinstance(inp, dict):
260
+ for key, value in inp.items():
261
+ if key in except_keys:
262
+ yield (key, value)
263
+ if isinstance(value, dict):
264
+ yield from recurse(value)
265
+ else:
266
+ yield (key, value)
267
+
268
+ return dict(list(recurse(config)))
269
+
270
+
271
+ def split_combined_args(kwargs):
272
+ """Splits the arguments that are combined with '__' into multiple arguments.
273
+ Combined arguments should have equal number of keys and values.
274
+ Keys are separated by '__' and Values are separated with ';'.
275
+ For example, '__n_bins__lr=256;0.001'
276
+
277
+ Args:
278
+ kwargs (dict): key-value pairs of arguments where key-value is optionally combined according to the above format.
279
+
280
+ Returns:
281
+ dict: Parsed dict with the combined arguments split into individual key-value pairs.
282
+ """
283
+ new_kwargs = dict(kwargs)
284
+ for key, value in kwargs.items():
285
+ if key.startswith("__"):
286
+ keys = key.split("__")[1:]
287
+ values = value.split(";")
288
+ assert len(keys) == len(
289
+ values), f"Combined arguments should have equal number of keys and values. Keys are separated by '__' and Values are separated with ';'. For example, '__n_bins__lr=256;0.001. Given (keys,values) is ({keys}, {values})"
290
+ for k, v in zip(keys, values):
291
+ new_kwargs[k] = v
292
+ return new_kwargs
293
+
294
+
295
+ def parse_list(config, key, dtype=int):
296
+ """Parse a list of values for the key if the value is a string. The values are separated by a comma.
297
+ Modifies the config in place.
298
+ """
299
+ if key in config:
300
+ if isinstance(config[key], str):
301
+ config[key] = list(map(dtype, config[key].split(',')))
302
+ assert isinstance(config[key], list) and all([isinstance(e, dtype) for e in config[key]]
303
+ ), f"{key} should be a list of values dtype {dtype}. Given {config[key]} of type {type(config[key])} with values of type {[type(e) for e in config[key]]}."
304
+
305
+
306
+ def get_model_config(model_name, model_version=None):
307
+ """Find and parse the .json config file for the model.
308
+
309
+ Args:
310
+ model_name (str): name of the model. The config file should be named config_{model_name}[_{model_version}].json under the models/{model_name} directory.
311
+ model_version (str, optional): Specific config version. If specified config_{model_name}_{model_version}.json is searched for and used. Otherwise config_{model_name}.json is used. Defaults to None.
312
+
313
+ Returns:
314
+ easydict: the config dictionary for the model.
315
+ """
316
+ config_fname = f"config_{model_name}_{model_version}.json" if model_version is not None else f"config_{model_name}.json"
317
+ config_file = os.path.join(ROOT, "models", model_name, config_fname)
318
+ if not os.path.exists(config_file):
319
+ return None
320
+
321
+ with open(config_file, "r") as f:
322
+ config = edict(json.load(f))
323
+
324
+ # handle dictionary inheritance
325
+ # only training config is supported for inheritance
326
+ if "inherit" in config.train and config.train.inherit is not None:
327
+ inherit_config = get_model_config(config.train["inherit"]).train
328
+ for key, value in inherit_config.items():
329
+ if key not in config.train:
330
+ config.train[key] = value
331
+ return edict(config)
332
+
333
+
334
+ def update_model_config(config, mode, model_name, model_version=None, strict=False):
335
+ model_config = get_model_config(model_name, model_version)
336
+ if model_config is not None:
337
+ config = {**config, **
338
+ flatten({**model_config.model, **model_config[mode]})}
339
+ elif strict:
340
+ raise ValueError(f"Config file for model {model_name} not found.")
341
+ return config
342
+
343
+
344
+ def check_choices(name, value, choices):
345
+ # return # No checks in dev branch
346
+ if value not in choices:
347
+ raise ValueError(f"{name} {value} not in supported choices {choices}")
348
+
349
+
350
+ KEYS_TYPE_BOOL = ["use_amp", "distributed", "use_shared_dict", "same_lr", "aug", "three_phase",
351
+ "prefetch", "cycle_momentum"] # Casting is not necessary as their int casted values in config are 0 or 1
352
+
353
+
354
+ def get_config(model_name, mode='train', dataset=None, **overwrite_kwargs):
355
+ """Main entry point to get the config for the model.
356
+
357
+ Args:
358
+ model_name (str): name of the desired model.
359
+ mode (str, optional): "train" or "infer". Defaults to 'train'.
360
+ dataset (str, optional): If specified, the corresponding dataset configuration is loaded as well. Defaults to None.
361
+
362
+ Keyword Args: key-value pairs of arguments to overwrite the default config.
363
+
364
+ The order of precedence for overwriting the config is (Higher precedence first):
365
+ # 1. overwrite_kwargs
366
+ # 2. "config_version": Config file version if specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{config_version}.json
367
+ # 3. "version_name": Default Model version specific config specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{version_name}.json
368
+ # 4. common_config: Default config for all models specified in COMMON_CONFIG
369
+
370
+ Returns:
371
+ easydict: The config dictionary for the model.
372
+ """
373
+
374
+
375
+ check_choices("Model", model_name, ["zoedepth", "zoedepth_nk"])
376
+ check_choices("Mode", mode, ["train", "infer", "eval"])
377
+ if mode == "train":
378
+ check_choices("Dataset", dataset, ["nyu", "kitti", "mix", None])
379
+
380
+ config = flatten({**COMMON_CONFIG, **COMMON_TRAINING_CONFIG})
381
+ config = update_model_config(config, mode, model_name)
382
+
383
+ # update with model version specific config
384
+ version_name = overwrite_kwargs.get("version_name", config["version_name"])
385
+ config = update_model_config(config, mode, model_name, version_name)
386
+
387
+ # update with config version if specified
388
+ config_version = overwrite_kwargs.get("config_version", None)
389
+ if config_version is not None:
390
+ print("Overwriting config with config_version", config_version)
391
+ config = update_model_config(config, mode, model_name, config_version)
392
+
393
+ # update with overwrite_kwargs
394
+ # Combined args are useful for hyperparameter search
395
+ overwrite_kwargs = split_combined_args(overwrite_kwargs)
396
+ config = {**config, **overwrite_kwargs}
397
+
398
+ # Casting to bool # TODO: Not necessary. Remove and test
399
+ for key in KEYS_TYPE_BOOL:
400
+ if key in config:
401
+ config[key] = bool(config[key])
402
+
403
+ # Model specific post processing of config
404
+ parse_list(config, "n_attractors")
405
+
406
+ # adjust n_bins for each bin configuration if bin_conf is given and n_bins is passed in overwrite_kwargs
407
+ if 'bin_conf' in config and 'n_bins' in overwrite_kwargs:
408
+ bin_conf = config['bin_conf'] # list of dicts
409
+ n_bins = overwrite_kwargs['n_bins']
410
+ new_bin_conf = []
411
+ for conf in bin_conf:
412
+ conf['n_bins'] = n_bins
413
+ new_bin_conf.append(conf)
414
+ config['bin_conf'] = new_bin_conf
415
+
416
+ if mode == "train":
417
+ orig_dataset = dataset
418
+ if dataset == "mix":
419
+ dataset = 'nyu' # Use nyu as default for mix. Dataset config is changed accordingly while loading the dataloader
420
+ if dataset is not None:
421
+ config['project'] = f"MonoDepth3-{orig_dataset}" # Set project for wandb
422
+
423
+ if dataset is not None:
424
+ config['dataset'] = dataset
425
+ config = {**DATASETS_CONFIG[dataset], **config}
426
+
427
+
428
+ config['model'] = model_name
429
+ typed_config = {k: infer_type(v) for k, v in config.items()}
430
+ # add hostname to config
431
+ config['hostname'] = platform.node()
432
+ return edict(typed_config)
433
+
434
+
435
+ def change_dataset(config, new_dataset):
436
+ config.update(DATASETS_CONFIG[new_dataset])
437
+ return config
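A usage sketch (assumed; it requires the per-model `config_*.json` files to be present under `models/<model_name>`):

# assumed import path: from zoedepth.utils.config import get_config
conf = get_config("zoedepth_nk", mode="infer")
print(conf.model)                                  # "zoedepth_nk"

# Training config for KITTI, with n_bins overridden in every bin_conf entry
train_conf = get_config("zoedepth_nk", mode="train", dataset="kitti", n_bins=64)
print(train_conf.dataset, train_conf.max_depth)    # kitti 80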
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/easydict/__init__.py ADDED
@@ -0,0 +1,158 @@
1
+ """
2
+ EasyDict
3
+ Copy/pasted from https://github.com/makinacorpus/easydict
4
+ Original author: Mathieu Leplatre <mathieu.leplatre@makina-corpus.com>
5
+ """
6
+
7
+ class EasyDict(dict):
8
+ """
9
+ Get attributes
10
+
11
+ >>> d = EasyDict({'foo':3})
12
+ >>> d['foo']
13
+ 3
14
+ >>> d.foo
15
+ 3
16
+ >>> d.bar
17
+ Traceback (most recent call last):
18
+ ...
19
+ AttributeError: 'EasyDict' object has no attribute 'bar'
20
+
21
+ Works recursively
22
+
23
+ >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
24
+ >>> isinstance(d.bar, dict)
25
+ True
26
+ >>> d.bar.x
27
+ 1
28
+
29
+ Bullet-proof
30
+
31
+ >>> EasyDict({})
32
+ {}
33
+ >>> EasyDict(d={})
34
+ {}
35
+ >>> EasyDict(None)
36
+ {}
37
+ >>> d = {'a': 1}
38
+ >>> EasyDict(**d)
39
+ {'a': 1}
40
+ >>> EasyDict((('a', 1), ('b', 2)))
41
+ {'a': 1, 'b': 2}
42
+
43
+ Set attributes
44
+
45
+ >>> d = EasyDict()
46
+ >>> d.foo = 3
47
+ >>> d.foo
48
+ 3
49
+ >>> d.bar = {'prop': 'value'}
50
+ >>> d.bar.prop
51
+ 'value'
52
+ >>> d
53
+ {'foo': 3, 'bar': {'prop': 'value'}}
54
+ >>> d.bar.prop = 'newer'
55
+ >>> d.bar.prop
56
+ 'newer'
57
+
58
+
59
+ Values extraction
60
+
61
+ >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
62
+ >>> isinstance(d.bar, list)
63
+ True
64
+ >>> from operator import attrgetter
65
+ >>> list(map(attrgetter('x'), d.bar))
66
+ [1, 3]
67
+ >>> list(map(attrgetter('y'), d.bar))
68
+ [2, 4]
69
+ >>> d = EasyDict()
70
+ >>> list(d.keys())
71
+ []
72
+ >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
73
+ >>> d.foo
74
+ 3
75
+ >>> d.bar.x
76
+ 1
77
+
78
+ Still like a dict though
79
+
80
+ >>> o = EasyDict({'clean':True})
81
+ >>> list(o.items())
82
+ [('clean', True)]
83
+
84
+ And like a class
85
+
86
+ >>> class Flower(EasyDict):
87
+ ... power = 1
88
+ ...
89
+ >>> f = Flower()
90
+ >>> f.power
91
+ 1
92
+ >>> f = Flower({'height': 12})
93
+ >>> f.height
94
+ 12
95
+ >>> f['power']
96
+ 1
97
+ >>> sorted(f.keys())
98
+ ['height', 'power']
99
+
100
+ update and pop items
101
+ >>> d = EasyDict(a=1, b='2')
102
+ >>> e = EasyDict(c=3.0, a=9.0)
103
+ >>> d.update(e)
104
+ >>> d.c
105
+ 3.0
106
+ >>> d['c']
107
+ 3.0
108
+ >>> d.get('c')
109
+ 3.0
110
+ >>> d.update(a=4, b=4)
111
+ >>> d.b
112
+ 4
113
+ >>> d.pop('a')
114
+ 4
115
+ >>> d.a
116
+ Traceback (most recent call last):
117
+ ...
118
+ AttributeError: 'EasyDict' object has no attribute 'a'
119
+ """
120
+ def __init__(self, d=None, **kwargs):
121
+ if d is None:
122
+ d = {}
123
+ else:
124
+ d = dict(d)
125
+ if kwargs:
126
+ d.update(**kwargs)
127
+ for k, v in d.items():
128
+ setattr(self, k, v)
129
+ # Class attributes
130
+ for k in self.__class__.__dict__.keys():
131
+ if not (k.startswith('__') and k.endswith('__')) and k not in ('update', 'pop'):
132
+ setattr(self, k, getattr(self, k))
133
+
134
+ def __setattr__(self, name, value):
135
+ if isinstance(value, (list, tuple)):
136
+ value = [self.__class__(x)
137
+ if isinstance(x, dict) else x for x in value]
138
+ elif isinstance(value, dict) and not isinstance(value, self.__class__):
139
+ value = self.__class__(value)
140
+ super(EasyDict, self).__setattr__(name, value)
141
+ super(EasyDict, self).__setitem__(name, value)
142
+
143
+ __setitem__ = __setattr__
144
+
145
+ def update(self, e=None, **f):
146
+ d = e or dict()
147
+ d.update(f)
148
+ for k in d:
149
+ setattr(self, k, d[k])
150
+
151
+ def pop(self, k, d=None):
152
+ delattr(self, k)
153
+ return super(EasyDict, self).pop(k, d)
154
+
155
+
156
+ if __name__ == "__main__":
157
+ import doctest
158
+ doctest.testmod()
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/geometry.py ADDED
@@ -0,0 +1,98 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ import numpy as np
26
+
27
+ def get_intrinsics(H,W):
28
+ """
29
+ Intrinsics for a pinhole camera model.
30
+ Assume fov of 55 degrees and central principal point.
31
+ """
32
+ f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0)
33
+ cx = 0.5 * W
34
+ cy = 0.5 * H
35
+ return np.array([[f, 0, cx],
36
+ [0, f, cy],
37
+ [0, 0, 1]])
38
+
39
+ def depth_to_points(depth, R=None, t=None):
40
+
41
+ K = get_intrinsics(depth.shape[1], depth.shape[2])
42
+ Kinv = np.linalg.inv(K)
43
+ if R is None:
44
+ R = np.eye(3)
45
+ if t is None:
46
+ t = np.zeros(3)
47
+
48
+ # M converts from your coordinate to PyTorch3D's coordinate system
49
+ M = np.eye(3)
50
+ M[0, 0] = -1.0
51
+ M[1, 1] = -1.0
52
+
53
+ height, width = depth.shape[1:3]
54
+
55
+ x = np.arange(width)
56
+ y = np.arange(height)
57
+ coord = np.stack(np.meshgrid(x, y), -1)
58
+ coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1
59
+ coord = coord.astype(np.float32)
60
+ # coord = torch.as_tensor(coord, dtype=torch.float32, device=device)
61
+ coord = coord[None] # bs, h, w, 3
62
+
63
+ D = depth[:, :, :, None, None]
64
+ # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape )
65
+ pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None]
66
+ # pts3D_1 live in your coordinate system. Convert them to Py3D's
67
+ pts3D_1 = M[None, None, None, ...] @ pts3D_1
68
+ # from reference to target viewpoint
69
+ pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None]
70
+ # pts3D_2 = pts3D_1
71
+ # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w
72
+ return pts3D_2[:, :, :, :3, 0][0]
73
+
74
+
75
+ def create_triangles(h, w, mask=None):
76
+ """
77
+ Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68
78
+ Creates mesh triangle indices from a given pixel grid size.
79
+ This function is not and need not be differentiable as triangle indices are
80
+ fixed.
81
+ Args:
82
+ h: (int) denoting the height of the image.
83
+ w: (int) denoting the width of the image.
84
+ Returns:
85
+ triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3)
86
+ """
87
+ x, y = np.meshgrid(range(w - 1), range(h - 1))
88
+ tl = y * w + x
89
+ tr = y * w + x + 1
90
+ bl = (y + 1) * w + x
91
+ br = (y + 1) * w + x + 1
92
+ triangles = np.array([tl, bl, tr, br, tr, bl])
93
+ triangles = np.transpose(triangles, (1, 2, 0)).reshape(
94
+ ((w - 1) * (h - 1) * 2, 3))
95
+ if mask is not None:
96
+ mask = mask.reshape(-1)
97
+ triangles = triangles[mask[triangles].all(1)]
98
+ return triangles
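A usage sketch (assumed) that unprojects a depth map into a point cloud and builds the matching triangle-mesh indices:

import numpy as np
# assumed import path: from zoedepth.utils.geometry import depth_to_points, create_triangles

depth = np.random.uniform(0.5, 10.0, size=(1, 480, 640)).astype(np.float32)
points = depth_to_points(depth)            # (H, W, 3) points in the flipped (PyTorch3D-style) frame
triangles = create_triangles(480, 640)     # (2*(H-1)*(W-1), 3) vertex indices

verts = points.reshape(-1, 3)
print(verts.shape, triangles.shape, triangles.max())   # max index < 480*640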
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/misc.py ADDED
@@ -0,0 +1,368 @@
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Intelligent Systems Lab Org
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ # File author: Shariq Farooq Bhat
24
+
25
+ """Miscellaneous utility functions."""
26
+
27
+ from scipy import ndimage
28
+
29
+ import base64
30
+ import math
31
+ import re
32
+ from io import BytesIO
33
+
34
+ import matplotlib
35
+ import matplotlib.cm
36
+ import numpy as np
37
+ import requests
38
+ import torch
39
+ import torch.distributed as dist
40
+ import torch.nn
41
+ import torch.nn as nn
42
+ import torch.utils.data.distributed
43
+ from PIL import Image
44
+ from torchvision.transforms import ToTensor
45
+
46
+
47
+ class RunningAverage:
48
+ def __init__(self):
49
+ self.avg = 0
50
+ self.count = 0
51
+
52
+ def append(self, value):
53
+ self.avg = (value + self.count * self.avg) / (self.count + 1)
54
+ self.count += 1
55
+
56
+ def get_value(self):
57
+ return self.avg
58
+
59
+
60
+ def denormalize(x):
61
+ """Reverses the imagenet normalization applied to the input.
62
+
63
+ Args:
64
+ x (torch.Tensor - shape(N,3,H,W)): input tensor
65
+
66
+ Returns:
67
+ torch.Tensor - shape(N,3,H,W): Denormalized input
68
+ """
69
+ mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
70
+ std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
71
+ return x * std + mean
72
+
73
+
74
+ class RunningAverageDict:
75
+ """A dictionary of running averages."""
76
+ def __init__(self):
77
+ self._dict = None
78
+
79
+ def update(self, new_dict):
80
+ if new_dict is None:
81
+ return
82
+
83
+ if self._dict is None:
84
+ self._dict = dict()
85
+ for key, value in new_dict.items():
86
+ self._dict[key] = RunningAverage()
87
+
88
+ for key, value in new_dict.items():
89
+ self._dict[key].append(value)
90
+
91
+ def get_value(self):
92
+ if self._dict is None:
93
+ return None
94
+ return {key: value.get_value() for key, value in self._dict.items()}
95
+
96
+
97
+ def colorize(value, vmin=None, vmax=None, cmap='gray_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None):
98
+ """Converts a depth map to a color image.
99
+
100
+ Args:
101
+ value (torch.Tensor, numpy.ndarray): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed
102
+ vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None.
103
+ vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None.
104
+ cmap (str, optional): matplotlib colormap to use. Defaults to 'gray_r'.
105
+ invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99.
106
+ invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None.
107
+ background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255).
108
+ gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False.
109
+ value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None.
110
+
111
+ Returns:
112
+ numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4)
113
+ """
114
+ if isinstance(value, torch.Tensor):
115
+ value = value.detach().cpu().numpy()
116
+
117
+ value = value.squeeze()
118
+ if invalid_mask is None:
119
+ invalid_mask = value == invalid_val
120
+ mask = np.logical_not(invalid_mask)
121
+
122
+ # normalize
123
+ vmin = np.percentile(value[mask],2) if vmin is None else vmin
124
+ vmax = np.percentile(value[mask],85) if vmax is None else vmax
125
+ if vmin != vmax:
126
+ value = (value - vmin) / (vmax - vmin) # vmin..vmax
127
+ else:
128
+ # Avoid 0-division
129
+ value = value * 0.
130
+
131
+ # squeeze last dim if it exists
132
+ # grey out the invalid values
133
+
134
+ value[invalid_mask] = np.nan
135
+ cmapper = matplotlib.cm.get_cmap(cmap)
136
+ if value_transform:
137
+ value = value_transform(value)
138
+ # value = value / value.max()
139
+ value = cmapper(value, bytes=True) # (nxmx4)
140
+
141
+ # img = value[:, :, :]
142
+ img = value[...]
143
+ img[invalid_mask] = background_color
144
+
145
+ # return img.transpose((2, 0, 1))
146
+ if gamma_corrected:
147
+ # gamma correction
148
+ img = img / 255
149
+ img = np.power(img, 2.2)
150
+ img = img * 255
151
+ img = img.astype(np.uint8)
152
+ return img
153
+
154
+
155
+ def count_parameters(model, include_all=False):
156
+ return sum(p.numel() for p in model.parameters() if p.requires_grad or include_all)
157
+
158
+
159
+ def compute_errors(gt, pred):
160
+ """Compute metrics for 'pred' compared to 'gt'
161
+
162
+ Args:
163
+ gt (numpy.ndarray): Ground truth values
164
+ pred (numpy.ndarray): Predicted values
165
+
166
+ gt.shape should be equal to pred.shape
167
+
168
+ Returns:
169
+ dict: Dictionary containing the following metrics:
170
+ 'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25
171
+ 'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2
172
+ 'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3
173
+ 'abs_rel': Absolute relative error
174
+ 'rmse': Root mean squared error
175
+ 'log_10': Absolute log10 error
176
+ 'sq_rel': Squared relative error
177
+ 'rmse_log': Root mean squared error on the log scale
178
+ 'silog': Scale invariant log error
179
+ """
180
+ thresh = np.maximum((gt / pred), (pred / gt))
181
+ a1 = (thresh < 1.25).mean()
182
+ a2 = (thresh < 1.25 ** 2).mean()
183
+ a3 = (thresh < 1.25 ** 3).mean()
184
+
185
+ abs_rel = np.mean(np.abs(gt - pred) / gt)
186
+ sq_rel = np.mean(((gt - pred) ** 2) / gt)
187
+
188
+ rmse = (gt - pred) ** 2
189
+ rmse = np.sqrt(rmse.mean())
190
+
191
+ rmse_log = (np.log(gt) - np.log(pred)) ** 2
192
+ rmse_log = np.sqrt(rmse_log.mean())
193
+
194
+ err = np.log(pred) - np.log(gt)
195
+ silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100
196
+
197
+ log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean()
198
+ return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log,
199
+ silog=silog, sq_rel=sq_rel)
200
+
201
+
202
+ def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, **kwargs):
203
+ """Compute metrics of predicted depth maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics.
204
+ """
205
+ if 'config' in kwargs:
206
+ config = kwargs['config']
207
+ garg_crop = config.garg_crop
208
+ eigen_crop = config.eigen_crop
209
+ min_depth_eval = config.min_depth_eval
210
+ max_depth_eval = config.max_depth_eval
211
+
212
+ if gt.shape[-2:] != pred.shape[-2:] and interpolate:
213
+ pred = nn.functional.interpolate(
214
+ pred, gt.shape[-2:], mode='bilinear', align_corners=True)
215
+
216
+ pred = pred.squeeze().cpu().numpy()
217
+ pred[pred < min_depth_eval] = min_depth_eval
218
+ pred[pred > max_depth_eval] = max_depth_eval
219
+ pred[np.isinf(pred)] = max_depth_eval
220
+ pred[np.isnan(pred)] = min_depth_eval
221
+
222
+ gt_depth = gt.squeeze().cpu().numpy()
223
+ valid_mask = np.logical_and(
224
+ gt_depth > min_depth_eval, gt_depth < max_depth_eval)
225
+
226
+ if garg_crop or eigen_crop:
227
+ gt_height, gt_width = gt_depth.shape
228
+ eval_mask = np.zeros(valid_mask.shape)
229
+
230
+ if garg_crop:
231
+ eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
232
+ int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1
233
+
234
+ elif eigen_crop:
235
+ # print("-"*10, " EIGEN CROP ", "-"*10)
236
+ if dataset == 'kitti':
237
+ eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
238
+ int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
239
+ else:
240
+ # assert gt_depth.shape == (480, 640), "Error: Eigen crop is currently only valid for (480, 640) images"
241
+ eval_mask[45:471, 41:601] = 1
242
+ else:
243
+ eval_mask = np.ones(valid_mask.shape)
244
+ valid_mask = np.logical_and(valid_mask, eval_mask)
245
+ return compute_errors(gt_depth[valid_mask], pred[valid_mask])
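A quick evaluation sketch (assumed usage) with synthetic tensors and the default NYU-style eigen crop:

import torch
# assumed import path: from zoedepth.utils.misc import compute_metrics

gt = torch.rand(1, 1, 480, 640) * 10                       # fake ground-truth depth
pred = (gt + 0.05 * torch.randn_like(gt)).clamp(min=1e-3)  # fake prediction

metrics = compute_metrics(gt, pred, dataset="nyu",
                          min_depth_eval=0.1, max_depth_eval=10)
print(metrics["abs_rel"], metrics["rmse"], metrics["a1"])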
246
+
247
+
248
+ #################################### Model utils ################################################
249
+
250
+
251
+ def parallelize(config, model, find_unused_parameters=True):
252
+
253
+ if config.gpu is not None:
254
+ torch.cuda.set_device(config.gpu)
255
+ model = model.cuda(config.gpu)
256
+
257
+ config.multigpu = False
258
+ if config.distributed:
259
+ # Use DDP
260
+ config.multigpu = True
261
+ config.rank = config.rank * config.ngpus_per_node + config.gpu
262
+ dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url,
263
+ world_size=config.world_size, rank=config.rank)
264
+ config.batch_size = int(config.batch_size / config.ngpus_per_node)
265
+ # config.batch_size = 8
266
+ config.workers = int(
267
+ (config.num_workers + config.ngpus_per_node - 1) / config.ngpus_per_node)
268
+ print("Device", config.gpu, "Rank", config.rank, "batch size",
269
+ config.batch_size, "Workers", config.workers)
270
+ torch.cuda.set_device(config.gpu)
271
+ model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
272
+ model = model.cuda(config.gpu)
273
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu], output_device=config.gpu,
274
+ find_unused_parameters=find_unused_parameters)
275
+
276
+ elif config.gpu is None:
277
+ # Use DP
278
+ config.multigpu = True
279
+ model = model.cuda()
280
+ model = torch.nn.DataParallel(model)
281
+
282
+ return model
283
+
284
+
285
+ #################################################################################################
286
+
287
+
288
+ #####################################################################################################
289
+
290
+
291
+ class colors:
292
+ '''Colors class:
293
+ Reset all colors with colors.reset
294
+ Two subclasses fg for foreground and bg for background.
295
+ Use as colors.subclass.colorname.
296
+ i.e. colors.fg.red or colors.bg.green
297
+ Also, the generic bold, disable, underline, reverse, strikethrough,
298
+ and invisible work with the main class
299
+ i.e. colors.bold
300
+ '''
301
+ reset = '\033[0m'
302
+ bold = '\033[01m'
303
+ disable = '\033[02m'
304
+ underline = '\033[04m'
305
+ reverse = '\033[07m'
306
+ strikethrough = '\033[09m'
307
+ invisible = '\033[08m'
308
+
309
+ class fg:
310
+ black = '\033[30m'
311
+ red = '\033[31m'
312
+ green = '\033[32m'
313
+ orange = '\033[33m'
314
+ blue = '\033[34m'
315
+ purple = '\033[35m'
316
+ cyan = '\033[36m'
317
+ lightgrey = '\033[37m'
318
+ darkgrey = '\033[90m'
319
+ lightred = '\033[91m'
320
+ lightgreen = '\033[92m'
321
+ yellow = '\033[93m'
322
+ lightblue = '\033[94m'
323
+ pink = '\033[95m'
324
+ lightcyan = '\033[96m'
325
+
326
+ class bg:
327
+ black = '\033[40m'
328
+ red = '\033[41m'
329
+ green = '\033[42m'
330
+ orange = '\033[43m'
331
+ blue = '\033[44m'
332
+ purple = '\033[45m'
333
+ cyan = '\033[46m'
334
+ lightgrey = '\033[47m'
335
+
336
+
337
+ def printc(text, color):
338
+ print(f"{color}{text}{colors.reset}")
339
+
340
+ ############################################
341
+
342
+ def get_image_from_url(url):
343
+ response = requests.get(url)
344
+ img = Image.open(BytesIO(response.content)).convert("RGB")
345
+ return img
346
+
347
+ def url_to_torch(url, size=(384, 384)):
348
+ img = get_image_from_url(url)
349
+ img = img.resize(size, Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
350
+ img = torch.from_numpy(np.asarray(img)).float()
351
+ img = img.permute(2, 0, 1)
352
+ img.div_(255)
353
+ return img
354
+
355
+ def pil_to_batched_tensor(img):
356
+ return ToTensor()(img).unsqueeze(0)
357
+
358
+ def save_raw_16bit(depth, fpath="raw.png"):
359
+ if isinstance(depth, torch.Tensor):
360
+ depth = depth.squeeze().cpu().numpy()
361
+
362
+ assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array"
363
+ assert depth.ndim == 2, "Depth must be 2D"
364
+ depth = depth * 256 # scale for 16-bit png
365
+ depth = depth.astype(np.uint16)
366
+ depth = Image.fromarray(depth)
367
+ depth.save(fpath)
368
+ print("Saved raw depth to", fpath)