9e9d39220f9e6e307aecc00a29e8e24c648b930f8fc232b426bb2a4e5b4ffe21
Browse files- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py +234 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder +0 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE +21 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md +131 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh +5 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh +5 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh +34 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh +33 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh +16 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh +2 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt +189 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch +19 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch +23 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml +77 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py +61 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py +61 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py +53 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp +285 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh +16 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py +277 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md +147 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder +0 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py +112 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder +0 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py +119 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py +135 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py +234 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py +82 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py +199 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder +0 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/builder.py +51 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/depth_model.py +152 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/attractor.py +208 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/dist_layers.py +121 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/localbins_layers.py +169 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/patch_transformer.py +91 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/model_io.py +92 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/__init__.py +31 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json +58 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json +22 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py +250 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py +31 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json +67 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py +333 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/__init__.py +24 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/arg_utils.py +33 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/config.py +437 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/easydict/__init__.py +158 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/geometry.py +98 -0
- microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/misc.py +368 -0
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import cv2
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Upscale the sample so it is at least ``size``, keeping the aspect ratio.

    The current size is read from ``sample["disparity"]``. When an upscale is
    needed, ``sample["image"]``, ``sample["disparity"]`` and ``sample["mask"]``
    are all replaced in place with resized versions.

    Args:
        sample (dict): sample holding the "image", "disparity" and "mask" arrays
        size (tuple): minimum size; index 0 is compared against axis 0 of the
            disparity map and index 1 against axis 1
        image_interpolation_method: OpenCV interpolation flag used for the
            image only. Disparity and mask always use nearest neighbour.

    Returns:
        tuple: new size, when the sample had to be resized.
        dict: the untouched ``sample``, when it was already large enough.
        NOTE(review): the two return types differ; confirm which one callers
        expect before relying on the return value.
    """
    dims = list(sample["disparity"].shape)
    min_rows, min_cols = size[0], size[1]

    # Nothing to do when both axes already satisfy the minimum.
    if not (dims[0] < min_rows or dims[1] < min_cols):
        return sample

    # Use the larger of the two per-axis factors so both minimums are met.
    factor = max(min_rows / dims[0], min_cols / dims[1])
    dims[0] = math.ceil(factor * dims[0])
    dims[1] = math.ceil(factor * dims[1])

    # cv2.resize takes the target size in reversed axis order.
    dsize = tuple(reversed(dims))

    sample["image"] = cv2.resize(
        sample["image"], dsize, interpolation=image_interpolation_method
    )
    sample["disparity"] = cv2.resize(
        sample["disparity"], dsize, interpolation=cv2.INTER_NEAREST
    )

    # The mask is resized as float32 and converted back to bool afterwards.
    resized_mask = cv2.resize(
        sample["mask"].astype(np.float32),
        dsize,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = resized_mask.astype(bool)

    return tuple(dims)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (optional):
                OpenCV interpolation flag used for the image only.
                Defaults to cv2.INTER_AREA.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        Rounds to the nearest multiple first. If that exceeds ``max_val`` the
        value is rounded down instead; if the result is below ``min_val`` it
        is rounded up instead.

        Args:
            x (float): value to constrain
            min_val (int, optional): lower limit for the result. Defaults to 0.
            max_val (int, optional): upper limit for the result, or None for
                no upper limit. Defaults to None.

        Returns:
            int: the constrained value (a NumPy integer scalar)
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output size for an input of the given size.

        Args:
            width (int): input width
            height (int): input height

        Returns:
            tuple: (new_width, new_height)

        Raises:
            ValueError: if 'resize_method' is not one of "lower_bound",
                "upper_bound" or "minimal".
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            # Use one common scale for both axes; which one depends on the method.
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        """Resize the sample in place and return it.

        The image is always resized. When 'resize_target' is set, the
        "disparity", "depth" and "mask" entries are resized as well, each only
        if present in the sample, using nearest-neighbour interpolation.

        Args:
            sample (dict): sample with an "image" array whose axis 0 is the
                height and axis 1 the width

        Returns:
            dict: the same sample object with resized entries
        """
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

            # The mask is optional like the other targets; without this guard
            # an image-only sample raised KeyError under the default settings.
            if "mask" in sample:
                # Resized as float32 and converted back to bool afterwards.
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                sample["mask"] = sample["mask"].astype(bool)

        return sample
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class NormalizeImage(object):
    """Normalize the sample image with a given mean and std.
    """

    def __init__(self, mean, std):
        """Store the normalization constants.

        Args:
            mean: value subtracted from the image
            std: value the centered image is divided by
        """
        self.__mean, self.__std = mean, std

    def __call__(self, sample):
        """Replace ``sample["image"]`` with ``(image - mean) / std``.

        Args:
            sample (dict): sample holding an "image" entry

        Returns:
            dict: the same sample object with the normalized image
        """
        centered = sample["image"] - self.__mean
        sample["image"] = centered / self.__std
        return sample
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
class PrepareForNet(object):
    """Convert a sample into the array layout used as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        """Convert the sample entries to contiguous float32 arrays in place.

        The image axes are reordered from (0, 1, 2) to (2, 0, 1). The "mask",
        "disparity" and "depth" entries are converted only if present.

        Args:
            sample (dict): sample holding a 3-axis "image" array

        Returns:
            dict: the same sample object with converted entries
        """
        # Move the last image axis to the front.
        reordered = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(reordered).astype(np.float32)

        # All optional maps get the same float32 + contiguous treatment.
        for key in ("mask", "disparity", "depth"):
            if key in sample:
                as_float = sample[key].astype(np.float32)
                sample[key] = np.ascontiguousarray(as_float)

        return sample
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder
ADDED
|
File without changes
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2020 Alexey
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/README.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MiDaS for ROS1 by using LibTorch in C++
|
| 2 |
+
|
| 3 |
+
### Requirements
|
| 4 |
+
|
| 5 |
+
- Ubuntu 17.10 / 18.04 / 20.04, Debian Stretch
|
| 6 |
+
- ROS Melodic for Ubuntu (17.10 / 18.04) / Debian Stretch, ROS Noetic for Ubuntu 20.04
|
| 7 |
+
- C++11
|
| 8 |
+
- LibTorch >= 1.6
|
| 9 |
+
|
| 10 |
+
## Quick Start with a MiDaS Example
|
| 11 |
+
|
| 12 |
+
MiDaS is a neural network to compute depth from a single image.
|
| 13 |
+
|
| 14 |
+
* input from `image_topic`: `sensor_msgs/Image` - `RGB8` image with any shape
|
| 15 |
+
* output to `midas_topic`: `sensor_msgs/Image` - `TYPE_32FC1` inverse relative depth maps in range [0 - 255] with original size and channels=1
|
| 16 |
+
|
| 17 |
+
### Install Dependencies
|
| 18 |
+
|
| 19 |
+
* install ROS Melodic for Ubuntu 17.10 / 18.04:
|
| 20 |
+
```bash
|
| 21 |
+
wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_melodic_ubuntu_17_18.sh
|
| 22 |
+
./install_ros_melodic_ubuntu_17_18.sh
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
or Noetic for Ubuntu 20.04:
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
wget https://raw.githubusercontent.com/isl-org/MiDaS/master/ros/additions/install_ros_noetic_ubuntu_20.sh
|
| 29 |
+
./install_ros_noetic_ubuntu_20.sh
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
* install LibTorch 1.7 with CUDA 11.0:
|
| 34 |
+
|
| 35 |
+
On **Jetson (ARM)**:
|
| 36 |
+
```bash
|
| 37 |
+
wget https://nvidia.box.com/shared/static/wa34qwrwtk9njtyarwt5nvo6imenfy26.whl -O torch-1.7.0-cp36-cp36m-linux_aarch64.whl
|
| 38 |
+
sudo apt-get install python3-pip libopenblas-base libopenmpi-dev
|
| 39 |
+
pip3 install Cython
|
| 40 |
+
pip3 install numpy torch-1.7.0-cp36-cp36m-linux_aarch64.whl
|
| 41 |
+
```
|
| 42 |
+
Or compile LibTorch from source: https://github.com/pytorch/pytorch#from-source
|
| 43 |
+
|
| 44 |
+
On **Linux (x86_64)**:
|
| 45 |
+
```bash
|
| 46 |
+
cd ~/
|
| 47 |
+
wget https://download.pytorch.org/libtorch/cu110/libtorch-cxx11-abi-shared-with-deps-1.7.0%2Bcu110.zip
|
| 48 |
+
unzip libtorch-cxx11-abi-shared-with-deps-1.7.0+cu110.zip
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
* create symlink for OpenCV:
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
sudo ln -s /usr/include/opencv4 /usr/include/opencv
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
* download and install MiDaS:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
source ~/.bashrc
|
| 61 |
+
cd ~/
|
| 62 |
+
mkdir catkin_ws
|
| 63 |
+
cd catkin_ws
|
| 64 |
+
git clone https://github.com/isl-org/MiDaS
|
| 65 |
+
mkdir src
|
| 66 |
+
cp -r MiDaS/ros/* src
|
| 67 |
+
|
| 68 |
+
chmod +x src/additions/*.sh
|
| 69 |
+
chmod +x src/*.sh
|
| 70 |
+
chmod +x src/midas_cpp/scripts/*.py
|
| 71 |
+
cp src/additions/do_catkin_make.sh ./do_catkin_make.sh
|
| 72 |
+
./do_catkin_make.sh
|
| 73 |
+
./src/additions/downloads.sh
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Usage
|
| 77 |
+
|
| 78 |
+
* run only `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh`
|
| 79 |
+
|
| 80 |
+
#### Test
|
| 81 |
+
|
| 82 |
+
* Test - capture video and show result in the window:
|
| 83 |
+
* place any `test.mp4` video file to the directory `~/catkin_ws/src/`
|
| 84 |
+
* run `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh`
|
| 85 |
+
* run test nodes in another terminal: `cd ~/catkin_ws/src && ./run_talker_listener_test.sh` and wait 30 seconds
|
| 86 |
+
|
| 87 |
+
(to use Python 2, run command `sed -i 's/python3/python2/' ~/catkin_ws/src/midas_cpp/scripts/*.py` )
|
| 88 |
+
|
| 89 |
+
## Mobile version of MiDaS - Monocular Depth Estimation
|
| 90 |
+
|
| 91 |
+
### Accuracy
|
| 92 |
+
|
| 93 |
+
* MiDaS v2 small - ResNet50 default-decoder 384x384
|
| 94 |
+
* MiDaS v2.1 small - EfficientNet-Lite3 small-decoder 256x256
|
| 95 |
+
|
| 96 |
+
**Zero-shot error** (the lower - the better):
|
| 97 |
+
|
| 98 |
+
| Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 |
|
| 99 |
+
|---|---|---|---|---|---|---|
|
| 100 |
+
| MiDaS v2 small 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 |
|
| 101 |
+
| MiDaS v2.1 small 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** |
|
| 102 |
+
| Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** |
|
| 103 |
+
|
| 104 |
+
None of the Train/Valid/Test subsets of the datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were involved in training or fine-tuning.
|
| 105 |
+
|
| 106 |
+
### Inference speed (FPS) on nVidia GPU
|
| 107 |
+
|
| 108 |
+
Inference speed excluding pre and post processing, batch=1, **Frames Per Second** (the higher - the better):
|
| 109 |
+
|
| 110 |
+
| Model | Jetson Nano, FPS | RTX 2080Ti, FPS |
|
| 111 |
+
|---|---|---|
|
| 112 |
+
| MiDaS v2 small 384x384 | 1.6 | 117 |
|
| 113 |
+
| MiDaS v2.1 small 256x256 | 8.1 | 232 |
|
| 114 |
+
| SpeedUp, X times | **5x** | **2x** |
|
| 115 |
+
|
| 116 |
+
### Citation
|
| 117 |
+
|
| 118 |
+
This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3):
|
| 119 |
+
|
| 120 |
+
>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer
|
| 121 |
+
René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun
|
| 122 |
+
|
| 123 |
+
Please cite our paper if you use this code or any of the models:
|
| 124 |
+
```
|
| 125 |
+
@article{Ranftl2020,
|
| 126 |
+
author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun},
|
| 127 |
+
title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer},
|
| 128 |
+
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
|
| 129 |
+
year = {2020},
|
| 130 |
+
}
|
| 131 |
+
```
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build the catkin workspace in the current directory and load its environment.
# -p: do not fail when src already exists
mkdir -p src
catkin_make
source devel/setup.bash
echo $ROS_PACKAGE_PATH
chmod +x ./devel/setup.bash
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Download the traced MiDaS small model and copy it into ~/.ros.
# -p: do not fail when ~/.ros already exists
mkdir -p ~/.ros
wget https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-traced.pt
cp ./model-small-traced.pt ~/.ros/model-small-traced.pt
|
| 4 |
+
|
| 5 |
+
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#@title { display-mode: "code" }
|
| 2 |
+
|
| 3 |
+
#from http://wiki.ros.org/indigo/Installation/Ubuntu
|
| 4 |
+
|
| 5 |
+
#1.2 Setup sources.list
|
| 6 |
+
sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
|
| 7 |
+
|
| 8 |
+
# 1.3 Setup keys
|
| 9 |
+
sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
|
| 10 |
+
sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116
|
| 11 |
+
|
| 12 |
+
curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
|
| 13 |
+
|
| 14 |
+
# 1.4 Installation
|
| 15 |
+
sudo apt-get update
|
| 16 |
+
sudo apt-get upgrade
|
| 17 |
+
|
| 18 |
+
# Desktop-Full Install:
|
| 19 |
+
sudo apt-get install ros-melodic-desktop-full
|
| 20 |
+
|
| 21 |
+
printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc
|
| 22 |
+
|
| 23 |
+
# 1.5 Initialize rosdep
|
| 24 |
+
sudo rosdep init
|
| 25 |
+
rosdep update
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# 1.7 Getting rosinstall (python)
|
| 29 |
+
sudo apt-get install python-rosinstall
|
| 30 |
+
sudo apt-get install python-catkin-tools
|
| 31 |
+
sudo apt-get install python-rospy
|
| 32 |
+
sudo apt-get install python-rosdep
|
| 33 |
+
sudo apt-get install python-roscd
|
| 34 |
+
sudo apt-get install python-pip
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#@title { display-mode: "code" }
|
| 2 |
+
|
| 3 |
+
#from http://wiki.ros.org/indigo/Installation/Ubuntu
|
| 4 |
+
|
| 5 |
+
#1.2 Setup sources.list
|
| 6 |
+
sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
|
| 7 |
+
|
| 8 |
+
# 1.3 Setup keys
|
| 9 |
+
sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
|
| 10 |
+
|
| 11 |
+
curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
|
| 12 |
+
|
| 13 |
+
# 1.4 Installation
|
| 14 |
+
sudo apt-get update
|
| 15 |
+
sudo apt-get upgrade
|
| 16 |
+
|
| 17 |
+
# Desktop-Full Install:
|
| 18 |
+
sudo apt-get install ros-noetic-desktop-full
|
| 19 |
+
|
| 20 |
+
printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc
|
| 21 |
+
|
| 22 |
+
# 1.5 Initialize rosdep
|
| 23 |
+
sudo rosdep init
|
| 24 |
+
rosdep update
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# 1.7 Getting rosinstall (python)
|
| 28 |
+
sudo apt-get install python3-rosinstall
|
| 29 |
+
sudo apt-get install python3-catkin-tools
|
| 30 |
+
sudo apt-get install python3-rospy
|
| 31 |
+
sudo apt-get install python3-rosdep
|
| 32 |
+
sudo apt-get install python3-roscd
|
| 33 |
+
sudo apt-get install python3-pip
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cd ~/catkin_ws/src
|
| 2 |
+
catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport
|
| 3 |
+
cd ~/catkin_ws
|
| 4 |
+
catkin_make
|
| 5 |
+
|
| 6 |
+
chmod +x ~/catkin_ws/devel/setup.bash
|
| 7 |
+
printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc
|
| 8 |
+
source ~/catkin_ws/devel/setup.bash
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
sudo rosdep init
|
| 12 |
+
rosdep update
|
| 13 |
+
#rospack depends1 midas_cpp
|
| 14 |
+
roscd midas_cpp
|
| 15 |
+
#cat package.xml
|
| 16 |
+
#rospack depends midas_cpp
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
source ~/catkin_ws/devel/setup.bash
|
| 2 |
+
roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true"
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/CMakeLists.txt
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cmake_minimum_required(VERSION 3.0.2)
|
| 2 |
+
project(midas_cpp)
|
| 3 |
+
|
| 4 |
+
## Compile as C++11, supported in ROS Kinetic and newer
|
| 5 |
+
# add_compile_options(-std=c++11)
|
| 6 |
+
|
| 7 |
+
## Find catkin macros and libraries
|
| 8 |
+
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
|
| 9 |
+
## is used, also find other catkin packages
|
| 10 |
+
find_package(catkin REQUIRED COMPONENTS
|
| 11 |
+
cv_bridge
|
| 12 |
+
image_transport
|
| 13 |
+
roscpp
|
| 14 |
+
rospy
|
| 15 |
+
sensor_msgs
|
| 16 |
+
std_msgs
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
## System dependencies are found with CMake's conventions
|
| 20 |
+
# find_package(Boost REQUIRED COMPONENTS system)
|
| 21 |
+
|
| 22 |
+
# Locate libtorch: prefer a standalone copy in the user's home directory and
# fall back to the copy bundled with a system-wide PyTorch pip install.
# CMake never expands a leading "~", so the home-relative path is built from
# the HOME environment variable instead.
set(MIDAS_LIBTORCH_HOME "$ENV{HOME}/libtorch")

list(APPEND CMAKE_PREFIX_PATH "${MIDAS_LIBTORCH_HOME}")
list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python3.6/dist-packages/torch/lib")
list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python2.7/dist-packages/torch/lib")

# Only wire up the pip-installed PyTorch when no standalone libtorch exists.
if(NOT EXISTS "${MIDAS_LIBTORCH_HOME}")
  if (EXISTS "/usr/local/lib/python3.6/dist-packages/torch")
    include_directories(/usr/local/include)
    include_directories(/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include)
    include_directories(/usr/local/lib/python3.6/dist-packages/torch/include)

    link_directories(/usr/local/lib)
    link_directories(/usr/local/lib/python3.6/dist-packages/torch/lib)

    set(CMAKE_PREFIX_PATH /usr/local/lib/python3.6/dist-packages/torch)
    set(Boost_USE_MULTITHREADED ON)
    set(Torch_DIR /usr/local/lib/python3.6/dist-packages/torch)

  elseif (EXISTS "/usr/local/lib/python2.7/dist-packages/torch")

    include_directories(/usr/local/include)
    include_directories(/usr/local/lib/python2.7/dist-packages/torch/include/torch/csrc/api/include)
    include_directories(/usr/local/lib/python2.7/dist-packages/torch/include)

    link_directories(/usr/local/lib)
    link_directories(/usr/local/lib/python2.7/dist-packages/torch/lib)

    set(CMAKE_PREFIX_PATH /usr/local/lib/python2.7/dist-packages/torch)
    set(Boost_USE_MULTITHREADED ON)
    set(Torch_DIR /usr/local/lib/python2.7/dist-packages/torch)
  endif()
endif()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Third-party libraries needed by the inference node.
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)
include_directories( ${OpenCV_INCLUDE_DIRS} )

# The ROS node executable.
add_executable(midas_cpp src/main.cpp)
# Each library list is passed as its own argument. Wrapping two lists in a
# single quoted string joins them with a space, which fuses the last OpenCV
# entry and the first catkin entry into one bogus link item.
target_link_libraries(midas_cpp "${TORCH_LIBRARIES}" ${OpenCV_LIBS} ${catkin_LIBRARIES})
set_property(TARGET midas_cpp PROPERTY CXX_STANDARD 14)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
###################################
|
| 67 |
+
## catkin specific configuration ##
|
| 68 |
+
###################################
|
| 69 |
+
## The catkin_package macro generates cmake config files for your package
|
| 70 |
+
## Declare things to be passed to dependent projects
|
| 71 |
+
## INCLUDE_DIRS: uncomment this if your package contains header files
|
| 72 |
+
## LIBRARIES: libraries you create in this project that dependent projects also need
|
| 73 |
+
## CATKIN_DEPENDS: catkin_packages dependent projects also need
|
| 74 |
+
## DEPENDS: system dependencies of this project that dependent projects also need
|
| 75 |
+
catkin_package(
|
| 76 |
+
# INCLUDE_DIRS include
|
| 77 |
+
# LIBRARIES midas_cpp
|
| 78 |
+
# CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs
|
| 79 |
+
# DEPENDS system_lib
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
###########
|
| 83 |
+
## Build ##
|
| 84 |
+
###########
|
| 85 |
+
|
| 86 |
+
## Specify additional locations of header files
|
| 87 |
+
## Your package locations should be listed before other locations
|
| 88 |
+
include_directories(
|
| 89 |
+
# include
|
| 90 |
+
${catkin_INCLUDE_DIRS}
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
## Declare a C++ library
|
| 94 |
+
# add_library(${PROJECT_NAME}
|
| 95 |
+
# src/${PROJECT_NAME}/midas_cpp.cpp
|
| 96 |
+
# )
|
| 97 |
+
|
| 98 |
+
## Add cmake target dependencies of the library
|
| 99 |
+
## as an example, code may need to be generated before libraries
|
| 100 |
+
## either from message generation or dynamic reconfigure
|
| 101 |
+
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
|
| 102 |
+
|
| 103 |
+
## Declare a C++ executable
|
| 104 |
+
## With catkin_make all packages are built within a single CMake context
|
| 105 |
+
## The recommended prefix ensures that target names across packages don't collide
|
| 106 |
+
# add_executable(${PROJECT_NAME}_node src/midas_cpp_node.cpp)
|
| 107 |
+
|
| 108 |
+
## Rename C++ executable without prefix
|
| 109 |
+
## The above recommended prefix causes long target names, the following renames the
|
| 110 |
+
## target back to the shorter version for ease of user use
|
| 111 |
+
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
|
| 112 |
+
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
|
| 113 |
+
|
| 114 |
+
## Add cmake target dependencies of the executable
|
| 115 |
+
## same as for the library above
|
| 116 |
+
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
|
| 117 |
+
|
| 118 |
+
## Specify libraries to link a library or executable target against
|
| 119 |
+
# target_link_libraries(${PROJECT_NAME}_node
|
| 120 |
+
# ${catkin_LIBRARIES}
|
| 121 |
+
# )
|
| 122 |
+
|
| 123 |
+
#############
|
| 124 |
+
## Install ##
|
| 125 |
+
#############
|
| 126 |
+
|
| 127 |
+
# all install targets should use catkin DESTINATION variables
|
| 128 |
+
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
|
| 129 |
+
|
| 130 |
+
## Mark executable scripts (Python etc.) for installation
|
| 131 |
+
## in contrast to setup.py, you can choose the destination
|
| 132 |
+
# catkin_install_python(PROGRAMS
|
| 133 |
+
# scripts/my_python_script
|
| 134 |
+
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
|
| 135 |
+
# )
|
| 136 |
+
|
| 137 |
+
## Mark executables for installation
|
| 138 |
+
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
|
| 139 |
+
# install(TARGETS ${PROJECT_NAME}_node
|
| 140 |
+
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
|
| 141 |
+
# )
|
| 142 |
+
|
| 143 |
+
## Mark libraries for installation
|
| 144 |
+
## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
|
| 145 |
+
# install(TARGETS ${PROJECT_NAME}
|
| 146 |
+
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 147 |
+
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 148 |
+
# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
|
| 149 |
+
# )
|
| 150 |
+
|
| 151 |
+
## Mark cpp header files for installation
|
| 152 |
+
# install(DIRECTORY include/${PROJECT_NAME}/
|
| 153 |
+
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
|
| 154 |
+
# FILES_MATCHING PATTERN "*.h"
|
| 155 |
+
# PATTERN ".svn" EXCLUDE
|
| 156 |
+
# )
|
| 157 |
+
|
| 158 |
+
## Mark other files for installation (e.g. launch and bag files, etc.)
|
| 159 |
+
# install(FILES
|
| 160 |
+
# # myfile1
|
| 161 |
+
# # myfile2
|
| 162 |
+
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
|
| 163 |
+
# )
|
| 164 |
+
|
| 165 |
+
#############
|
| 166 |
+
## Testing ##
|
| 167 |
+
#############
|
| 168 |
+
|
| 169 |
+
## Add gtest based cpp test target and link libraries
|
| 170 |
+
# catkin_add_gtest(${PROJECT_NAME}-test test/test_midas_cpp.cpp)
|
| 171 |
+
# if(TARGET ${PROJECT_NAME}-test)
|
| 172 |
+
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
|
| 173 |
+
# endif()
|
| 174 |
+
|
| 175 |
+
## Add folders to be run by python nosetests
|
| 176 |
+
# catkin_add_nosetests(test)
|
| 177 |
+
|
| 178 |
+
install(TARGETS ${PROJECT_NAME}
|
| 179 |
+
ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 180 |
+
LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 181 |
+
RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
add_custom_command(
|
| 185 |
+
TARGET midas_cpp POST_BUILD
|
| 186 |
+
COMMAND ${CMAKE_COMMAND} -E copy
|
| 187 |
+
${CMAKE_CURRENT_BINARY_DIR}/midas_cpp
|
| 188 |
+
${CMAKE_SOURCE_DIR}/midas_cpp
|
| 189 |
+
)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<launch>
|
| 2 |
+
<arg name="input_topic" default="image_topic"/>
|
| 3 |
+
<arg name="output_topic" default="midas_topic"/>
|
| 4 |
+
<arg name="model_name" default="model-small-traced.pt"/>
|
| 5 |
+
<arg name="out_orig_size" default="true"/>
|
| 6 |
+
<arg name="net_width" default="256"/>
|
| 7 |
+
<arg name="net_height" default="256"/>
|
| 8 |
+
<arg name="logging" default="false"/>
|
| 9 |
+
|
| 10 |
+
<node pkg="midas_cpp" type="midas_cpp" name="midas_cpp" output="log" respawn="true">
|
| 11 |
+
<param name="input_topic" value="$(arg input_topic)"/>
|
| 12 |
+
<param name="output_topic" value="$(arg output_topic)"/>
|
| 13 |
+
<param name="model_name" value="$(arg model_name)"/>
|
| 14 |
+
<param name="out_orig_size" value="$(arg out_orig_size)"/>
|
| 15 |
+
<param name="net_width" value="$(arg net_width)"/>
|
| 16 |
+
<param name="net_height" value="$(arg net_height)"/>
|
| 17 |
+
<param name="logging" value="$(arg logging)"/>
|
| 18 |
+
</node>
|
| 19 |
+
</launch>
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<launch>
|
| 2 |
+
<arg name="use_camera" default="false"/>
|
| 3 |
+
<arg name="input_video_file" default="test.mp4"/>
|
| 4 |
+
|
| 5 |
+
<arg name="show_output" default="true"/>
|
| 6 |
+
<arg name="save_output" default="false"/>
|
| 7 |
+
<arg name="output_video_file" default="result.mp4"/>
|
| 8 |
+
|
| 9 |
+
<node pkg="midas_cpp" type="talker.py" name="talker" output="log" respawn="true">
|
| 10 |
+
<param name="use_camera" value="$(arg use_camera)"/>
|
| 11 |
+
<param name="input_video_file" value="$(arg input_video_file)"/>
|
| 12 |
+
</node>
|
| 13 |
+
|
| 14 |
+
<node pkg="midas_cpp" type="listener.py" name="listener" output="log" respawn="true">
|
| 15 |
+
<param name="show_output" value="$(arg show_output)"/>
|
| 16 |
+
<param name="save_output" value="$(arg save_output)"/>
|
| 17 |
+
<param name="output_video_file" value="$(arg output_video_file)"/>
|
| 18 |
+
</node>
|
| 19 |
+
|
| 20 |
+
<node pkg="midas_cpp" type="listener_original.py" name="listener_original" output="log" respawn="true">
|
| 21 |
+
<param name="show_output" value="$(arg show_output)"/>
|
| 22 |
+
</node>
|
| 23 |
+
</launch>
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0"?>
|
| 2 |
+
<package format="2">
|
| 3 |
+
<name>midas_cpp</name>
|
| 4 |
+
<version>0.1.0</version>
|
| 5 |
+
<description>The midas_cpp package</description>
|
| 6 |
+
|
| 7 |
+
<maintainer email="alexeyab84@gmail.com">Alexey Bochkovskiy</maintainer>
|
| 8 |
+
<license>MIT</license>
|
| 9 |
+
<url type="website">https://github.com/isl-org/MiDaS/tree/master/ros</url>
|
| 10 |
+
<!-- <author email="alexeyab84@gmail.com">Alexey Bochkovskiy</author> -->
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
<!-- One license tag required, multiple allowed, one license per tag -->
|
| 14 |
+
<!-- Commonly used license strings: -->
|
| 15 |
+
<!-- BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
|
| 16 |
+
<license>TODO</license>
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
<!-- Url tags are optional, but multiple are allowed, one per tag -->
|
| 20 |
+
<!-- Optional attribute type can be: website, bugtracker, or repository -->
|
| 21 |
+
<!-- Example: -->
|
| 22 |
+
<!-- <url type="website">http://wiki.ros.org/midas_cpp</url> -->
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
<!-- Author tags are optional, multiple are allowed, one per tag -->
|
| 26 |
+
<!-- Authors do not have to be maintainers, but could be -->
|
| 27 |
+
<!-- Example: -->
|
| 28 |
+
<!-- <author email="jane.doe@example.com">Jane Doe</author> -->
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
<!-- The *depend tags are used to specify dependencies -->
|
| 32 |
+
<!-- Dependencies can be catkin packages or system dependencies -->
|
| 33 |
+
<!-- Examples: -->
|
| 34 |
+
<!-- Use depend as a shortcut for packages that are both build and exec dependencies -->
|
| 35 |
+
<!-- <depend>roscpp</depend> -->
|
| 36 |
+
<!-- Note that this is equivalent to the following: -->
|
| 37 |
+
<!-- <build_depend>roscpp</build_depend> -->
|
| 38 |
+
<!-- <exec_depend>roscpp</exec_depend> -->
|
| 39 |
+
<!-- Use build_depend for packages you need at compile time: -->
|
| 40 |
+
<!-- <build_depend>message_generation</build_depend> -->
|
| 41 |
+
<!-- Use build_export_depend for packages you need in order to build against this package: -->
|
| 42 |
+
<!-- <build_export_depend>message_generation</build_export_depend> -->
|
| 43 |
+
<!-- Use buildtool_depend for build tool packages: -->
|
| 44 |
+
<!-- <buildtool_depend>catkin</buildtool_depend> -->
|
| 45 |
+
<!-- Use exec_depend for packages you need at runtime: -->
|
| 46 |
+
<!-- <exec_depend>message_runtime</exec_depend> -->
|
| 47 |
+
<!-- Use test_depend for packages you need only for testing: -->
|
| 48 |
+
<!-- <test_depend>gtest</test_depend> -->
|
| 49 |
+
<!-- Use doc_depend for packages you need only for building documentation: -->
|
| 50 |
+
<!-- <doc_depend>doxygen</doc_depend> -->
|
| 51 |
+
<buildtool_depend>catkin</buildtool_depend>
|
| 52 |
+
<build_depend>cv_bridge</build_depend>
|
| 53 |
+
<build_depend>image_transport</build_depend>
|
| 54 |
+
<build_depend>roscpp</build_depend>
|
| 55 |
+
<build_depend>rospy</build_depend>
|
| 56 |
+
<build_depend>sensor_msgs</build_depend>
|
| 57 |
+
<build_depend>std_msgs</build_depend>
|
| 58 |
+
<build_export_depend>cv_bridge</build_export_depend>
|
| 59 |
+
<build_export_depend>image_transport</build_export_depend>
|
| 60 |
+
<build_export_depend>roscpp</build_export_depend>
|
| 61 |
+
<build_export_depend>rospy</build_export_depend>
|
| 62 |
+
<build_export_depend>sensor_msgs</build_export_depend>
|
| 63 |
+
<build_export_depend>std_msgs</build_export_depend>
|
| 64 |
+
<exec_depend>cv_bridge</exec_depend>
|
| 65 |
+
<exec_depend>image_transport</exec_depend>
|
| 66 |
+
<exec_depend>roscpp</exec_depend>
|
| 67 |
+
<exec_depend>rospy</exec_depend>
|
| 68 |
+
<exec_depend>sensor_msgs</exec_depend>
|
| 69 |
+
<exec_depend>std_msgs</exec_depend>
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
<!-- The export tag contains other, unspecified, tags -->
|
| 73 |
+
<export>
|
| 74 |
+
<!-- Other tools can request additional information be placed here -->
|
| 75 |
+
|
| 76 |
+
</export>
|
| 77 |
+
</package>
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import print_function
|
| 3 |
+
|
| 4 |
+
import roslib
|
| 5 |
+
#roslib.load_manifest('my_package')
|
| 6 |
+
import sys
|
| 7 |
+
import rospy
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
from std_msgs.msg import String
|
| 11 |
+
from sensor_msgs.msg import Image
|
| 12 |
+
from cv_bridge import CvBridge, CvBridgeError
|
| 13 |
+
|
| 14 |
+
class video_show:
    """Show and optionally record the frames received on ``midas_topic``.

    Private ROS parameters read at construction:
        ~show_output (default True): display every frame in an OpenCV window.
        ~save_output (default False): append every frame to a video file.
        ~output_video_file (default 'result.mp4'): path of that video file.
    """

    def __init__(self):
        self.show_output = rospy.get_param('~show_output', True)
        self.save_output = rospy.get_param('~save_output', False)
        self.output_video_file = rospy.get_param('~output_video_file','result.mp4')
        # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")

        # The video writer is created lazily on the first saved frame, because
        # the frame size is only known once an image has arrived. These two
        # attributes must exist before the subscriber is registered, since the
        # callback reads them.
        self.video_writer_init = False
        self.out = None

        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback)

    def callback(self, data):
        """Convert one incoming Image message, then show and/or record it.

        Frames that fail conversion or are empty are dropped.
        """
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data)
        except CvBridgeError as e:
            print(e)
            return

        if cv_image.size == 0:
            return

        rospy.loginfo("Listener: Received new frame")
        cv_image = cv_image.astype("uint8")

        # Explicit comparison kept on purpose: only a real boolean True
        # parameter value enables the feature.
        if self.show_output == True:
            cv2.imshow("video_show", cv_image)
            cv2.waitKey(10)

        if self.save_output == True:
            if not self.video_writer_init:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                frame_size = (cv_image.shape[1], cv_image.shape[0])  # (width, height)
                self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, frame_size)
                # Remember that the writer exists so it is created only once.
                self.video_writer_init = True

            # NOTE(review): the writer is opened with default colour settings;
            # confirm that single-channel frames are actually written.
            self.out.write(cv_image)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def main(args):
    """Run the 'listener' node until shutdown, then close all OpenCV windows.

    ``args`` is accepted for the command-line call below but is not used.
    """
    rospy.init_node('listener', anonymous=True)
    viewer = video_show()

    try:
        # Blocks here while subscriber callbacks are processed.
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")

    cv2.destroyAllWindows()


if __name__ == '__main__':
    main(sys.argv)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import print_function
|
| 3 |
+
|
| 4 |
+
import roslib
|
| 5 |
+
#roslib.load_manifest('my_package')
|
| 6 |
+
import sys
|
| 7 |
+
import rospy
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
from std_msgs.msg import String
|
| 11 |
+
from sensor_msgs.msg import Image
|
| 12 |
+
from cv_bridge import CvBridge, CvBridgeError
|
| 13 |
+
|
| 14 |
+
class video_show:
    """Show and optionally record the frames received on ``image_topic``.

    Private ROS parameters read at construction:
        ~show_output (default True): display every frame in an OpenCV window.
        ~save_output (default False): append every frame to a video file.
        ~output_video_file (default 'result.mp4'): path of that video file.
    """

    def __init__(self):
        self.show_output = rospy.get_param('~show_output', True)
        self.save_output = rospy.get_param('~save_output', False)
        self.output_video_file = rospy.get_param('~output_video_file','result.mp4')
        # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")

        # The video writer is created lazily on the first saved frame, because
        # the frame size is only known once an image has arrived. These two
        # attributes must exist before the subscriber is registered, since the
        # callback reads them.
        self.video_writer_init = False
        self.out = None

        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("image_topic", Image, self.callback)

    def callback(self, data):
        """Convert one incoming Image message, then show and/or record it.

        Frames that fail conversion or are empty are dropped.
        """
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data)
        except CvBridgeError as e:
            print(e)
            return

        if cv_image.size == 0:
            return

        rospy.loginfo("Listener_original: Received new frame")
        cv_image = cv_image.astype("uint8")

        # Explicit comparison kept on purpose: only a real boolean True
        # parameter value enables the feature.
        if self.show_output == True:
            cv2.imshow("video_show_orig", cv_image)
            cv2.waitKey(10)

        if self.save_output == True:
            if not self.video_writer_init:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                frame_size = (cv_image.shape[1], cv_image.shape[0])  # (width, height)
                self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, frame_size)
                # Remember that the writer exists so it is created only once.
                self.video_writer_init = True

            self.out.write(cv_image)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def main(args):
    """Run the 'listener_original' node until shutdown, then close all OpenCV windows.

    ``args`` is accepted for the command-line call below but is not used.
    """
    rospy.init_node('listener_original', anonymous=True)
    viewer = video_show()

    try:
        # Blocks here while subscriber callbacks are processed.
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")

    cv2.destroyAllWindows()


if __name__ == '__main__':
    main(sys.argv)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import roslib
|
| 5 |
+
#roslib.load_manifest('my_package')
|
| 6 |
+
import sys
|
| 7 |
+
import rospy
|
| 8 |
+
import cv2
|
| 9 |
+
from std_msgs.msg import String
|
| 10 |
+
from sensor_msgs.msg import Image
|
| 11 |
+
from cv_bridge import CvBridge, CvBridgeError
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def talker():
    """Publish frames from a camera or a video file on ``image_topic``.

    Private ROS parameters:
        ~use_camera (default False): read from capture device 0 instead of a file.
        ~input_video_file (default 'test.mp4'): video file used when
            ``use_camera`` is not set.

    Frames are published as ``bgr8`` images at up to 30 Hz. The function
    returns when the source cannot be opened, when it runs out of frames, or
    when ROS shuts down. The capture is always released on exit.
    """
    rospy.init_node('talker', anonymous=True)

    use_camera = rospy.get_param('~use_camera', False)
    input_video_file = rospy.get_param('~input_video_file','test.mp4')
    # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}")

    # rospy.loginfo("Talker: Trying to open a video stream")
    if use_camera == True:
        cap = cv2.VideoCapture(0)
    else:
        cap = cv2.VideoCapture(input_video_file)

    try:
        # Report an unopenable source explicitly rather than as "Video is over".
        if not cap.isOpened():
            rospy.logerr("Talker: could not open the video source")
            return

        pub = rospy.Publisher('image_topic', Image, queue_size=1)
        rate = rospy.Rate(30) # 30hz
        bridge = CvBridge()

        while not rospy.is_shutdown():
            ret, cv_image = cap.read()
            if ret==False:
                print("Talker: Video is over")
                rospy.loginfo("Video is over")
                return

            try:
                image = bridge.cv2_to_imgmsg(cv_image, "bgr8")
            except CvBridgeError as e:
                # The message needs a %s placeholder for the extra argument.
                rospy.logerr("Talker: cv2image conversion failed: %s", e)
                print(e)
                continue

            rospy.loginfo("Talker: Publishing frame")
            pub.publish(image)
            rate.sleep()
    finally:
        # Free the camera / file handle on every exit path, including the
        # ROSInterruptException that rate.sleep() can raise at shutdown.
        cap.release()


if __name__ == '__main__':
    try:
        talker()
    except rospy.ROSInterruptException:
        pass
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/src/main.cpp
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include <ros/ros.h>
|
| 2 |
+
#include <image_transport/image_transport.h>
|
| 3 |
+
#include <cv_bridge/cv_bridge.h>
|
| 4 |
+
#include <sensor_msgs/image_encodings.h>
|
| 5 |
+
|
| 6 |
+
#include <initializer_list>
|
| 7 |
+
|
| 8 |
+
#include <torch/script.h> // One-stop header.
|
| 9 |
+
|
| 10 |
+
#include <opencv2/core/version.hpp>
|
| 11 |
+
#include <opencv2/imgproc/imgproc.hpp>
|
| 12 |
+
#include <opencv2/opencv.hpp>
|
| 13 |
+
#include <opencv2/opencv_modules.hpp>
|
| 14 |
+
|
| 15 |
+
#include <opencv2/highgui/highgui.hpp>
|
| 16 |
+
#include <opencv2/video/video.hpp>
|
| 17 |
+
|
| 18 |
+
// includes for OpenCV >= 3.x
|
| 19 |
+
#ifndef CV_VERSION_EPOCH
|
| 20 |
+
#include <opencv2/core/types.hpp>
|
| 21 |
+
#include <opencv2/videoio/videoio.hpp>
|
| 22 |
+
#include <opencv2/imgcodecs/imgcodecs.hpp>
|
| 23 |
+
#endif
|
| 24 |
+
|
| 25 |
+
// OpenCV includes for OpenCV 2.x
|
| 26 |
+
#ifdef CV_VERSION_EPOCH
|
| 27 |
+
#include <opencv2/highgui/highgui_c.h>
|
| 28 |
+
#include <opencv2/imgproc/imgproc_c.h>
|
| 29 |
+
#include <opencv2/core/types_c.h>
|
| 30 |
+
#include <opencv2/core/version.hpp>
|
| 31 |
+
#endif
|
| 32 |
+
|
| 33 |
+
static const std::string OPENCV_WINDOW = "Image window";
|
| 34 |
+
|
| 35 |
+
class Midas
|
| 36 |
+
{
|
| 37 |
+
ros::NodeHandle nh_;
|
| 38 |
+
image_transport::ImageTransport it_;
|
| 39 |
+
image_transport::Subscriber image_sub_;
|
| 40 |
+
image_transport::Publisher image_pub_;
|
| 41 |
+
|
| 42 |
+
torch::jit::script::Module module;
|
| 43 |
+
torch::Device device;
|
| 44 |
+
|
| 45 |
+
auto ToTensor(cv::Mat img, bool show_output = false, bool unsqueeze = false, int unsqueeze_dim = 0)
|
| 46 |
+
{
|
| 47 |
+
//std::cout << "image shape: " << img.size() << std::endl;
|
| 48 |
+
at::Tensor tensor_image = torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte);
|
| 49 |
+
|
| 50 |
+
if (unsqueeze)
|
| 51 |
+
{
|
| 52 |
+
tensor_image.unsqueeze_(unsqueeze_dim);
|
| 53 |
+
//std::cout << "tensors new shape: " << tensor_image.sizes() << std::endl;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
if (show_output)
|
| 57 |
+
{
|
| 58 |
+
std::cout << tensor_image.slice(2, 0, 1) << std::endl;
|
| 59 |
+
}
|
| 60 |
+
//std::cout << "tenor shape: " << tensor_image.sizes() << std::endl;
|
| 61 |
+
return tensor_image;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
auto ToInput(at::Tensor tensor_image)
|
| 65 |
+
{
|
| 66 |
+
// Create a vector of inputs.
|
| 67 |
+
return std::vector<torch::jit::IValue>{tensor_image};
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
auto ToCvImage(at::Tensor tensor, int cv_type = CV_8UC3)
|
| 71 |
+
{
|
| 72 |
+
int width = tensor.sizes()[0];
|
| 73 |
+
int height = tensor.sizes()[1];
|
| 74 |
+
try
|
| 75 |
+
{
|
| 76 |
+
cv::Mat output_mat;
|
| 77 |
+
if (cv_type == CV_8UC4 || cv_type == CV_8UC3 || cv_type == CV_8UC2 || cv_type == CV_8UC1) {
|
| 78 |
+
cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<uchar>());
|
| 79 |
+
output_mat = cv_image;
|
| 80 |
+
}
|
| 81 |
+
else if (cv_type == CV_32FC4 || cv_type == CV_32FC3 || cv_type == CV_32FC2 || cv_type == CV_32FC1) {
|
| 82 |
+
cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<float>());
|
| 83 |
+
output_mat = cv_image;
|
| 84 |
+
}
|
| 85 |
+
else if (cv_type == CV_64FC4 || cv_type == CV_64FC3 || cv_type == CV_64FC2 || cv_type == CV_64FC1) {
|
| 86 |
+
cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr<double>());
|
| 87 |
+
output_mat = cv_image;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
//show_image(output_mat, "converted image from tensor");
|
| 91 |
+
return output_mat.clone();
|
| 92 |
+
}
|
| 93 |
+
catch (const c10::Error& e)
|
| 94 |
+
{
|
| 95 |
+
std::cout << "an error has occured : " << e.msg() << std::endl;
|
| 96 |
+
}
|
| 97 |
+
return cv::Mat(height, width, CV_8UC3);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
std::string input_topic, output_topic, model_name;
|
| 101 |
+
bool out_orig_size;
|
| 102 |
+
int net_width, net_height;
|
| 103 |
+
torch::NoGradGuard guard;
|
| 104 |
+
at::Tensor mean, std;
|
| 105 |
+
at::Tensor output, tensor;
|
| 106 |
+
|
| 107 |
+
public:
|
| 108 |
+
Midas()
|
| 109 |
+
: nh_(), it_(nh_), device(torch::Device(torch::kCPU))
|
| 110 |
+
{
|
| 111 |
+
ros::param::param<std::string>("~input_topic", input_topic, "image_topic");
|
| 112 |
+
ros::param::param<std::string>("~output_topic", output_topic, "midas_topic");
|
| 113 |
+
ros::param::param<std::string>("~model_name", model_name, "model-small-traced.pt");
|
| 114 |
+
ros::param::param<bool>("~out_orig_size", out_orig_size, true);
|
| 115 |
+
ros::param::param<int>("~net_width", net_width, 256);
|
| 116 |
+
ros::param::param<int>("~net_height", net_height, 256);
|
| 117 |
+
|
| 118 |
+
std::cout << ", input_topic = " << input_topic <<
|
| 119 |
+
", output_topic = " << output_topic <<
|
| 120 |
+
", model_name = " << model_name <<
|
| 121 |
+
", out_orig_size = " << out_orig_size <<
|
| 122 |
+
", net_width = " << net_width <<
|
| 123 |
+
", net_height = " << net_height <<
|
| 124 |
+
std::endl;
|
| 125 |
+
|
| 126 |
+
// Subscrive to input video feed and publish output video feed
|
| 127 |
+
image_sub_ = it_.subscribe(input_topic, 1, &Midas::imageCb, this);
|
| 128 |
+
image_pub_ = it_.advertise(output_topic, 1);
|
| 129 |
+
|
| 130 |
+
std::cout << "Try to load torchscript model \n";
|
| 131 |
+
|
| 132 |
+
try {
|
| 133 |
+
// Deserialize the ScriptModule from a file using torch::jit::load().
|
| 134 |
+
module = torch::jit::load(model_name);
|
| 135 |
+
}
|
| 136 |
+
catch (const c10::Error& e) {
|
| 137 |
+
std::cerr << "error loading the model\n";
|
| 138 |
+
exit(0);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
std::cout << "ok\n";
|
| 142 |
+
|
| 143 |
+
try {
|
| 144 |
+
module.eval();
|
| 145 |
+
torch::jit::getProfilingMode() = false;
|
| 146 |
+
torch::jit::setGraphExecutorOptimize(true);
|
| 147 |
+
|
| 148 |
+
mean = torch::tensor({ 0.485, 0.456, 0.406 });
|
| 149 |
+
std = torch::tensor({ 0.229, 0.224, 0.225 });
|
| 150 |
+
|
| 151 |
+
if (torch::hasCUDA()) {
|
| 152 |
+
std::cout << "cuda is available" << std::endl;
|
| 153 |
+
at::globalContext().setBenchmarkCuDNN(true);
|
| 154 |
+
device = torch::Device(torch::kCUDA);
|
| 155 |
+
module.to(device);
|
| 156 |
+
mean = mean.to(device);
|
| 157 |
+
std = std.to(device);
|
| 158 |
+
}
|
| 159 |
+
}
|
| 160 |
+
catch (const c10::Error& e)
|
| 161 |
+
{
|
| 162 |
+
std::cerr << " module initialization: " << e.msg() << std::endl;
|
| 163 |
+
}
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
// Nothing to release by hand: all members clean up after themselves.
~Midas() = default;
|
| 169 |
+
|
| 170 |
+
// Image callback: converts the incoming ROS image to a tensor, runs the
// network on it, rescales the prediction to [0, 255] and publishes it as a
// 32-bit float single-channel image. Any failure is logged and the frame is
// dropped.
void imageCb(const sensor_msgs::ImageConstPtr& msg)
{
    cv_bridge::CvImagePtr cv_ptr;
    try
    {
        // sensor_msgs::Image to cv::Mat
        cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8);
    }
    catch (cv_bridge::Exception& e)
    {
        ROS_ERROR("cv_bridge exception: %s", e.what());
        return;
    }

    // pre-processing
    auto tensor_cpu = ToTensor(cv_ptr->image);  // OpenCV-image -> Libtorch-tensor

    try {
        tensor = tensor_cpu.to(device);  // move to device (CPU or GPU)

        tensor = tensor.toType(c10::kFloat);
        tensor = tensor.permute({ 2, 0, 1 });  // HWC -> CHW
        tensor = tensor.unsqueeze(0);
        tensor = at::upsample_bilinear2d(tensor, { net_height, net_width }, true);  // resize
        tensor = tensor.squeeze(0);
        tensor = tensor.permute({ 1, 2, 0 });  // CHW -> HWC

        tensor = tensor.div(255).sub(mean).div(std);  // normalization
        tensor = tensor.permute({ 2, 0, 1 });  // HWC -> CHW
        tensor.unsqueeze_(0);  // CHW -> NCHW
    }
    catch (const c10::Error& e)
    {
        std::cerr << " pre-processing exception: " << e.msg() << std::endl;
        return;
    }

    auto input_to_net = ToInput(tensor);  // input to the network

    // inference
    try {
        output = module.forward(input_to_net).toTensor();  // run inference
    }
    catch (const c10::Error& e)
    {
        std::cerr << " module.forward() exception: " << e.msg() << std::endl;
        return;
    }

    output = output.detach().to(torch::kF32);

    // CPU copy used only to scan for the value range; contiguous() makes the
    // raw pointer walk below valid regardless of the tensor's strides.
    at::Tensor output_tmp = output.to(torch::kCPU).contiguous();

    // normalization
    // NOTE: lowest() is the most negative float; min() is the smallest
    // *positive* one and would yield a wrong maximum for non-positive data.
    float min_val = std::numeric_limits<float>::max();
    float max_val = std::numeric_limits<float>::lowest();

    const float* values = output_tmp.data_ptr<float>();
    const int64_t value_count = output_tmp.numel();
    for (int64_t i = 0; i < value_count; ++i) {
        const float val = values[i];
        if (min_val > val) min_val = val;
        if (max_val < val) max_val = val;
    }

    float range_val = max_val - min_val;
    if (!(range_val > 0.0F)) {
        // Constant (or empty / non-finite) prediction: avoid dividing by zero.
        range_val = 1.0F;
    }

    output = output.sub(min_val).div(range_val).mul(255.0F).clamp(0, 255).to(torch::kF32);  // .to(torch::kU8);

    // resize to the original size if required
    if (out_orig_size) {
        try {
            output = at::upsample_bilinear2d(output.unsqueeze(0), { cv_ptr->image.size().height, cv_ptr->image.size().width }, true);
            output = output.squeeze(0);
        }
        catch (const c10::Error& e)
        {
            std::cerr << " upsample_bilinear2d() exception: " << e.msg() << std::endl;
            return;
        }
    }
    output = output.permute({ 1, 2, 0 }).to(torch::kCPU);

    int cv_type = CV_32FC1;  // CV_8UC1;
    auto cv_img = ToCvImage(output, cv_type);

    sensor_msgs::Image img_msg;

    try {
        // cv::Mat -> sensor_msgs::Image
        std_msgs::Header header;         // empty header
        header.seq = 0;                  // user defined counter
        header.stamp = ros::Time::now(); // time
        cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::TYPE_32FC1, cv_img);

        img_bridge.toImageMsg(img_msg);  // cv_bridge -> sensor_msgs::Image
    }
    catch (cv_bridge::Exception& e)
    {
        ROS_ERROR("cv_bridge exception: %s", e.what());
        return;
    }

    // Output modified video stream
    image_pub_.publish(img_msg);
}
|
| 277 |
+
};
|
| 278 |
+
|
| 279 |
+
int main(int argc, char** argv)
|
| 280 |
+
{
|
| 281 |
+
ros::init(argc, argv, "midas", ros::init_options::AnonymousName);
|
| 282 |
+
Midas ic;
|
| 283 |
+
ros::spin();
|
| 284 |
+
return 0;
|
| 285 |
+
}
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Place a test.mp4 file next to this script before running it.

# roscore
# rosnode kill -a

source ~/catkin_ws/devel/setup.bash

# Background jobs started by a non-interactive shell ignore Ctrl-C, so they
# would keep running after this script is interrupted. Stop them explicitly
# whenever the script exits.
cleanup() {
    trap - EXIT INT TERM
    kill $P1 $P2 $P3 $P4 2>/dev/null
}
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

roscore &
P1=$!
rosrun midas_cpp talker.py &
P2=$!
rosrun midas_cpp listener_original.py &
P3=$!
rosrun midas_cpp listener.py &
P4=$!
wait "$P1" "$P2" "$P3" "$P4"
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/run.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compute depth maps for images in the input folder.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import glob
|
| 5 |
+
import torch
|
| 6 |
+
import utils
|
| 7 |
+
import cv2
|
| 8 |
+
import argparse
|
| 9 |
+
import time
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
from imutils.video import VideoStream
|
| 14 |
+
from midas.model_loader import default_models, load_model
|
| 15 |
+
|
| 16 |
+
first_execution = True


def process(device, model, model_type, image, input_size, target_size, optimize, use_camera):
    """
    Run a single inference and resize the result to the requested size.

    The module-level flag ``first_execution`` limits the informational
    messages to the first call when frames come from a camera.

    Args:
        device (torch.device): the torch device used
        model: the model used for inference
        model_type: the type of the model
        image: the image fed into the neural network
        input_size: the size (width, height) of the neural network input (for OpenVINO)
        target_size: the size (width, height) the neural network output is interpolated to
        optimize: optimize the model to half-floats on CUDA?
        use_camera: is the camera used?

    Returns:
        the prediction
    """
    global first_execution

    # OpenVINO path: numpy in, numpy out, resized with OpenCV.
    if "openvino" in model_type:
        if first_execution or not use_camera:
            print(f" Input resized to {input_size[0]}x{input_size[1]} before entering the encoder")
            first_execution = False

        batch = [np.reshape(image, (1, 3, *input_size))]
        raw = model(batch)[model.output(0)][0]
        return cv2.resize(raw, dsize=target_size, interpolation=cv2.INTER_CUBIC)

    # PyTorch path.
    batch = torch.from_numpy(image).to(device).unsqueeze(0)

    use_half = optimize and device == torch.device("cuda")
    if use_half:
        if first_execution:
            print(" Optimization to half-floats activated. Use with caution, because models like Swin require\n"
                  " float precision to work properly and may yield non-finite depth values to some extent for\n"
                  " half-floats.")
        batch = batch.to(memory_format=torch.channels_last)
        batch = batch.half()

    if first_execution or not use_camera:
        net_h, net_w = batch.shape[2:]
        print(f" Input resized to {net_w}x{net_h} before entering the encoder")
        first_execution = False

    raw = model.forward(batch)
    # target_size is (width, height); interpolate expects (height, width).
    resized = torch.nn.functional.interpolate(
        raw.unsqueeze(1),
        size=target_size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    return resized.squeeze().cpu().numpy()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def create_side_by_side(image, depth, grayscale):
    """
    Take an RGB image and depth map and place them side by side. The depth map is rescaled to [0, 255] for better
    visibility.

    Args:
        image: the RGB image, or None to return only the rendered depth map
        depth: the depth map
        grayscale: use a grayscale colormap?

    Returns:
        the image and depth map placed side by side (or only the depth map if image is None)
    """
    depth_min = depth.min()
    depth_max = depth.max()
    depth_range = depth_max - depth_min

    if depth_range > 0:
        normalized_depth = 255 * (depth - depth_min) / depth_range
    else:
        # A constant depth map has no range to stretch; dividing by zero here
        # would fill the image with NaNs, so render it as all zeros instead.
        normalized_depth = np.zeros(depth.shape, dtype=np.float32)
    # The factor 3 is cancelled by the division below; both are kept so the
    # values stay identical to what earlier versions produced.
    normalized_depth *= 3

    right_side = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
    if not grayscale:
        right_side = cv2.applyColorMap(np.uint8(right_side), cv2.COLORMAP_INFERNO)

    if image is None:
        return right_side
    else:
        return np.concatenate((image, right_side), axis=1)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def run(input_path, output_path, model_path, model_type="dpt_beit_large_512", optimize=False, side=False, height=None,
        square=False, grayscale=False):
    """Run MonoDepthNN to compute depth maps.

    With an input path every file in that folder is processed once; without
    one, frames are grabbed from camera 0 until the Escape key is pressed.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): the model type
        optimize (bool): optimize the model to half-floats on CUDA?
        side (bool): RGB and depth side by side in output images?
        height (int): inference encoder image height
        square (bool): resize to a square resolution?
        grayscale (bool): use a grayscale colormap?
    """
    print("Initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: %s" % device)

    model, transform, net_w, net_h = load_model(device, model_path, model_type, optimize, height, square)

    # get input
    if input_path is not None:
        image_names = glob.glob(os.path.join(input_path, "*"))
        num_images = len(image_names)
    else:
        print("No input path specified. Grabbing images from camera.")

    # create output folder
    if output_path is not None:
        os.makedirs(output_path, exist_ok=True)

    print("Start processing")

    if input_path is not None:
        if output_path is None:
            print("Warning: No output path specified. Images will be processed but not shown or stored anywhere.")
        for index, image_name in enumerate(image_names):

            print(" Processing {} ({}/{})".format(image_name, index + 1, num_images))

            # input
            original_image_rgb = utils.read_image(image_name)  # in [0, 1]
            image = transform({"image": original_image_rgb})["image"]

            # compute
            with torch.no_grad():
                prediction = process(device, model, model_type, image, (net_w, net_h), original_image_rgb.shape[1::-1],
                                     optimize, False)

            # output
            if output_path is not None:
                filename = os.path.join(
                    output_path, os.path.splitext(os.path.basename(image_name))[0] + '-' + model_type
                )
                if not side:
                    utils.write_depth(filename, prediction, grayscale, bits=2)
                else:
                    original_image_bgr = np.flip(original_image_rgb, 2)
                    content = create_side_by_side(original_image_bgr*255, prediction, grayscale)
                    cv2.imwrite(filename + ".png", content)
                utils.write_pfm(filename + ".pfm", prediction.astype(np.float32))

    else:
        with torch.no_grad():
            fps = 1
            video = VideoStream(0).start()
            try:
                time_start = time.time()
                frame_index = 0
                while True:
                    frame = video.read()
                    if frame is not None:
                        original_image_rgb = np.flip(frame, 2)  # in [0, 255] (flip required to get RGB)
                        image = transform({"image": original_image_rgb/255})["image"]

                        prediction = process(device, model, model_type, image, (net_w, net_h),
                                             original_image_rgb.shape[1::-1], optimize, True)

                        original_image_bgr = np.flip(original_image_rgb, 2) if side else None
                        content = create_side_by_side(original_image_bgr, prediction, grayscale)
                        cv2.imshow('MiDaS Depth Estimation - Press Escape to close window ', content/255)

                        if output_path is not None:
                            filename = os.path.join(output_path, 'Camera' + '-' + model_type + '_' + str(frame_index))
                            cv2.imwrite(filename + ".png", content)

                        alpha = 0.1
                        if time.time()-time_start > 0:
                            fps = (1 - alpha) * fps + alpha * 1 / (time.time()-time_start)  # exponential moving average
                            time_start = time.time()
                        print(f"\rFPS: {round(fps,2)}", end="")

                        if cv2.waitKey(1) == 27:  # Escape key
                            break

                        frame_index += 1
            finally:
                # Stop the capture stream even when the loop ends through an
                # exception, so the camera is not left open.
                video.stop()
        print()

    print("Finished")
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
if __name__ == "__main__":
    # Command-line interface of the depth estimation script.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i", "--input_path",
        default=None,
        help="Folder with input images (if no input path is specified, images are tried to be grabbed "
             "from camera)",
    )
    parser.add_argument(
        "-o", "--output_path",
        default=None,
        help="Folder for output images",
    )
    parser.add_argument(
        "-m", "--model_weights",
        default=None,
        help="Path to the trained weights of model",
    )
    parser.add_argument(
        "-t", "--model_type",
        default="dpt_beit_large_512",
        help="Model type: "
             "dpt_beit_large_512, dpt_beit_large_384, dpt_beit_base_384, dpt_swin2_large_384, "
             "dpt_swin2_base_384, dpt_swin2_tiny_256, dpt_swin_large_384, dpt_next_vit_large_384, "
             "dpt_levit_224, dpt_large_384, dpt_hybrid_384, midas_v21_384, midas_v21_small_256 or "
             "openvino_midas_v21_small_256",
    )
    parser.add_argument(
        "-s", "--side",
        action="store_true",
        help="Output images contain RGB and depth images side by side",
    )

    parser.add_argument("--optimize", dest="optimize", action="store_true", help="Use half-float optimization")
    parser.set_defaults(optimize=False)

    parser.add_argument(
        "--height",
        type=int,
        default=None,
        help="Preferred height of images feed into the encoder during inference. Note that the "
             "preferred height may differ from the actual height, because an alignment to multiples of "
             "32 takes place. Many models support only the height chosen during training, which is "
             "used automatically if this parameter is not set.",
    )
    parser.add_argument(
        "--square",
        action="store_true",
        help="Option to resize images to a square resolution by changing their widths when images are "
             "fed into the encoder during inference. If this parameter is not set, the aspect ratio of "
             "images is tried to be preserved if supported by the model.",
    )
    parser.add_argument(
        "--grayscale",
        action="store_true",
        help="Use a grayscale colormap instead of the inferno one. Although the inferno colormap, "
             "which is used by default, is better for visibility, it does not allow storing 16-bit "
             "depth values in PNGs but only 8-bit ones due to the precision limitation of this "
             "colormap.",
    )

    args = parser.parse_args()

    # Fall back to the default weights registered for the chosen model type.
    if args.model_weights is None:
        args.model_weights = default_models[args.model_type]

    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # compute depth maps
    run(
        input_path=args.input_path,
        output_path=args.output_path,
        model_path=args.model_weights,
        model_type=args.model_type,
        optimize=args.optimize,
        side=args.side,
        height=args.height,
        square=args.square,
        grayscale=args.grayscale,
    )
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/README.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer
|
| 2 |
+
|
| 3 |
+
### TensorFlow inference using `.pb` and `.onnx` models
|
| 4 |
+
|
| 5 |
+
1. [Run inference on TensorFlow-model by using TensorFlow](#run-inference-on-tensorflow-model-by-using-tensorflow)
|
| 6 |
+
|
| 7 |
+
2. [Run inference on ONNX-model by using ONNX-Runtime](#run-inference-on-onnx-model-by-using-onnx-runtime)
|
| 8 |
+
|
| 9 |
+
3. [Make ONNX model from downloaded Pytorch model file](#make-onnx-model-from-downloaded-pytorch-model-file)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
### Run inference on TensorFlow-model by using TensorFlow
|
| 13 |
+
|
| 14 |
+
1) Download the model weights [model-f6b98070.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pb)
|
| 15 |
+
and [model-small.pb](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.pb) and place the
|
| 16 |
+
files in the `/tf/` folder.
|
| 17 |
+
|
| 18 |
+
2) Set up dependencies:
|
| 19 |
+
|
| 20 |
+
```shell
|
| 21 |
+
# install OpenCV
|
| 22 |
+
pip install --upgrade pip
|
| 23 |
+
pip install opencv-python
|
| 24 |
+
|
| 25 |
+
# install TensorFlow
|
| 26 |
+
pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
#### Usage
|
| 30 |
+
|
| 31 |
+
1) Place one or more input images in the folder `tf/input`.
|
| 32 |
+
|
| 33 |
+
2) Run the model:
|
| 34 |
+
|
| 35 |
+
```shell
|
| 36 |
+
python tf/run_pb.py
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
Or run the small model:
|
| 40 |
+
|
| 41 |
+
```shell
|
| 42 |
+
python tf/run_pb.py --model_weights model-small.pb --model_type small
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
3) The resulting inverse depth maps are written to the `tf/output` folder.
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
### Run inference on ONNX-model by using ONNX-Runtime
|
| 49 |
+
|
| 50 |
+
1) Download the model weights [model-f6b98070.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.onnx)
|
| 51 |
+
and [model-small.onnx](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small.onnx) and place the
|
| 52 |
+
files in the `/tf/` folder.
|
| 53 |
+
|
| 54 |
+
2) Set up dependencies:
|
| 55 |
+
|
| 56 |
+
```shell
|
| 57 |
+
# install OpenCV
|
| 58 |
+
pip install --upgrade pip
|
| 59 |
+
pip install opencv-python
|
| 60 |
+
|
| 61 |
+
# install ONNX
|
| 62 |
+
pip install onnx==1.7.0
|
| 63 |
+
|
| 64 |
+
# install ONNX Runtime
|
| 65 |
+
pip install onnxruntime==1.5.2
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
#### Usage
|
| 69 |
+
|
| 70 |
+
1) Place one or more input images in the folder `tf/input`.
|
| 71 |
+
|
| 72 |
+
2) Run the model:
|
| 73 |
+
|
| 74 |
+
```shell
|
| 75 |
+
python tf/run_onnx.py
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
Or run the small model:
|
| 79 |
+
|
| 80 |
+
```shell
|
| 81 |
+
python tf/run_onnx.py --model_weights model-small.onnx --model_type small
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
3) The resulting inverse depth maps are written to the `tf/output` folder.
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
### Make ONNX model from downloaded Pytorch model file
|
| 89 |
+
|
| 90 |
+
1) Download the model weights [model-f6b98070.pt](https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pt) and place the
|
| 91 |
+
file in the root folder.
|
| 92 |
+
|
| 93 |
+
2) Set up dependencies:
|
| 94 |
+
|
| 95 |
+
```shell
|
| 96 |
+
# install OpenCV
|
| 97 |
+
pip install --upgrade pip
|
| 98 |
+
pip install opencv-python
|
| 99 |
+
|
| 100 |
+
# install PyTorch TorchVision
|
| 101 |
+
pip install -I torch==1.7.0 torchvision==0.8.0
|
| 102 |
+
|
| 103 |
+
# install TensorFlow
|
| 104 |
+
pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0
|
| 105 |
+
|
| 106 |
+
# install ONNX
|
| 107 |
+
pip install onnx==1.7.0
|
| 108 |
+
|
| 109 |
+
# install ONNX-TensorFlow
|
| 110 |
+
git clone https://github.com/onnx/onnx-tensorflow.git
|
| 111 |
+
cd onnx-tensorflow
|
| 112 |
+
git checkout 095b51b88e35c4001d70f15f80f31014b592b81e
|
| 113 |
+
pip install -e .
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
#### Usage
|
| 117 |
+
|
| 118 |
+
1) Run the converter:
|
| 119 |
+
|
| 120 |
+
```shell
|
| 121 |
+
python tf/make_onnx_model.py
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
2) The resulting `model-f6b98070.onnx` file is written to the `/tf/` folder.
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
### Requirements
|
| 128 |
+
|
| 129 |
+
The code was tested with Python 3.6.9, PyTorch 1.5.1, TensorFlow 2.2.0, TensorFlow-addons 0.8.3, ONNX 1.7.0, ONNX-TensorFlow (GitHub-master-17.07.2020) and OpenCV 4.3.0.
|
| 130 |
+
|
| 131 |
+
### Citation
|
| 132 |
+
|
| 133 |
+
Please cite our paper if you use this code or any of the models:
|
| 134 |
+
```
|
| 135 |
+
@article{Ranftl2019,
|
| 136 |
+
author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun},
|
| 137 |
+
title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer},
|
| 138 |
+
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
|
| 139 |
+
year = {2020},
|
| 140 |
+
}
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
### License
|
| 144 |
+
|
| 145 |
+
MIT License
|
| 146 |
+
|
| 147 |
+
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder
ADDED
|
File without changes
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compute depth maps for images in the input folder.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import ntpath
|
| 5 |
+
import glob
|
| 6 |
+
import torch
|
| 7 |
+
import utils
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
from torchvision.transforms import Compose, Normalize
|
| 11 |
+
from torchvision import transforms
|
| 12 |
+
|
| 13 |
+
from shutil import copyfile
|
| 14 |
+
import fileinput
|
| 15 |
+
import sys
|
| 16 |
+
sys.path.append(os.getcwd() + '/..')
|
| 17 |
+
|
| 18 |
+
def modify_file():
    """Patch ``../midas/blocks.py`` in place, leaving a ``.bak`` copy of the original next to it.

    Three textual substitutions are applied, in order: ``align_corners=True``
    becomes ``align_corners=False``, a ``torchvision.models`` import is added
    after ``import torch.nn as nn``, and the ``torch.hub`` ResNeXt load is
    replaced by ``models.resnext101_32x8d()``.
    """
    target = '../midas/blocks.py'
    copyfile(target, target + '.bak')

    substitutions = (
        ('align_corners=True', 'align_corners=False'),
        ('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models'),
        ('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()'),
    )

    with open(target, 'r') as source:
        text = source.read()

    for old, new in substitutions:
        text = text.replace(old, new)

    with open(target, 'w') as sink:
        sink.write(text)
|
| 31 |
+
|
| 32 |
+
def restore_file():
    """Copy the ``.bak`` backup back over ``../midas/blocks.py``; the backup file itself is kept."""
    original = '../midas/blocks.py'
    backup = original + '.bak'
    copyfile(backup, original)
|
| 35 |
+
|
| 36 |
+
# Import the network while ../midas/blocks.py is temporarily patched.
# The try/finally guarantees the original file is put back even when the
# import itself fails.
modify_file()
try:
    from midas.midas_net import MidasNet
    from midas.transforms import Resize, NormalizeImage, PrepareForNet
finally:
    restore_file()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class MidasNet_preprocessing(MidasNet):
    """Network for monocular depth estimation with the per-channel input normalization built into ``forward``.
    """

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input data (image), indexed as (batch, channel, height, width)

        Returns:
            tensor: depth
        """
        # Create the constants with the input's dtype and device so the
        # arithmetic also works when x does not live on the CPU.
        mean = torch.tensor([0.485, 0.456, 0.406], dtype=x.dtype, device=x.device)
        std = torch.tensor([0.229, 0.224, 0.225], dtype=x.dtype, device=x.device)

        # Out-of-place on purpose: the caller's tensor must not be modified.
        x = (x - mean[None, :, None, None]) / std[None, :, None, None]

        return MidasNet.forward(self, x)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def run(model_path):
    """Load the network from ``model_path`` and export it to ONNX.

    The ``.onnx`` file is written to the current working directory and is
    named after the base name of ``model_path``.

    Args:
        model_path (str): path to saved model
    """
    print("initialize")

    # load network
    model = MidasNet_preprocessing(model_path, non_negative=True)

    model.eval()

    print("start processing")

    # dummy input (channels, height, width) used for the trial run and for tracing
    img_input = np.zeros((3, 384, 384), np.float32)

    # trial forward pass, including the usual resize back to the input size
    with torch.no_grad():
        sample = torch.from_numpy(img_input).unsqueeze(0)
        prediction = model.forward(sample)
        prediction = (
            torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                # spatial size is (H, W): the last two axes of the CHW array
                size=img_input.shape[1:],
                mode="bicubic",
                align_corners=False,
            )
            .squeeze()
            .cpu()
            .numpy()
        )

    torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9)

    print("finished")
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
if __name__ == "__main__":
    # Location of the PyTorch weights, relative to the working directory.
    MODEL_PATH = "../model-f6b98070.pt"

    # Convert them to an ONNX model.
    run(MODEL_PATH)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder
ADDED
|
File without changes
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compute depth maps for images in the input folder.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import glob
|
| 5 |
+
import utils
|
| 6 |
+
import cv2
|
| 7 |
+
import sys
|
| 8 |
+
import numpy as np
|
| 9 |
+
import argparse
|
| 10 |
+
|
| 11 |
+
import onnx
|
| 12 |
+
import onnxruntime as rt
|
| 13 |
+
|
| 14 |
+
from transforms import Resize, NormalizeImage, PrepareForNet
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def run(input_path, output_path, model_path, model_type="large"):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): "large" (384x384 network input) or "small" (256x256)

    Raises:
        AssertionError: if ``model_type`` is neither "large" nor "small"
    """
    print("initialize")

    # select device
    # NOTE: this value is only printed; it is not passed to the inference session.
    device = "CUDA:0"
    #device = "CPU"
    print("device: %s" % device)

    # network resolution
    if model_type == "large":
        net_w, net_h = 384, 384
    elif model_type == "small":
        net_w, net_h = 256, 256
    else:
        message = f"model_type '{model_type}' not implemented, use: --model_type large"
        print(message)
        # Raised explicitly: a bare `assert` disappears under `python -O`.
        raise AssertionError(message)

    # load network
    print("loading model...")
    model = rt.InferenceSession(model_path)
    input_name = model.get_inputs()[0].name
    output_name = model.get_outputs()[0].name

    resize_image = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )

    def compose2(f1, f2):
        return lambda x: f2(f1(x))

    transform = compose2(resize_image, PrepareForNet())

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        output = model.run([output_name], {input_name: img_input.reshape(1, 3, net_h, net_w).astype(np.float32)})[0]
        prediction = np.array(output).reshape(net_h, net_w)
        # cv2.resize takes the target size as (width, height)
        prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == "__main__":
    # Command-line interface of the ONNX inference script.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i", "--input_path",
        default="input",
        help="folder with input images",
    )
    parser.add_argument(
        "-o", "--output_path",
        default="output",
        help="folder for output images",
    )
    parser.add_argument(
        "-m", "--model_weights",
        default="model-f6b98070.onnx",
        help="path to the trained weights of model",
    )
    parser.add_argument(
        "-t", "--model_type",
        default="large",
        help="model type: large or small",
    )

    args = parser.parse_args()

    # compute depth maps
    run(
        input_path=args.input_path,
        output_path=args.output_path,
        model_path=args.model_weights,
        model_type=args.model_type,
    )
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_pb.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compute depth maps for images in the input folder.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import glob
|
| 5 |
+
import utils
|
| 6 |
+
import cv2
|
| 7 |
+
import argparse
|
| 8 |
+
|
| 9 |
+
import tensorflow as tf
|
| 10 |
+
|
| 11 |
+
from transforms import Resize, NormalizeImage, PrepareForNet
|
| 12 |
+
|
| 13 |
+
def run(input_path, output_path, model_path, model_type="large"):
    """Run MonoDepthNN (frozen TensorFlow graph) to compute depth maps.

    Every path matched by ``<input_path>/*`` is read, resized to the network
    resolution, fed through the graph, resized back to the original image
    size and written through ``utils.write_depth(..., bits=2)``.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder (created if missing)
        model_path (str): path to saved model
        model_type (str, optional): "large" (384x384 network input) or
            "small" (256x256 network input). Defaults to "large".

    Raises:
        ValueError: if ``model_type`` is neither "large" nor "small".
    """
    print("initialize")

    # the runtime initialization will not allocate all memory on the device to avoid out of GPU memory
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_virtual_device_configuration(
                    gpu,
                    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)],
                )
        except RuntimeError as e:
            print(e)

    # network resolution
    if model_type == "large":
        net_w, net_h = 384, 384
    elif model_type == "small":
        net_w, net_h = 256, 256
    else:
        # An explicit exception (rather than ``assert False``) so that the
        # check still fires when Python runs with -O.
        raise ValueError(
            f"model_type '{model_type}' not implemented, use: --model_type large"
        )

    # load network
    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(model_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def, name='')

    # The input tensor name is fixed; the output is taken to be the first
    # output of the last operation in the imported graph.
    model_operations = tf.compat.v1.get_default_graph().get_operations()
    input_node = '0:0'
    output_layer = model_operations[-1].name + ':0'
    print("Last layer name: ", output_layer)

    resize_image = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )

    def compose2(f1, f2):
        # Apply f1 first, then f2.
        return lambda x: f2(f1(x))

    transform = compose2(resize_image, PrepareForNet())

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    with tf.compat.v1.Session() as sess:
        try:
            # The output tensor does not depend on the image, so look it up once.
            prob_tensor = sess.graph.get_tensor_by_name(output_layer)

            for ind, img_name in enumerate(img_names):

                print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

                # input
                img = utils.read_image(img_name)
                img_input = transform({"image": img})["image"]

                # compute (a batch of one image; unpack the single result)
                prediction, = sess.run(prob_tensor, {input_node: [img_input]})
                prediction = prediction.reshape(net_h, net_w)
                # scale the prediction back to the original image size
                prediction = cv2.resize(
                    prediction,
                    (img.shape[1], img.shape[0]),
                    interpolation=cv2.INTER_CUBIC,
                )

                # output: same base name as the input, inside output_path
                filename = os.path.join(
                    output_path, os.path.splitext(os.path.basename(img_name))[0]
                )
                utils.write_depth(filename, prediction, bits=2)

        except KeyError:
            print(
                f"Couldn't find input node: {input_node} or output layer: {output_layer}."
            )
            exit(-1)

    print("finished")
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
if __name__ == "__main__":
    # Command-line entry point: collect the folders, the weights file and the
    # model variant, then hand them to run().
    cli = argparse.ArgumentParser()

    cli.add_argument(
        "-i", "--input_path", default="input", help="folder with input images"
    )
    cli.add_argument(
        "-o", "--output_path", default="output", help="folder for output images"
    )
    cli.add_argument(
        "-m",
        "--model_weights",
        default="model-f6b98070.pb",
        help="path to the trained weights of model",
    )
    cli.add_argument(
        "-t", "--model_type", default="large", help="model type: large or small"
    )

    options = cli.parse_args()

    # compute depth maps
    run(
        options.input_path,
        options.output_path,
        options.model_weights,
        options.model_type,
    )
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/transforms.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import cv2
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    The current size is taken from ``sample["disparity"]``. When it already
    meets ``size`` in both dimensions nothing is resized. Otherwise "image",
    "disparity" and "mask" are all scaled up by the same factor, in place.

    Args:
        sample (dict): sample with "image", "disparity" and "mask" entries
        size (tuple): minimum size, compared index-wise against the
            disparity shape
        image_interpolation_method (int, optional): cv2 interpolation flag
            used for the image. Defaults to cv2.INTER_AREA.

    Returns:
        tuple: new size when a resize happened. NOTE: when no resize is
        needed the unchanged ``sample`` dict is returned instead.
    """
    dims = list(sample["disparity"].shape)

    # Nothing to do unless at least one dimension falls short of the minimum.
    if not (dims[0] < size[0] or dims[1] < size[1]):
        return sample

    # One common factor keeps the aspect ratio; the larger ratio guarantees
    # both dimensions reach the minimum.
    factor = max([size[0] / dims[0], size[1] / dims[1]])

    dims[0] = math.ceil(factor * dims[0])
    dims[1] = math.ceil(factor * dims[1])

    # cv2.resize receives the shape in reversed order
    target = tuple(dims[::-1])

    # resize
    sample["image"] = cv2.resize(
        sample["image"], target, interpolation=image_interpolation_method
    )
    sample["disparity"] = cv2.resize(
        sample["disparity"], target, interpolation=cv2.INTER_NEAREST
    )

    # The mask goes through float32 for the resize and back to bool after.
    mask_as_float = sample["mask"].astype(np.float32)
    sample["mask"] = cv2.resize(
        mask_as_float, target, interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(dims)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class Resize(object):
    """Resize sample to given size (width, height).

    Calling an instance resizes ``sample["image"]`` and, when
    ``resize_target`` is truthy, any "disparity", "depth" and "mask"
    entries present in the sample.
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional):
                cv2 interpolation flag used when resizing the image.
                Defaults to cv2.INTER_AREA.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        Rounds to the nearest multiple first. If that exceeds ``max_val`` the
        value is rounded down instead; if the result is below ``min_val`` it
        is rounded up instead.

        Args:
            x (float): value to constrain
            min_val (int, optional): lower bound. Defaults to 0.
            max_val (int, optional): upper bound, or None for no upper bound.

        Returns:
            int: constrained value (a numpy integer)
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output size for an input of the given size.

        Args:
            width (int): input width
            height (int): input height

        Returns:
            tuple: (new_width, new_height)

        Raises:
            ValueError: if the configured resize_method is not one of
                "lower_bound", "upper_bound" or "minimal".
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            # Both axes must share one scale factor; the resize method
            # decides which axis' factor wins.
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        """Resize the sample in place and return it.

        Args:
            sample (dict): must contain "image"; "disparity", "depth" and
                "mask" are resized too when present and resize_target is set.

        Returns:
            dict: the same sample object, with resized entries
        """
        # shape[1] is passed as the width and shape[0] as the height
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            # Targets use nearest-neighbour interpolation.
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

            # The mask is optional like the other targets; image-only samples
            # would otherwise raise KeyError with the default resize_target.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                sample["mask"] = sample["mask"].astype(bool)

        return sample
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        """Store the statistics used by ``__call__``.

        Args:
            mean: value subtracted from the image
            std: value the centered image is divided by
        """
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        """Replace ``sample["image"]`` by ``(image - mean) / std``.

        Args:
            sample (dict): sample with an "image" entry

        Returns:
            dict: the same sample object, with the normalized image
        """
        centered = sample["image"] - self.__mean
        sample["image"] = centered / self.__std
        return sample
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        """Convert the sample's arrays to contiguous float32, in place.

        The image has its last axis moved to the front (axes (2, 0, 1)).
        "mask", "disparity" and "depth" are converted only when present.

        Args:
            sample (dict): sample with an "image" entry

        Returns:
            dict: the same sample object, with converted entries
        """
        channels_first = np.transpose(sample["image"], (2, 0, 1))
        contiguous = np.ascontiguousarray(channels_first)
        sample["image"] = contiguous.astype(np.float32)

        # Optional entries all get the same float32 + contiguous treatment.
        for key in ("mask", "disparity", "depth"):
            if key in sample:
                as_float = sample[key].astype(np.float32)
                sample[key] = np.ascontiguousarray(as_float)

        return sample
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import sys
|
| 3 |
+
import cv2
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def write_pfm(path, image, scale=1):
    """Write pfm file.

    Args:
        path (str): path to file
        image (array): float32 data with H x W x 3 (color), H x W x 1 or
            H x W (greyscale) dimensions
        scale (int, optional): Scale. Defaults to 1.

    Raises:
        Exception: if the image is not float32 or has unsupported dimensions.
    """
    # Validate before opening the file so that a rejected image does not
    # leave an empty file behind.
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif (
        len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
    ):  # greyscale
        color = False
    else:
        raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # rows are written in vertically flipped order
    image = np.flipud(image)

    endian = image.dtype.byteorder

    # a negative scale in the header marks little-endian data
    if endian == "<" or endian == "=" and sys.byteorder == "little":
        scale = -scale

    with open(path, "wb") as file:
        # Parenthesised so that .encode() applies to both alternatives; the
        # file is binary and rejects a str header.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
|
| 42 |
+
|
| 43 |
+
def read_image(path):
    """Read image and output RGB image (0-1).

    Args:
        path (str): path to file

    Returns:
        array: RGB image (0-1)

    Raises:
        OSError: if the file cannot be read as an image.
    """
    img = cv2.imread(path)

    # cv2.imread reports failure by returning None instead of raising
    if img is None:
        raise OSError("Could not read image: {}".format(path))

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    # BGR -> RGB, then divide by 255
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img
|
| 58 |
+
|
| 59 |
+
def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.

    The PFM file receives the raw values as float32. The PNG receives the
    values rescaled linearly to the full integer range for ``bits`` bytes.

    Args:
        path (str): filepath without extension
        depth (array): depth
        bits (int, optional): bytes per PNG sample; 1 writes uint8, 2 writes
            uint16. For any other value only the PFM file is written.
            Defaults to 1.
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    max_val = (2**(8*bits))-1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # Constant map: must be an array (not the scalar 0) because
        # .astype() is called on it below.
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/utils.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utils for monoDepth.
|
| 2 |
+
"""
|
| 3 |
+
import sys
|
| 4 |
+
import re
|
| 5 |
+
import numpy as np
|
| 6 |
+
import cv2
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def read_pfm(path):
    """Read pfm file.

    Parses the three header lines (type tag, "width height", scale) and then
    the float payload. The sign of the scale selects the byte order, and the
    rows are flipped vertically before returning.

    Args:
        path (str): path to file

    Returns:
        tuple: (data, scale)

    Raises:
        Exception: if the type tag is neither "PF" nor "Pf", or the
            dimension line is malformed.
    """
    with open(path, "rb") as stream:
        # line 1: "PF" = three channels, "Pf" = single channel
        tag = stream.readline().rstrip().decode("ascii")
        if tag == "PF":
            has_color = True
        elif tag == "Pf":
            has_color = False
        else:
            raise Exception("Not a PFM file: " + path)

        # line 2: "<width> <height>"
        dims = re.match(r"^(\d+)\s(\d+)\s$", stream.readline().decode("ascii"))
        if not dims:
            raise Exception("Malformed PFM header.")
        width, height = (int(group) for group in dims.groups())

        # line 3: scale; a negative value means little-endian data
        scale = float(stream.readline().decode("ascii").rstrip())
        if scale < 0:
            byte_order = "<"
            scale = -scale
        else:
            byte_order = ">"

        values = np.fromfile(stream, byte_order + "f")

        if has_color:
            target_shape = (height, width, 3)
        else:
            target_shape = (height, width)

        # reshape, then flip the rows vertically
        data = np.flipud(np.reshape(values, target_shape))

        return data, scale
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def write_pfm(path, image, scale=1):
    """Write pfm file.

    Args:
        path (str): path to file
        image (array): float32 data with H x W x 3 (color), H x W x 1 or
            H x W (greyscale) dimensions
        scale (int, optional): Scale. Defaults to 1.

    Raises:
        Exception: if the image is not float32 or has unsupported dimensions.
    """
    # Validate before opening the file so that a rejected image does not
    # leave an empty file behind.
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif (
        len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
    ):  # greyscale
        color = False
    else:
        raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # rows are written in vertically flipped order
    image = np.flipud(image)

    endian = image.dtype.byteorder

    # a negative scale in the header marks little-endian data
    if endian == "<" or endian == "=" and sys.byteorder == "little":
        scale = -scale

    with open(path, "wb") as file:
        # Parenthesised so that .encode() applies to both alternatives; the
        # file is binary and rejects a str header.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def read_image(path):
    """Read image and output RGB image (0-1).

    Args:
        path (str): path to file

    Returns:
        array: RGB image (0-1)

    Raises:
        OSError: if the file cannot be read as an image.
    """
    img = cv2.imread(path)

    # cv2.imread reports failure by returning None instead of raising
    if img is None:
        raise OSError("Could not read image: {}".format(path))

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    # BGR -> RGB, then divide by 255
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def resize_image(img):
    """Resize image and make it fit for network.

    The longer side is scaled towards 384, both sides are rounded up to a
    multiple of 32, and the result is returned as a float tensor with the
    last axis moved to the front and a leading axis of size 1 added.

    Args:
        img (array): image

    Returns:
        tensor: data ready for network
    """
    rows, cols = img.shape[0], img.shape[1]

    # the longer side determines the scale factor
    factor = max(cols, rows) / 384

    # round each side up to the next multiple of 32
    new_rows = (np.ceil(rows / factor / 32) * 32).astype(int)
    new_cols = (np.ceil(cols / factor / 32) * 32).astype(int)

    shrunk = cv2.resize(img, (new_cols, new_rows), interpolation=cv2.INTER_AREA)

    channels_first = np.transpose(shrunk, (2, 0, 1))
    tensor = torch.from_numpy(channels_first).contiguous().float()

    return tensor.unsqueeze(0)
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def resize_depth(depth, width, height):
    """Resize depth map and bring to CPU (numpy).

    Takes the first element along the leading axis of a 4-D tensor, drops
    size-1 axes, moves it to the CPU and resizes it with cubic interpolation.

    Args:
        depth (tensor): depth
        width (int): image width
        height (int): image height

    Returns:
        array: processed depth
    """
    first_item = depth[0, :, :, :]
    on_cpu = torch.squeeze(first_item).to("cpu")

    return cv2.resize(
        on_cpu.numpy(), (width, height), interpolation=cv2.INTER_CUBIC
    )
|
| 165 |
+
|
| 166 |
+
def write_depth(path, depth, grayscale, bits=1):
    """Write depth map to png file.

    Non-finite values are replaced by 0 (with a printed warning), the map is
    rescaled linearly to the integer range for ``bits`` bytes, optionally
    colour-mapped, and written to ``path + ".png"``.

    Args:
        path (str): filepath without extension
        depth (array): depth
        grayscale (bool): use a grayscale colormap?
        bits (int, optional): bytes per sample; 1 writes uint8, 2 writes
            uint16, any other value writes nothing. Forced to 1 when
            ``grayscale`` is false. Defaults to 1.
    """
    use_colormap = not grayscale

    # the colour map is applied to uint8 data, so force single-byte output
    if use_colormap:
        bits = 1

    if not np.isfinite(depth).all():
        depth = np.nan_to_num(depth, nan=0.0, posinf=0.0, neginf=0.0)
        print("WARNING: Non-finite depth values present")

    lowest = depth.min()
    highest = depth.max()
    max_val = 2 ** (8 * bits) - 1

    if highest - lowest > np.finfo("float").eps:
        out = max_val * (depth - lowest) / (highest - lowest)
    else:
        # constant map: nothing to stretch, emit all zeros
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if use_colormap:
        out = cv2.applyColorMap(np.uint8(out), cv2.COLORMAP_INFERNO)

    png_dtype = {1: "uint8", 2: "uint16"}.get(bits)
    if png_dtype is not None:
        cv2.imwrite(path + ".png", out.astype(png_dtype))

    return
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/base_models/midas_repo/weights/.placeholder
ADDED
|
File without changes
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/builder.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
from importlib import import_module
|
| 26 |
+
from .depth_model import DepthModel
|
| 27 |
+
|
| 28 |
+
def build_model(config) -> DepthModel:
    """Build the model described by a config.

    Imports the module ``zoedepth.models.<config.model>``, looks up its
    ``get_version`` function, calls it with ``config.version_name`` and
    returns ``build_from_config(config)`` of the result.
    This function should be used to construct models for training and evaluation.

    Args:
        config: Config object exposing ``model`` and ``version_name``
            attributes; it is passed through unchanged to ``build_from_config``.

    Returns:
        torch.nn.Module: Model corresponding to name and version as specified in config

    Raises:
        ValueError: if the model module cannot be imported or does not
            define ``get_version``.
    """
    try:
        module = import_module(f"zoedepth.models.{config.model}")
    except ModuleNotFoundError as err:
        # print the original error message
        print(err)
        raise ValueError(
            f"Model {config.model} not found. Refer above error for details.") from err

    try:
        version_lookup = module.get_version
    except AttributeError as err:
        raise ValueError(
            f"Model {config.model} has no get_version function.") from err

    selected_version = version_lookup(config.version_name)
    return selected_version.build_from_config(config)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/depth_model.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import numpy as np
|
| 26 |
+
import torch
|
| 27 |
+
import torch.nn as nn
|
| 28 |
+
import torch.nn.functional as F
|
| 29 |
+
from torchvision import transforms
|
| 30 |
+
import PIL.Image
|
| 31 |
+
from PIL import Image
|
| 32 |
+
from typing import Union
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DepthModel(nn.Module):
|
| 36 |
+
def __init__(self):
|
| 37 |
+
super().__init__()
|
| 38 |
+
self.device = 'cpu'
|
| 39 |
+
|
| 40 |
+
def to(self, device) -> nn.Module:
|
| 41 |
+
self.device = device
|
| 42 |
+
return super().to(device)
|
| 43 |
+
|
| 44 |
+
def forward(self, x, *args, **kwargs):
|
| 45 |
+
raise NotImplementedError
|
| 46 |
+
|
| 47 |
+
def _infer(self, x: torch.Tensor):
|
| 48 |
+
"""
|
| 49 |
+
Inference interface for the model
|
| 50 |
+
Args:
|
| 51 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
| 52 |
+
Returns:
|
| 53 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
| 54 |
+
"""
|
| 55 |
+
return self(x)['metric_depth']
|
| 56 |
+
|
| 57 |
+
def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor:
|
| 58 |
+
"""
|
| 59 |
+
Inference interface for the model with padding augmentation
|
| 60 |
+
Padding augmentation fixes the boundary artifacts in the output depth map.
|
| 61 |
+
Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image.
|
| 62 |
+
This augmentation pads the input image and crops the prediction back to the original size / view.
|
| 63 |
+
|
| 64 |
+
Note: This augmentation is not required for the models trained with 'avoid_boundary'=True.
|
| 65 |
+
Args:
|
| 66 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
| 67 |
+
pad_input (bool, optional): whether to pad the input or not. Defaults to True.
|
| 68 |
+
fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3.
|
| 69 |
+
fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3.
|
| 70 |
+
upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'.
|
| 71 |
+
padding_mode (str, optional): padding mode. Defaults to "reflect".
|
| 72 |
+
Returns:
|
| 73 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
| 74 |
+
"""
|
| 75 |
+
# assert x is nchw and c = 3
|
| 76 |
+
assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim())
|
| 77 |
+
assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1])
|
| 78 |
+
|
| 79 |
+
if pad_input:
|
| 80 |
+
assert fh > 0 or fw > 0, "atlease one of fh and fw must be greater than 0"
|
| 81 |
+
pad_h = int(np.sqrt(x.shape[2]/2) * fh)
|
| 82 |
+
pad_w = int(np.sqrt(x.shape[3]/2) * fw)
|
| 83 |
+
padding = [pad_w, pad_w]
|
| 84 |
+
if pad_h > 0:
|
| 85 |
+
padding += [pad_h, pad_h]
|
| 86 |
+
|
| 87 |
+
x = F.pad(x, padding, mode=padding_mode, **kwargs)
|
| 88 |
+
out = self._infer(x)
|
| 89 |
+
if out.shape[-2:] != x.shape[-2:]:
|
| 90 |
+
out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False)
|
| 91 |
+
if pad_input:
|
| 92 |
+
# crop to the original size, handling the case where pad_h and pad_w is 0
|
| 93 |
+
if pad_h > 0:
|
| 94 |
+
out = out[:, :, pad_h:-pad_h,:]
|
| 95 |
+
if pad_w > 0:
|
| 96 |
+
out = out[:, :, :, pad_w:-pad_w]
|
| 97 |
+
return out
|
| 98 |
+
|
| 99 |
+
def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor:
|
| 100 |
+
"""
|
| 101 |
+
Inference interface for the model with horizontal flip augmentation
|
| 102 |
+
Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip.
|
| 103 |
+
Args:
|
| 104 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
| 105 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
| 106 |
+
Returns:
|
| 107 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
| 108 |
+
"""
|
| 109 |
+
# infer with horizontal flip and average
|
| 110 |
+
out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
|
| 111 |
+
out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs)
|
| 112 |
+
out = (out + torch.flip(out_flip, dims=[3])) / 2
|
| 113 |
+
return out
|
| 114 |
+
|
| 115 |
+
def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor:
|
| 116 |
+
"""
|
| 117 |
+
Inference interface for the model
|
| 118 |
+
Args:
|
| 119 |
+
x (torch.Tensor): input tensor of shape (b, c, h, w)
|
| 120 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
| 121 |
+
with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
|
| 122 |
+
Returns:
|
| 123 |
+
torch.Tensor: output tensor of shape (b, 1, h, w)
|
| 124 |
+
"""
|
| 125 |
+
if with_flip_aug:
|
| 126 |
+
return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs)
|
| 127 |
+
else:
|
| 128 |
+
return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs)
|
| 129 |
+
|
| 130 |
+
@torch.no_grad()
|
| 131 |
+
def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]:
|
| 132 |
+
"""
|
| 133 |
+
Inference interface for the model for PIL image
|
| 134 |
+
Args:
|
| 135 |
+
pil_img (PIL.Image.Image): input PIL image
|
| 136 |
+
pad_input (bool, optional): whether to use padding augmentation. Defaults to True.
|
| 137 |
+
with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True.
|
| 138 |
+
output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy".
|
| 139 |
+
"""
|
| 140 |
+
x = transforms.ToTensor()(pil_img).unsqueeze(0).to(self.device)
|
| 141 |
+
out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs)
|
| 142 |
+
if output_type == "numpy":
|
| 143 |
+
return out_tensor.squeeze().cpu().numpy()
|
| 144 |
+
elif output_type == "pil":
|
| 145 |
+
# uint16 is required for depth pil image
|
| 146 |
+
out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16)
|
| 147 |
+
return Image.fromarray(out_16bit_numpy)
|
| 148 |
+
elif output_type == "tensor":
|
| 149 |
+
return out_tensor.squeeze().cpu()
|
| 150 |
+
else:
|
| 151 |
+
raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'")
|
| 152 |
+
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/attractor.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import torch
|
| 26 |
+
import torch.nn as nn
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@torch.jit.script
def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Exponential attractor: dc = exp(-alpha * |dx|^gamma) * dx.

    Here dx = a - c, where a is the attractor point, c is the bin center and
    dc is the resulting shift of the bin center.

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional attractor strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponent applied to |dx|. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor: Delta shifts dc; new bin centers = old bin centers + dc
    """
    weight = torch.exp(-alpha * dx.abs().pow(gamma))
    return weight * dx
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@torch.jit.script
def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Inverse attractor: dc = dx / (1 + alpha * dx^gamma).

    Here dx = a - c, where a is the attractor point, c is the bin center and
    dc is the resulting shift of the bin center.
    This is the default one according to the accompanying paper.

    Args:
        dx (torch.Tensor): The difference tensor dx = Ai - Cj, where Ai is the attractor point and Cj is the bin center.
        alpha (float, optional): Proportional attractor strength. Lower alpha = greater attraction. Defaults to 300.
        gamma (int, optional): Exponent applied to dx. Lower gamma = farther reach. Defaults to 2.

    Returns:
        torch.Tensor: Delta shifts dc; new bin centers = old bin centers + dc
    """
    denominator = 1 + alpha * dx.pow(gamma)
    return dx / denominator
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class AttractorLayer(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are bounded on the interval (min_depth, max_depth)

        Args:
            in_features (int): channels of the input feature block
            n_bins (int): stored as ``self.n_bins``
            n_attractors (int, optional): number of attractor points predicted per pixel. Defaults to 16.
            mlp_dim (int, optional): hidden width of the 1x1-conv MLP. Defaults to 128.
            min_depth (float, optional): lower bound of the scaled bin centers. Defaults to 1e-3.
            max_depth (float, optional): upper bound of the scaled bin centers. Defaults to 10.
            alpha, gamma: stored on the instance.
            kind (str, optional): 'sum' or 'mean' reduction over attractors. Defaults to 'sum'.
            attractor_type (str, optional): 'exp' selects ``exp_attractor``; anything else selects ``inv_attractor``.
            memory_efficient (bool, optional): accumulate attractor by attractor instead of in one broadcast.
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            # two channels per attractor ("x2 for linear norm")
            nn.Conv2d(mlp_dim, n_attractors * 2, 1, 1, 0),
            nn.ReLU(inplace=True),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
            prev_b_embedding (torch.Tensor, optional): added to ``x`` when given
            interpolate (bool, optional): resize ``prev_b_embedding`` to the spatial size of ``x`` first
            is_for_query (bool, optional): not used in this method

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new normed bin centers, and the same centers
            scaled to depth, sorted along dim 1 and clipped to [min_depth, max_depth];
            shape - n, nbins, h, w
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        raw = self._net(x) + 1e-3
        n, c, h, w = raw.shape
        raw = raw.view(n, self.n_attractors, 2, h, w)
        # NOTE(review): the normalised tensor computed here is overwritten on
        # the very next line, so the raw first channel of each pair is what is
        # used as the attractor point. Kept as-is to preserve outputs —
        # confirm intent before changing.
        A_normed = raw / raw.sum(dim=2, keepdim=True)  # n, a, 2, h, w
        A_normed = raw[:, :, 0, ...]  # n, na, h, w

        b_centers = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)

        # NOTE(review): self.alpha / self.gamma are not forwarded to the
        # attractor function, so its own defaults apply.
        dist = exp_attractor if self.attractor_type == 'exp' else inv_attractor

        if self.memory_efficient:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for idx in range(self.n_attractors):
                attractor = A_normed[:, idx, ...].unsqueeze(1)
                # .shape N, nbins, h, w
                delta_c += dist(attractor - b_centers)
            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors
        else:
            reduce = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            pairwise = A_normed.unsqueeze(2) - b_centers.unsqueeze(1)
            # .shape N, nbins, h, w
            delta_c = reduce(dist(pairwise), dim=1)

        b_new_centers = b_centers + delta_c
        depth_range = self.max_depth - self.min_depth
        B_centers = depth_range * b_new_centers + self.min_depth
        B_centers = torch.sort(B_centers, dim=1)[0]
        B_centers = torch.clamp(B_centers, self.min_depth, self.max_depth)
        return b_new_centers, B_centers
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class AttractorLayerUnnormed(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer for bin centers. Bin centers are unbounded

        Args:
            in_features (int): channels of the input feature block
            n_bins (int): stored as ``self.n_bins``
            n_attractors (int, optional): number of attractor points predicted per pixel. Defaults to 16.
            mlp_dim (int, optional): hidden width of the 1x1-conv MLP. Defaults to 128.
            min_depth, max_depth, alpha, gamma: stored on the instance.
            kind (str, optional): 'sum' or 'mean' reduction over attractors. Defaults to 'sum'.
            attractor_type (str, optional): 'exp' selects ``exp_attractor``; anything else selects ``inv_attractor``.
            memory_efficient (bool, optional): accumulate attractor by attractor instead of in one broadcast.
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
            nn.Softplus(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers normed; shape - n, prev_nbins, h, w
            prev_b_embedding (torch.Tensor, optional): added to ``x`` when given
            interpolate (bool, optional): resize ``prev_b_embedding`` to the spatial size of ``x`` first
            is_for_query (bool, optional): not used in this method

        Returns:
            tuple(torch.Tensor,torch.Tensor) : new bin centers unbounded; shape - n, nbins, h, w.
            The same tensor is returned twice to keep the API consistent with the normed version
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        attractors = self._net(x)
        n, c, h, w = attractors.shape

        b_centers = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)

        # NOTE(review): self.alpha / self.gamma are not forwarded to the
        # attractor function, so its own defaults apply.
        dist = exp_attractor if self.attractor_type == 'exp' else inv_attractor

        if self.memory_efficient:
            delta_c = torch.zeros_like(b_centers, device=b_centers.device)
            for idx in range(self.n_attractors):
                attractor = attractors[:, idx, ...].unsqueeze(1)
                # .shape N, nbins, h, w
                delta_c += dist(attractor - b_centers)
            if self.kind == 'mean':
                delta_c = delta_c / self.n_attractors
        else:
            reduce = {'mean': torch.mean, 'sum': torch.sum}[self.kind]
            pairwise = attractors.unsqueeze(2) - b_centers.unsqueeze(1)
            # .shape N, nbins, h, w
            delta_c = reduce(dist(pairwise), dim=1)

        b_new_centers = b_centers + delta_c
        B_centers = b_new_centers

        return b_new_centers, B_centers
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/dist_layers.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import torch
|
| 26 |
+
import torch.nn as nn
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def log_binom(n, k, eps=1e-7):
    """Approximate log(nCk) with the Stirling-style formula
    n*log(n) - k*log(k) - (n-k)*log(n-k).

    ``eps`` is added to ``n``, to ``k`` and inside the last logarithm so that
    no log is evaluated at exactly zero.
    """
    n_shifted = n + eps
    k_shifted = k + eps
    remainder = n_shifted - k_shifted
    return (n_shifted * torch.log(n_shifted)
            - k_shifted * torch.log(k_shifted)
            - remainder * torch.log(remainder + eps))
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class LogBinomial(nn.Module):
    def __init__(self, n_classes=256, act=torch.softmax):
        """Compute log binomial distribution for n_classes

        Args:
            n_classes (int, optional): number of output classes. Defaults to 256.
            act (callable, optional): activation applied over dim 1 of the
                temperature-scaled log-probabilities. Defaults to torch.softmax.
        """
        super().__init__()
        self.K = n_classes
        self.act = act
        # Class indices 0..K-1 and the constant K-1, both shaped to broadcast
        # along the channel dimension of an NCHW tensor.
        class_indices = torch.arange(0, n_classes).view(1, -1, 1, 1)
        self.register_buffer('k_idx', class_indices)
        last_index = torch.Tensor([self.K - 1]).view(1, -1, 1, 1)
        self.register_buffer('K_minus_1', last_index)

    def forward(self, x, t=1., eps=1e-4):
        """Compute log binomial distribution for x

        Args:
            x (torch.Tensor - NCHW): probabilities; a 3-dim input gets a channel dim inserted at position 1
            t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1..
            eps (float, optional): Small number for numerical stability. Defaults to 1e-4.

        Returns:
            torch.Tensor -NCHW: log binomial distribution logbinomial(p;t)
        """
        if x.ndim == 3:
            x = x.unsqueeze(1)  # make it nchw

        # Clamp both p and 1-p away from zero before taking logs.
        q = torch.clamp(1 - x, eps, 1)
        p = torch.clamp(x, eps, 1)

        log_coeff = log_binom(self.K_minus_1, self.k_idx)
        successes = self.k_idx * torch.log(p)
        failures = (self.K - 1 - self.k_idx) * torch.log(q)
        y = log_coeff + successes + failures
        return self.act(y / t, dim=1)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class ConditionalLogBinomial(nn.Module):
    def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
        """Conditional Log Binomial distribution

        Args:
            in_features (int): number of input channels in main feature
            condition_dim (int): number of input channels in condition feature
            n_classes (int, optional): Number of classes. Defaults to 256.
            bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2.
            p_eps (float, optional): small eps value. Defaults to 1e-4.
            max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
            min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
            act (callable, optional): activation handed to the LogBinomial transform. Defaults to torch.softmax.
        """
        super().__init__()
        self.p_eps = p_eps
        self.max_temp = max_temp
        self.min_temp = min_temp
        self.log_binomial_transform = LogBinomial(n_classes, act=act)

        total_channels = in_features + condition_dim
        bottleneck = total_channels // bottleneck_factor
        layers = [
            nn.Conv2d(total_channels, bottleneck, kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            # 2 for p linear norm, 2 for t linear norm
            nn.Conv2d(bottleneck, 2 + 2, kernel_size=1, stride=1, padding=0),
            nn.Softplus(),
        ]
        self.mlp = nn.Sequential(*layers)

    def forward(self, x, cond):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Main feature
            cond (torch.Tensor - NCHW): condition feature

        Returns:
            torch.Tensor: Output log binomial distribution
        """
        features = torch.cat((x, cond), dim=1)
        pt = self.mlp(features)

        # First two channels parameterise p, the remaining channels parameterise t.
        p_pair = pt[:, :2, ...] + self.p_eps
        t_pair = pt[:, 2:, ...] + self.p_eps

        # "Linear norm": share of the first channel in the sum of the pair.
        p = p_pair[:, 0, ...] / (p_pair[:, 0, ...] + p_pair[:, 1, ...])
        t = t_pair[:, 0, ...] / (t_pair[:, 0, ...] + t_pair[:, 1, ...])

        # Map the normalised temperature onto [min_temp, max_temp].
        t = t.unsqueeze(1)
        t = (self.max_temp - self.min_temp) * t + self.min_temp

        return self.log_binomial_transform(p, t)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/localbins_layers.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import torch
|
| 26 |
+
import torch.nn as nn
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class SeedBinRegressor(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are bounded on (min_depth, max_depth) interval.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()
        self.version = "1_1"
        self.min_depth = min_depth
        self.max_depth = max_depth

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.ReLU(inplace=True),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x):
        """
        Predict per-pixel bins.

        Returns a tuple ``(normed bin widths, bin centers)``: the widths sum to
        one along dim 1, and the centers are the midpoints of the bin edges
        obtained by accumulating the scaled widths starting from ``min_depth``.
        """
        # Small offset keeps every width strictly positive after the ReLU.
        raw = self._net(x) + 1e-3
        widths_normed = raw / raw.sum(dim=1, keepdim=True)

        depth_range = self.max_depth - self.min_depth
        widths = depth_range * widths_normed  # .shape NCHW
        # pad has the form (left, right, top, bottom, front, back):
        # prepend one channel holding min_depth so the cumsum yields bin edges.
        widths = nn.functional.pad(
            widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        edges = torch.cumsum(widths, dim=1)  # .shape NCHW

        lower, upper = edges[:, :-1, ...], edges[:, 1:, ...]
        centers = 0.5 * (lower + upper)
        return widths_normed, centers
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class SeedBinRegressorUnnormed(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin center regressor network. Bin centers are unbounded

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
            max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
        """
        super().__init__()
        self.version = "1_1"
        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.Softplus(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x):
        """
        Predict one vector of bin centers per pixel.

        The network output is returned twice, as ``(centers, centers)``.
        """
        centers = self._net(x)
        return centers, centers
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class Projector(nn.Module):
    def __init__(self, in_features, out_features, mlp_dim=128):
        """Projector MLP

        Two 1x1 convolutions with a ReLU in between.

        Args:
            in_features (int): input channels
            out_features (int): output channels
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
        """
        super().__init__()

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the projection MLP to ``x``."""
        projected = self._net(x)
        return projected
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class LinearSplitter(nn.Module):
    def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
        """Split every previous bin into ``split_factor`` fractional sub-bins.

        Args:
            in_features (int): input channels
            prev_nbins (int): number of bins in the incoming bin-width tensor
            split_factor (int, optional): sub-bins produced per previous bin. Defaults to 2.
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()

        self.prev_nbins = prev_nbins
        self.split_factor = split_factor
        self.min_depth = min_depth
        self.max_depth = max_depth

        layers = [
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.GELU(),
            nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
            nn.ReLU(),
        ]
        self._net = nn.Sequential(*layers)

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        x : feature block; shape - n, c, h, w
        b_prev : previous bin widths normed; shape - n, prev_nbins, h, w
        prev_b_embedding : optional tensor added to ``x`` (resized to ``x`` first when ``interpolate``)
        is_for_query : not used in this method

        Returns a tuple ``(b, B_centers)``: the new normed bin widths of shape
        n, prev_nbins * split_factor, h, w and the matching bin centers.
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        # Small offset keeps every split strictly positive after the ReLU.
        splits = self._net(x) + 1e-3
        n, c, h, w = splits.shape
        splits = splits.view(n, self.prev_nbins, self.split_factor, h, w)
        split_fractions = splits / splits.sum(dim=2, keepdim=True)  # fractional splits

        b_prev = nn.functional.interpolate(b_prev, (h, w), mode='bilinear', align_corners=True)
        # renormalize so the widths sum to one along dim 1 after interpolation
        b_prev = b_prev / b_prev.sum(dim=1, keepdim=True)

        b = b_prev.unsqueeze(2) * split_fractions
        b = b.flatten(1, 2)  # .shape n, prev_nbins * split_factor, h, w

        # calculate bin centers for loss calculation
        depth_range = self.max_depth - self.min_depth
        widths = depth_range * b  # .shape N, nprev * splitfactor, H, W
        # pad has the form (left, right, top, bottom, front, back):
        # prepend one channel holding min_depth so the cumsum yields bin edges.
        widths = nn.functional.pad(widths, (0, 0, 0, 0, 1, 0), mode='constant', value=self.min_depth)
        edges = torch.cumsum(widths, dim=1)  # .shape NCHW

        lower, upper = edges[:, :-1, ...], edges[:, 1:, ...]
        B_centers = 0.5 * (lower + upper)
        return b, B_centers
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/layers/patch_transformer.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import torch
|
| 26 |
+
import torch.nn as nn
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class PatchTransformerEncoder(nn.Module):
    """ViT-like encoder: patch embedding by strided convolution followed by a transformer encoder."""

    def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
        """ViT-like transformer block

        Args:
            in_channels (int): Input channels
            patch_size (int, optional): patch size. Defaults to 10.
            embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
            num_heads (int, optional): number of attention heads. Defaults to 4.
            use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False.
        """
        super(PatchTransformerEncoder, self).__init__()
        self.use_class_token = use_class_token
        encoder_layers = nn.TransformerEncoderLayer(
            embedding_dim, num_heads, dim_feedforward=1024)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=4)  # takes shape S,N,E

        # Non-overlapping patches: kernel size and stride are both patch_size.
        self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
                                           kernel_size=patch_size, stride=patch_size, padding=0)

    def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
        """Generate positional encodings

        The sine terms fill the first part of the embedding axis and the
        cosine terms the rest (they are concatenated, not interleaved).

        Args:
            sequence_length (int): Sequence length
            batch_size (int): Number of times the encoding is repeated along the batch axis
            embedding_dim (int): Embedding dimension
            device (str or torch.device, optional): Device the encoding is created on. Defaults to 'cpu'.

        Returns:
            torch.Tensor SBE: Positional encodings
        """
        position = torch.arange(
            0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
        index = torch.arange(
            0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
        div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
        pos_encoding = position * div_term
        pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
        # For an odd embedding_dim the frequency axis has (embedding_dim + 1) / 2
        # entries, so the concatenation is one column too wide; trim it so the
        # encoding can always be added to the embeddings. No-op for even sizes.
        pos_encoding = pos_encoding[:, :embedding_dim]
        pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
        return pos_encoding

    def forward(self, x):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Input feature tensor

        Returns:
            torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
        """
        embeddings = self.embedding_convPxP(x).flatten(
            2)  # .shape = n,c,s = n, embedding_dim, s
        if self.use_class_token:
            # prepend one zero-valued token along the sequence axis
            embeddings = nn.functional.pad(embeddings, (1, 0))

        # change to S,N,E format required by transformer
        embeddings = embeddings.permute(2, 0, 1)
        S, N, E = embeddings.shape
        embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
        x = self.transformer_encoder(embeddings)  # .shape = S, N, E
        return x
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/model_io.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import torch
|
| 26 |
+
|
| 27 |
+
def load_state_dict(model, state_dict):
    """Copy ``state_dict`` into ``model``, reconciling the 'module.' key prefix.

    If ``state_dict`` has a "model" entry, that entry is used as the actual
    weights mapping.

    DataParallel / DistributedDataParallel wrappers expect keys prefixed with
    'module.'. When ``model`` is such a wrapper the prefix is added to every
    key lacking it; otherwise one leading 'module.' is stripped from every
    key carrying it.

    Returns:
        The same ``model`` object, after its weights were loaded.
    """
    prefix = 'module.'
    weights = state_dict.get('model', state_dict)
    is_wrapped = isinstance(
        model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))

    def _normalize(key):
        # Make one key match what `model` expects.
        has_prefix = key.startswith(prefix)
        if is_wrapped:
            return key if has_prefix else prefix + key
        return key[len(prefix):] if has_prefix else key

    state = {_normalize(key): value for key, value in weights.items()}

    model.load_state_dict(state)
    print("Loaded successfully")
    return model
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def load_wts(model, checkpoint_path):
    """Read a checkpoint file onto the CPU and load it into ``model``.

    Args:
        model: object receiving the weights (handed on to ``load_state_dict``).
        checkpoint_path: path passed to ``torch.load``.

    Returns:
        Whatever ``load_state_dict`` returns for ``model``.
    """
    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    return load_state_dict(model, torch.load(checkpoint_path, map_location='cpu'))
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def load_state_dict_from_url(model, url, **kwargs):
    """Fetch a checkpoint through ``torch.hub`` and load it into ``model``.

    Args:
        model: object receiving the weights (handed on to ``load_state_dict``).
        url: checkpoint location passed to ``torch.hub.load_state_dict_from_url``.
        **kwargs: forwarded unchanged to ``torch.hub.load_state_dict_from_url``;
            ``map_location`` is already fixed to 'cpu' here.

    Returns:
        Whatever ``load_state_dict`` returns for ``model``.
    """
    fetched = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
    return load_state_dict(model, fetched)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def load_state_from_resource(model, resource: str):
    """Loads weights to the model from a given resource. A resource can be of following types:
        1. URL. Prefixed with "url::"
                e.g. url::http(s)://url.resource.com/ckpt.pt

        2. Local path. Prefixed with "local::"
                e.g. local::/path/to/ckpt.pt

    Args:
        model (torch.nn.Module): Model
        resource (str): resource string

    Returns:
        torch.nn.Module: Model with loaded weights

    Raises:
        ValueError: If ``resource`` starts with neither "url::" nor "local::".
    """
    print(f"Using pretrained resource {resource}")

    url_prefix = 'url::'
    local_prefix = 'local::'

    # Strip only the leading marker. Splitting on the marker would truncate
    # any URL or path that happens to contain the same text again later on.
    if resource.startswith(url_prefix):
        url = resource[len(url_prefix):]
        return load_state_dict_from_url(model, url, progress=True)

    if resource.startswith(local_prefix):
        path = resource[len(local_prefix):]
        return load_wts(model, path)

    raise ValueError("Invalid resource type, only url:: and local:: are supported")
|
| 92 |
+
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
from .zoedepth_v1 import ZoeDepth
|
| 26 |
+
|
| 27 |
+
# Registry mapping a version name to the model class implementing it.
all_versions = {
    "v1": ZoeDepth,
}


def get_version(v):
    """Return the model class registered under version name ``v``.

    Raises:
        KeyError: If ``v`` is not a key of ``all_versions``.
    """
    return all_versions[v]
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"name": "ZoeDepth",
|
| 4 |
+
"version_name": "v1",
|
| 5 |
+
"n_bins": 64,
|
| 6 |
+
"bin_embedding_dim": 128,
|
| 7 |
+
"bin_centers_type": "softplus",
|
| 8 |
+
"n_attractors":[16, 8, 4, 1],
|
| 9 |
+
"attractor_alpha": 1000,
|
| 10 |
+
"attractor_gamma": 2,
|
| 11 |
+
"attractor_kind" : "mean",
|
| 12 |
+
"attractor_type" : "inv",
|
| 13 |
+
"midas_model_type" : "DPT_BEiT_L_384",
|
| 14 |
+
"min_temp": 0.0212,
|
| 15 |
+
"max_temp": 50.0,
|
| 16 |
+
"output_distribution": "logbinomial",
|
| 17 |
+
"memory_efficient": true,
|
| 18 |
+
"inverse_midas": false,
|
| 19 |
+
"img_size": [384, 512]
|
| 20 |
+
},
|
| 21 |
+
|
| 22 |
+
"train": {
|
| 23 |
+
"train_midas": true,
|
| 24 |
+
"use_pretrained_midas": true,
|
| 25 |
+
"trainer": "zoedepth",
|
| 26 |
+
"epochs": 5,
|
| 27 |
+
"bs": 16,
|
| 28 |
+
"optim_kwargs": {"lr": 0.000161, "wd": 0.01},
|
| 29 |
+
"sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
|
| 30 |
+
"same_lr": false,
|
| 31 |
+
"w_si": 1,
|
| 32 |
+
"w_domain": 0.2,
|
| 33 |
+
"w_reg": 0,
|
| 34 |
+
"w_grad": 0,
|
| 35 |
+
"avoid_boundary": false,
|
| 36 |
+
"random_crop": false,
|
| 37 |
+
"input_width": 640,
|
| 38 |
+
"input_height": 480,
|
| 39 |
+
"midas_lr_factor": 1,
|
| 40 |
+
"encoder_lr_factor":10,
|
| 41 |
+
"pos_enc_lr_factor":10,
|
| 42 |
+
"freeze_midas_bn": true
|
| 43 |
+
|
| 44 |
+
},
|
| 45 |
+
|
| 46 |
+
"infer":{
|
| 47 |
+
"train_midas": false,
|
| 48 |
+
"use_pretrained_midas": false,
|
| 49 |
+
"pretrained_resource" : null,
|
| 50 |
+
"force_keep_ar": true
|
| 51 |
+
},
|
| 52 |
+
|
| 53 |
+
"eval":{
|
| 54 |
+
"train_midas": false,
|
| 55 |
+
"use_pretrained_midas": false,
|
| 56 |
+
"pretrained_resource" : null
|
| 57 |
+
}
|
| 58 |
+
}
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"bin_centers_type": "normed",
|
| 4 |
+
"img_size": [384, 768]
|
| 5 |
+
},
|
| 6 |
+
|
| 7 |
+
"train": {
|
| 8 |
+
},
|
| 9 |
+
|
| 10 |
+
"infer":{
|
| 11 |
+
"train_midas": false,
|
| 12 |
+
"use_pretrained_midas": false,
|
| 13 |
+
"pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
|
| 14 |
+
"force_keep_ar": true
|
| 15 |
+
},
|
| 16 |
+
|
| 17 |
+
"eval":{
|
| 18 |
+
"train_midas": false,
|
| 19 |
+
"use_pretrained_midas": false,
|
| 20 |
+
"pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
|
| 21 |
+
}
|
| 22 |
+
}
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth/zoedepth_v1.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import itertools
|
| 26 |
+
|
| 27 |
+
import torch
|
| 28 |
+
import torch.nn as nn
|
| 29 |
+
from ..depth_model import DepthModel
|
| 30 |
+
from ..base_models.midas import MidasCore
|
| 31 |
+
from ..layers.attractor import AttractorLayer, AttractorLayerUnnormed
|
| 32 |
+
from ..layers.dist_layers import ConditionalLogBinomial
|
| 33 |
+
from ..layers.localbins_layers import (Projector, SeedBinRegressor,
|
| 34 |
+
SeedBinRegressorUnnormed)
|
| 35 |
+
from ..model_io import load_state_from_resource
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class ZoeDepth(DepthModel):
    """ZoeDepth with a single metric head stacked on a MiDaS core."""

    def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
                 midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepth model. This is the version of ZoeDepth that has a single metric head

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
            n_bins (int, optional): Number of bin centers. Defaults to 64.
            bin_centers_type (str, optional): One of "normed", "softplus", "hybrid1" or "hybrid2". Selects the seed-bin regressor and attractor variants:
                                              "normed" uses the normed variant of both (bounded bin centers), "softplus" the unnormed variant of both (unbounded),
                                              "hybrid1" a normed regressor with unnormed attractors, "hybrid2" an unnormed regressor with normed attractors. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
            min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
            max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
            n_attractors (List[int], optional): Number of bin attractors at decoder layers; indexed once per decoder feature level and never modified here. Defaults to [16, 8, 4, 1].
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. A value <= 0 freezes the encoder. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): Whether forward() inverts and min-max normalizes the relative depth before conditioning on it. Defaults to False.
            **kwargs: Accepted and ignored by this constructor.

        Raises:
            ValueError: If bin_centers_type is not one of the supported names.
        """
        super().__init__()

        self.core = core
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.min_temp = min_temp
        self.bin_centers_type = bin_centers_type

        self.train_midas = train_midas
        self.inverse_midas = inverse_midas
        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor

        # A non-positive factor means "do not train": freeze the encoder, and
        # the relative position parameters too when their factor is also <= 0.
        if self.encoder_lr_factor <= 0:
            self.core.freeze_encoder(
                freeze_rel_pos=self.pos_enc_lr_factor <= 0)

        N_MIDAS_OUT = 32
        bottleneck_channels = self.core.output_channels[0]
        decoder_channels = self.core.output_channels[1:]

        self.conv2 = nn.Conv2d(bottleneck_channels, bottleneck_channels,
                               kernel_size=1, stride=1, padding=0)  # btlnck conv

        # (seed regressor class, attractor class) per bin_centers_type
        layer_variants = {
            "normed": (SeedBinRegressor, AttractorLayer),
            "softplus": (SeedBinRegressorUnnormed, AttractorLayerUnnormed),
            "hybrid1": (SeedBinRegressor, AttractorLayerUnnormed),
            "hybrid2": (SeedBinRegressorUnnormed, AttractorLayer),
        }
        if bin_centers_type not in layer_variants:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
        SeedBinRegressorLayer, Attractor = layer_variants[bin_centers_type]

        self.seed_bin_regressor = SeedBinRegressorLayer(
            bottleneck_channels, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
        self.seed_projector = Projector(bottleneck_channels, bin_embedding_dim)
        self.projectors = nn.ModuleList(
            [Projector(channels, bin_embedding_dim) for channels in decoder_channels])

        # One attractor per decoder feature level, each with its own attractor count.
        attractor_layers = []
        for level in range(len(decoder_channels)):
            attractor_layers.append(
                Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[level], min_depth=min_depth, max_depth=max_depth,
                          alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type))
        self.attractors = nn.ModuleList(attractor_layers)

        last_in = N_MIDAS_OUT + 1  # +1 for relative depth

        # use log binomial instead of softmax
        self.conditional_log_binomial = ConditionalLogBinomial(
            last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W)
            return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False.
            return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False.
            **kwargs: Accepted and ignored.

        Returns:
            dict: Dictionary containing the following keys:
                - metric_depth (torch.Tensor): Metric depth map with a single channel, computed as the probability-weighted sum of the bin centers
                - bin_centers (torch.Tensor): Final bin centers, resized to the spatial size of the probability map. Present only if return_final_centers or return_probs is True
                - probs (torch.Tensor): Output probability distribution. Present only if return_probs is True

        The relative depth produced by the core is used for conditioning only; it is not part of the returned dict.
        """
        _, _, h, w = x.shape
        # Remember the spatial size of the most recent input.
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)

        outconv_activation, btlnck, x_blocks = out[0], out[1], out[2:]

        x_d0 = self.conv2(btlnck)
        _, seed_b_centers = self.seed_bin_regressor(x_d0)

        if self.bin_centers_type in ('normed', 'hybrid2'):
            # rescale the seed centers from [min_depth, max_depth] to the unit range
            b_prev = (seed_b_centers - self.min_depth) / \
                (self.max_depth - self.min_depth)
        else:
            b_prev = seed_b_centers

        prev_b_embedding = self.seed_projector(x_d0)

        # Refine the bins once per decoder feature level; the values left over
        # from the final iteration feed the output head below.
        for projector, attractor, features in zip(self.projectors, self.attractors, x_blocks):
            b_embedding = projector(features)
            bins, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = bins.clone()
            prev_b_embedding = b_embedding.clone()

        last = outconv_activation

        if self.inverse_midas:
            # invert depth followed by normalization
            rel_depth = 1.0 / (rel_depth + 1e-6)
            rel_depth = (rel_depth - rel_depth.min()) / \
                (rel_depth.max() - rel_depth.min())

        # concat rel depth with last. First interpolate rel depth to last size
        rel_cond = nn.functional.interpolate(
            rel_depth.unsqueeze(1), size=last.shape[2:], mode='bilinear', align_corners=True)
        last = torch.cat([last, rel_cond], dim=1)

        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
        probs = self.conditional_log_binomial(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        b_centers = nn.functional.interpolate(
            b_centers, probs.shape[-2:], mode='bilinear', align_corners=True)
        metric_depth = torch.sum(probs * b_centers, dim=1, keepdim=True)

        # Structure output dict
        output = dict(metric_depth=metric_depth)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = probs

        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model
        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []

        if self.train_midas:
            # Each midas group trains at the base rate divided by its factor.
            if self.encoder_lr_factor > 0:
                encoder_params = self.core.get_enc_params_except_rel_pos()
                param_conf.append(
                    {'params': encoder_params, 'lr': lr / self.encoder_lr_factor})

            if self.pos_enc_lr_factor > 0:
                rel_pos_params = self.core.get_rel_pos_params()
                param_conf.append(
                    {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor})

            scratch_params = self.core.core.scratch.parameters()
            param_conf.append(
                {'params': scratch_params, 'lr': lr / self.midas_lr_factor})

        # Every direct child except the core trains at the base learning rate.
        non_core_children = [
            child for name, child in self.named_children() if name != 'core']
        remaining_params = itertools.chain(
            *[child.parameters() for child in non_core_children])
        param_conf.append({'params': remaining_params, 'lr': lr})

        return param_conf

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the MiDaS core and the ZoeDepth model, optionally loading pretrained weights.

        Args:
            midas_model_type (str, optional): Passed to MidasCore.build. Defaults to "DPT_BEiT_L_384".
            pretrained_resource (str, optional): Resource string handed to load_state_from_resource when truthy. Defaults to None.
            use_pretrained_midas (bool, optional): Passed to MidasCore.build. Defaults to False.
            train_midas (bool, optional): Passed to MidasCore.build. Defaults to False.
            freeze_midas_bn (bool, optional): Passed to MidasCore.build as freeze_bn. Defaults to True.
            **kwargs: Forwarded to both MidasCore.build and the ZoeDepth constructor.

        Returns:
            ZoeDepth: The constructed model.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        # NOTE(review): train_midas is a named parameter here, so it is not in
        # kwargs and the constructor below falls back to its own default.
        model = ZoeDepth(core, **kwargs)
        if not pretrained_resource:
            return model
        assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
        return load_state_from_resource(model, pretrained_resource)

    @staticmethod
    def build_from_config(config):
        """Build a model by expanding the ``config`` mapping into keyword arguments of ``build``."""
        return ZoeDepth.build(**config)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
from .zoedepth_nk_v1 import ZoeDepthNK
|
| 26 |
+
|
| 27 |
+
# Registry of the available ZoeDepthNK implementations, keyed by version name.
all_versions = {
    "v1": ZoeDepthNK,
}


def get_version(v):
    """Return the model class registered under version name ``v``.

    Args:
        v: Version key to look up in ``all_versions`` (e.g. ``"v1"``).

    Returns:
        The class stored in ``all_versions`` for ``v``.

    Raises:
        KeyError: If ``v`` is not a key of ``all_versions``.
    """
    return all_versions[v]
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"name": "ZoeDepthNK",
|
| 4 |
+
"version_name": "v1",
|
| 5 |
+
"bin_conf" : [
|
| 6 |
+
{
|
| 7 |
+
"name": "nyu",
|
| 8 |
+
"n_bins": 64,
|
| 9 |
+
"min_depth": 1e-3,
|
| 10 |
+
"max_depth": 10.0
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"name": "kitti",
|
| 14 |
+
"n_bins": 64,
|
| 15 |
+
"min_depth": 1e-3,
|
| 16 |
+
"max_depth": 80.0
|
| 17 |
+
}
|
| 18 |
+
],
|
| 19 |
+
"bin_embedding_dim": 128,
|
| 20 |
+
"bin_centers_type": "softplus",
|
| 21 |
+
"n_attractors":[16, 8, 4, 1],
|
| 22 |
+
"attractor_alpha": 1000,
|
| 23 |
+
"attractor_gamma": 2,
|
| 24 |
+
"attractor_kind" : "mean",
|
| 25 |
+
"attractor_type" : "inv",
|
| 26 |
+
"min_temp": 0.0212,
|
| 27 |
+
"max_temp": 50.0,
|
| 28 |
+
"memory_efficient": true,
|
| 29 |
+
"midas_model_type" : "DPT_BEiT_L_384",
|
| 30 |
+
"img_size": [384, 512]
|
| 31 |
+
},
|
| 32 |
+
|
| 33 |
+
"train": {
|
| 34 |
+
"train_midas": true,
|
| 35 |
+
"use_pretrained_midas": true,
|
| 36 |
+
"trainer": "zoedepth_nk",
|
| 37 |
+
"epochs": 5,
|
| 38 |
+
"bs": 16,
|
| 39 |
+
"optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
|
| 40 |
+
"sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
|
| 41 |
+
"same_lr": false,
|
| 42 |
+
"w_si": 1,
|
| 43 |
+
"w_domain": 100,
|
| 44 |
+
"avoid_boundary": false,
|
| 45 |
+
"random_crop": false,
|
| 46 |
+
"input_width": 640,
|
| 47 |
+
"input_height": 480,
|
| 48 |
+
"w_grad": 0,
|
| 49 |
+
"w_reg": 0,
|
| 50 |
+
"midas_lr_factor": 10,
|
| 51 |
+
"encoder_lr_factor":10,
|
| 52 |
+
"pos_enc_lr_factor":10
|
| 53 |
+
},
|
| 54 |
+
|
| 55 |
+
"infer": {
|
| 56 |
+
"train_midas": false,
|
| 57 |
+
"pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
|
| 58 |
+
"use_pretrained_midas": false,
|
| 59 |
+
"force_keep_ar": true
|
| 60 |
+
},
|
| 61 |
+
|
| 62 |
+
"eval": {
|
| 63 |
+
"train_midas": false,
|
| 64 |
+
"pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
|
| 65 |
+
"use_pretrained_midas": false
|
| 66 |
+
}
|
| 67 |
+
}
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import itertools
|
| 26 |
+
|
| 27 |
+
import torch
|
| 28 |
+
import torch.nn as nn
|
| 29 |
+
|
| 30 |
+
from zoedepth.models.depth_model import DepthModel
|
| 31 |
+
from zoedepth.models.base_models.midas import MidasCore
|
| 32 |
+
from zoedepth.models.layers.attractor import AttractorLayer, AttractorLayerUnnormed
|
| 33 |
+
from zoedepth.models.layers.dist_layers import ConditionalLogBinomial
|
| 34 |
+
from zoedepth.models.layers.localbins_layers import (Projector, SeedBinRegressor,
|
| 35 |
+
SeedBinRegressorUnnormed)
|
| 36 |
+
from zoedepth.models.layers.patch_transformer import PatchTransformerEncoder
|
| 37 |
+
from zoedepth.models.model_io import load_state_from_resource
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ZoeDepthNK(DepthModel):
    """ZoeDepth variant with one metric head per bin configuration and a learned router.

    A small transformer classifier on the bottleneck features produces two
    domain logits; ``forward`` maps the winning index to the head named
    ``"nyu"`` (index 0) or ``"kitti"`` (index 1), so ``bin_conf`` must contain
    entries with those names.
    """

    def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp',
                 min_temp=5, max_temp=50,
                 memory_efficient=False, train_midas=True,
                 is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features

            bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
                                    "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)

                                   The length of this list determines the number of metric heads.
            bin_centers_type (str, optional): "normed", "softplus", "hybrid1" or "hybrid2". Activation type used for bin centers. For "normed" bin centers, linear normalization trick is applied. This results in bounded bin centers.
                                               For "softplus", softplus activation is used and thus are unbounded.
                                               "hybrid1" pairs the normed seed regressor with unnormed attractors; "hybrid2" pairs the unnormed seed regressor with normed attractors.
                                               Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.

            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Defaults to [16, 8, 4, 1]. The list is only read, never mutated.
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.

            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.

            memory_efficient (bool, optional): Whether to use memory efficient version of attractor layers. Memory efficient version is slower but is recommended incase of multiple metric heads in order save GPU memory. Defaults to False.

            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            is_midas_pretrained (bool, optional): Is "core" pretrained? Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Only applied when is_midas_pretrained is True. Defaults to 1.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): Stored on the instance as-is; not otherwise used by this class. Defaults to False.

        Raises:
            ValueError: If bin_centers_type is not one of the supported names.
        """

        super().__init__()

        self.core = core
        self.bin_conf = bin_conf
        self.min_temp = min_temp
        self.max_temp = max_temp
        self.memory_efficient = memory_efficient
        self.train_midas = train_midas
        self.is_midas_pretrained = is_midas_pretrained
        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.inverse_midas = inverse_midas

        N_MIDAS_OUT = 32
        # First entry of output_channels is the bottleneck; the rest feed the projectors.
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]

        self.conv2 = nn.Conv2d(
            btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0)

        # Transformer classifier on the bottleneck
        self.patch_transformer = PatchTransformerEncoder(
            btlnck_features, 1, 128, use_class_token=True)
        self.mlp_classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
        self.bin_centers_type = bin_centers_type

        # We have bins for each bin conf.
        # Create a map (ModuleDict) of 'name' -> seed_bin_regressor
        self.seed_bin_regressors = nn.ModuleDict(
            {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"])
             for conf in bin_conf}
        )

        self.seed_projector = Projector(
            btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
            for num_out in num_out_features
        ])

        # Create a map (ModuleDict) of 'name' -> attractors (ModuleList)
        self.attractors = nn.ModuleDict(
            {conf['name']: nn.ModuleList([
                Attractor(bin_embedding_dim, n_attractors[i],
                          mlp_dim=bin_embedding_dim, alpha=attractor_alpha,
                          gamma=attractor_gamma, kind=attractor_kind,
                          attractor_type=attractor_type, memory_efficient=memory_efficient,
                          min_depth=conf["min_depth"], max_depth=conf["max_depth"])
                for i in range(len(n_attractors))
            ])
                for conf in bin_conf}
        )

        last_in = N_MIDAS_OUT
        # conditional log binomial for each bin conf
        self.conditional_log_binomial = nn.ModuleDict(
            {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp)
             for conf in bin_conf}
        )

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """
        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain:
                the domain logits are summed over the batch and a single metric head is used for the whole batch.
            return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. Defaults to False.
            return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False.

        Returns:
            dict: Dictionary of outputs with keys:
                - "metric_depth": Metric depth map of shape (B, 1, H, W)
                - "domain_logits": Domain logits of shape (B, 2)
                - "bin_centers": Bin centers of shape (B, N, H, W). Present only if return_final_centers or return_probs is True
                - "probs": Bin probabilities of shape (B, N, H, W). Present only if return_probs is True

        Raises:
            ValueError: If the routed head name is not present in ``self.bin_conf``.
        """
        # Unpacking also enforces a 4-D input; only the spatial size is recorded.
        _, _, h, w = x.shape
        self.orig_input_width = w
        self.orig_input_height = h
        # The relative depth returned by the core is not used by this model.
        _, out = self.core(x, denorm=denorm, return_rel_depth=True)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        x = self.conv2(btlnck)

        # Predict which path to take
        embedding = self.patch_transformer(x)[0]  # N, E
        domain_logits = self.mlp_classifier(embedding)  # N, 2
        domain_vote = torch.softmax(domain_logits.sum(
            dim=0, keepdim=True), dim=-1)  # 1, 2

        # Get the path
        bin_conf_name = ["nyu", "kitti"][torch.argmax(
            domain_vote, dim=-1).squeeze().item()]

        # Key access (not attribute access) matches __init__ and works for both
        # plain dicts and dict subclasses that also expose attributes.
        conf = next(
            (c for c in self.bin_conf if c['name'] == bin_conf_name), None)
        if conf is None:
            raise ValueError(
                f"bin_conf_name {bin_conf_name} not found in bin_confs")

        min_depth = conf['min_depth']
        max_depth = conf['max_depth']

        seed_bin_regressor = self.seed_bin_regressors[bin_conf_name]
        _, seed_b_centers = seed_bin_regressor(x)
        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            # Normed attractors take bin centers rescaled by the head's depth range.
            b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth)
        else:
            b_prev = seed_b_centers
        prev_b_embedding = self.seed_projector(x)

        attractors = self.attractors[bin_conf_name]
        for projector, attractor, x in zip(self.projectors, attractors, x_blocks):
            b_embedding = projector(x)
            b, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = b
            prev_b_embedding = b_embedding

        last = outconv_activation

        # Bring centers and embedding to the spatial size of the last core activation.
        b_centers = nn.functional.interpolate(
            b_centers, last.shape[-2:], mode='bilinear', align_corners=True)
        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)

        clb = self.conditional_log_binomial[bin_conf_name]
        x = clb(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        out = torch.sum(x * b_centers, dim=1, keepdim=True)

        output = dict(domain_logits=domain_logits, metric_depth=out)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = x
        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model

        When ``self.train_midas`` is set, three groups are emitted for the core:
        encoder parameters whose name lacks "relative_position" at
        ``lr / encoder_lr_factor``, those whose name contains it at
        ``lr / pos_enc_lr_factor``, and the core's ``scratch`` parameters at
        ``lr / midas_lr_factor`` (factor 1.0 when the core is not pretrained).
        All children other than ``core`` are always trained at ``lr``.

        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            def get_rel_pos_params():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" in name:
                        yield p

            def get_enc_params_except_rel_pos():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" not in name:
                        yield p

            encoder_params = get_enc_params_except_rel_pos()
            rel_pos_params = get_rel_pos_params()
            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0
            param_conf.extend([
                {'params': encoder_params, 'lr': lr / self.encoder_lr_factor},
                {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor},
                {'params': midas_params, 'lr': lr / midas_lr_factor}
            ])

        remaining_modules = []
        for name, child in self.named_children():
            if name != 'core':
                remaining_modules.append(child)
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])
        param_conf.append({'params': remaining_params, 'lr': lr})
        return param_conf

    def get_conf_parameters(self, conf_name):
        """
        Returns parameters of all the ModuleDicts children that are exclusively used for the given bin configuration

        Args:
            conf_name (str): Name of the bin configuration (a key of the ModuleDict children).
        Returns:
            list : parameters of every ModuleDict entry stored under ``conf_name``.
        """
        params = []
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    if bin_conf_name == conf_name:
                        params += list(module.parameters())
        return params

    def freeze_conf(self, conf_name):
        """
        Freezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = False

    def unfreeze_conf(self, conf_name):
        """
        Unfreezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = True

    def freeze_all_confs(self):
        """
        Freezes all the parameters of all the ModuleDicts children
        """
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    for p in module.parameters():
                        p.requires_grad = False

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the MiDaS core and wrap it in a ZoeDepthNK model.

        Args:
            midas_model_type (str, optional): Forwarded to ``MidasCore.build``. Defaults to "DPT_BEiT_L_384".
            pretrained_resource (str, optional): If truthy, passed to ``load_state_from_resource`` to load weights into the model. Defaults to None.
            use_pretrained_midas (bool, optional): Forwarded to ``MidasCore.build``. Defaults to False.
            train_midas (bool, optional): Forwarded to ``MidasCore.build``. Defaults to False.
            freeze_midas_bn (bool, optional): Forwarded to ``MidasCore.build`` as ``freeze_bn``. Defaults to True.
            **kwargs: Forwarded to both ``MidasCore.build`` and the ``ZoeDepthNK`` constructor.

        Returns:
            The constructed model, or whatever ``load_state_from_resource`` returns when a resource is given.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepthNK(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        """Build a ZoeDepthNK model by forwarding ``config`` as keyword arguments to ``ZoeDepthNK.build``."""
        return ZoeDepthNK.build(**config)
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/arg_utils.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
def infer_type(x):  # hacky way to infer type from string args
    """Convert a string argument to ``int`` or ``float`` when it parses as one.

    Non-string inputs are returned untouched. For strings, ``int`` is tried
    first, then ``float``; if both raise ``ValueError`` the original string
    is returned.
    """
    if isinstance(x, str):
        for convert in (int, float):
            try:
                return convert(x)
            except ValueError:
                continue
    return x
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def parse_unknown(unknown_args):
    """Turn a flat list of leftover command-line tokens into a dict.

    Tokens of the form ``key=value`` are split into a key and a value; all
    other tokens are kept as-is. The resulting sequence is then read as
    alternating key, value pairs. Every ``"--"`` occurring in a key is
    removed and each value is passed through ``infer_type``. A trailing key
    without a value is dropped, because the pairing stops at the shorter of
    the two sequences.

    Args:
        unknown_args: Iterable of string tokens, e.g. ``["--lr", "0.1", "--name=run1"]``.

    Returns:
        dict: Mapping of cleaned key to type-inferred value.
    """
    clean = []
    for a in unknown_args:
        if "=" in a:
            # Split on the first "=" only, so values that themselves contain
            # "=" (URLs with query strings, expressions) are kept intact
            # instead of failing to unpack.
            k, v = a.split("=", 1)
            clean.extend([k, v])
        else:
            clean.append(a)

    keys = clean[::2]
    values = clean[1::2]
    return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)}
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/config.py
ADDED
|
@@ -0,0 +1,437 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import json
|
| 26 |
+
import os
|
| 27 |
+
|
| 28 |
+
from .easydict import EasyDict as edict
|
| 29 |
+
from .arg_utils import infer_type
|
| 30 |
+
|
| 31 |
+
import pathlib
|
| 32 |
+
import platform
|
| 33 |
+
|
| 34 |
+
ROOT = pathlib.Path(__file__).parent.parent.resolve()
|
| 35 |
+
|
| 36 |
+
HOME_DIR = os.path.expanduser("~")
|
| 37 |
+
|
| 38 |
+
COMMON_CONFIG = {
|
| 39 |
+
"save_dir": os.path.expanduser("~/shortcuts/monodepth3_checkpoints"),
|
| 40 |
+
"project": "ZoeDepth",
|
| 41 |
+
"tags": '',
|
| 42 |
+
"notes": "",
|
| 43 |
+
"gpu": None,
|
| 44 |
+
"root": ".",
|
| 45 |
+
"uid": None,
|
| 46 |
+
"print_losses": False
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
DATASETS_CONFIG = {
|
| 50 |
+
"kitti": {
|
| 51 |
+
"dataset": "kitti",
|
| 52 |
+
"min_depth": 0.001,
|
| 53 |
+
"max_depth": 80,
|
| 54 |
+
"data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
|
| 55 |
+
"gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
|
| 56 |
+
"filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt",
|
| 57 |
+
"input_height": 352,
|
| 58 |
+
"input_width": 1216, # 704
|
| 59 |
+
"data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
|
| 60 |
+
"gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
|
| 61 |
+
"filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt",
|
| 62 |
+
|
| 63 |
+
"min_depth_eval": 1e-3,
|
| 64 |
+
"max_depth_eval": 80,
|
| 65 |
+
|
| 66 |
+
"do_random_rotate": True,
|
| 67 |
+
"degree": 1.0,
|
| 68 |
+
"do_kb_crop": True,
|
| 69 |
+
"garg_crop": True,
|
| 70 |
+
"eigen_crop": False,
|
| 71 |
+
"use_right": False
|
| 72 |
+
},
|
| 73 |
+
"kitti_test": {
|
| 74 |
+
"dataset": "kitti",
|
| 75 |
+
"min_depth": 0.001,
|
| 76 |
+
"max_depth": 80,
|
| 77 |
+
"data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
|
| 78 |
+
"gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
|
| 79 |
+
"filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt",
|
| 80 |
+
"input_height": 352,
|
| 81 |
+
"input_width": 1216,
|
| 82 |
+
"data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"),
|
| 83 |
+
"gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"),
|
| 84 |
+
"filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt",
|
| 85 |
+
|
| 86 |
+
"min_depth_eval": 1e-3,
|
| 87 |
+
"max_depth_eval": 80,
|
| 88 |
+
|
| 89 |
+
"do_random_rotate": False,
|
| 90 |
+
"degree": 1.0,
|
| 91 |
+
"do_kb_crop": True,
|
| 92 |
+
"garg_crop": True,
|
| 93 |
+
"eigen_crop": False,
|
| 94 |
+
"use_right": False
|
| 95 |
+
},
|
| 96 |
+
"nyu": {
|
| 97 |
+
"dataset": "nyu",
|
| 98 |
+
"avoid_boundary": False,
|
| 99 |
+
"min_depth": 1e-3, # originally 0.1
|
| 100 |
+
"max_depth": 10,
|
| 101 |
+
"data_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"),
|
| 102 |
+
"gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"),
|
| 103 |
+
"filenames_file": "./train_test_inputs/nyudepthv2_train_files_with_gt.txt",
|
| 104 |
+
"input_height": 480,
|
| 105 |
+
"input_width": 640,
|
| 106 |
+
"data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"),
|
| 107 |
+
"gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"),
|
| 108 |
+
"filenames_file_eval": "./train_test_inputs/nyudepthv2_test_files_with_gt.txt",
|
| 109 |
+
"min_depth_eval": 1e-3,
|
| 110 |
+
"max_depth_eval": 10,
|
| 111 |
+
"min_depth_diff": -10,
|
| 112 |
+
"max_depth_diff": 10,
|
| 113 |
+
|
| 114 |
+
"do_random_rotate": True,
|
| 115 |
+
"degree": 1.0,
|
| 116 |
+
"do_kb_crop": False,
|
| 117 |
+
"garg_crop": False,
|
| 118 |
+
"eigen_crop": True
|
| 119 |
+
},
|
| 120 |
+
"ibims": {
|
| 121 |
+
"dataset": "ibims",
|
| 122 |
+
"ibims_root": os.path.join(HOME_DIR, "shortcuts/datasets/ibims/ibims1_core_raw/"),
|
| 123 |
+
"eigen_crop": True,
|
| 124 |
+
"garg_crop": False,
|
| 125 |
+
"do_kb_crop": False,
|
| 126 |
+
"min_depth_eval": 0,
|
| 127 |
+
"max_depth_eval": 10,
|
| 128 |
+
"min_depth": 1e-3,
|
| 129 |
+
"max_depth": 10
|
| 130 |
+
},
|
| 131 |
+
"sunrgbd": {
|
| 132 |
+
"dataset": "sunrgbd",
|
| 133 |
+
"sunrgbd_root": os.path.join(HOME_DIR, "shortcuts/datasets/SUNRGBD/test/"),
|
| 134 |
+
"eigen_crop": True,
|
| 135 |
+
"garg_crop": False,
|
| 136 |
+
"do_kb_crop": False,
|
| 137 |
+
"min_depth_eval": 0,
|
| 138 |
+
"max_depth_eval": 8,
|
| 139 |
+
"min_depth": 1e-3,
|
| 140 |
+
"max_depth": 10
|
| 141 |
+
},
|
| 142 |
+
"diml_indoor": {
|
| 143 |
+
"dataset": "diml_indoor",
|
| 144 |
+
"diml_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_indoor_test/"),
|
| 145 |
+
"eigen_crop": True,
|
| 146 |
+
"garg_crop": False,
|
| 147 |
+
"do_kb_crop": False,
|
| 148 |
+
"min_depth_eval": 0,
|
| 149 |
+
"max_depth_eval": 10,
|
| 150 |
+
"min_depth": 1e-3,
|
| 151 |
+
"max_depth": 10
|
| 152 |
+
},
|
| 153 |
+
"diml_outdoor": {
|
| 154 |
+
"dataset": "diml_outdoor",
|
| 155 |
+
"diml_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_outdoor_test/"),
|
| 156 |
+
"eigen_crop": False,
|
| 157 |
+
"garg_crop": True,
|
| 158 |
+
"do_kb_crop": False,
|
| 159 |
+
"min_depth_eval": 2,
|
| 160 |
+
"max_depth_eval": 80,
|
| 161 |
+
"min_depth": 1e-3,
|
| 162 |
+
"max_depth": 80
|
| 163 |
+
},
|
| 164 |
+
"diode_indoor": {
|
| 165 |
+
"dataset": "diode_indoor",
|
| 166 |
+
"diode_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_indoor/"),
|
| 167 |
+
"eigen_crop": True,
|
| 168 |
+
"garg_crop": False,
|
| 169 |
+
"do_kb_crop": False,
|
| 170 |
+
"min_depth_eval": 1e-3,
|
| 171 |
+
"max_depth_eval": 10,
|
| 172 |
+
"min_depth": 1e-3,
|
| 173 |
+
"max_depth": 10
|
| 174 |
+
},
|
| 175 |
+
"diode_outdoor": {
|
| 176 |
+
"dataset": "diode_outdoor",
|
| 177 |
+
"diode_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_outdoor/"),
|
| 178 |
+
"eigen_crop": False,
|
| 179 |
+
"garg_crop": True,
|
| 180 |
+
"do_kb_crop": False,
|
| 181 |
+
"min_depth_eval": 1e-3,
|
| 182 |
+
"max_depth_eval": 80,
|
| 183 |
+
"min_depth": 1e-3,
|
| 184 |
+
"max_depth": 80
|
| 185 |
+
},
|
| 186 |
+
"hypersim_test": {
|
| 187 |
+
"dataset": "hypersim_test",
|
| 188 |
+
"hypersim_test_root": os.path.join(HOME_DIR, "shortcuts/datasets/hypersim_test/"),
|
| 189 |
+
"eigen_crop": True,
|
| 190 |
+
"garg_crop": False,
|
| 191 |
+
"do_kb_crop": False,
|
| 192 |
+
"min_depth_eval": 1e-3,
|
| 193 |
+
"max_depth_eval": 80,
|
| 194 |
+
"min_depth": 1e-3,
|
| 195 |
+
"max_depth": 10
|
| 196 |
+
},
|
| 197 |
+
"vkitti": {
|
| 198 |
+
"dataset": "vkitti",
|
| 199 |
+
"vkitti_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti_test/"),
|
| 200 |
+
"eigen_crop": False,
|
| 201 |
+
"garg_crop": True,
|
| 202 |
+
"do_kb_crop": True,
|
| 203 |
+
"min_depth_eval": 1e-3,
|
| 204 |
+
"max_depth_eval": 80,
|
| 205 |
+
"min_depth": 1e-3,
|
| 206 |
+
"max_depth": 80
|
| 207 |
+
},
|
| 208 |
+
"vkitti2": {
|
| 209 |
+
"dataset": "vkitti2",
|
| 210 |
+
"vkitti2_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti2/"),
|
| 211 |
+
"eigen_crop": False,
|
| 212 |
+
"garg_crop": True,
|
| 213 |
+
"do_kb_crop": True,
|
| 214 |
+
"min_depth_eval": 1e-3,
|
| 215 |
+
"max_depth_eval": 80,
|
| 216 |
+
"min_depth": 1e-3,
|
| 217 |
+
"max_depth": 80,
|
| 218 |
+
},
|
| 219 |
+
"ddad": {
|
| 220 |
+
"dataset": "ddad",
|
| 221 |
+
"ddad_root": os.path.join(HOME_DIR, "shortcuts/datasets/ddad/ddad_val/"),
|
| 222 |
+
"eigen_crop": False,
|
| 223 |
+
"garg_crop": True,
|
| 224 |
+
"do_kb_crop": True,
|
| 225 |
+
"min_depth_eval": 1e-3,
|
| 226 |
+
"max_depth_eval": 80,
|
| 227 |
+
"min_depth": 1e-3,
|
| 228 |
+
"max_depth": 80,
|
| 229 |
+
},
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
ALL_INDOOR = ["nyu", "ibims", "sunrgbd", "diode_indoor", "hypersim_test"]
|
| 233 |
+
ALL_OUTDOOR = ["kitti", "diml_outdoor", "diode_outdoor", "vkitti2", "ddad"]
|
| 234 |
+
ALL_EVAL_DATASETS = ALL_INDOOR + ALL_OUTDOOR
|
| 235 |
+
|
| 236 |
+
COMMON_TRAINING_CONFIG = {
|
| 237 |
+
"dataset": "nyu",
|
| 238 |
+
"distributed": True,
|
| 239 |
+
"workers": 16,
|
| 240 |
+
"clip_grad": 0.1,
|
| 241 |
+
"use_shared_dict": False,
|
| 242 |
+
"shared_dict": None,
|
| 243 |
+
"use_amp": False,
|
| 244 |
+
|
| 245 |
+
"aug": True,
|
| 246 |
+
"random_crop": False,
|
| 247 |
+
"random_translate": False,
|
| 248 |
+
"translate_prob": 0.2,
|
| 249 |
+
"max_translation": 100,
|
| 250 |
+
|
| 251 |
+
"validate_every": 0.25,
|
| 252 |
+
"log_images_every": 0.1,
|
| 253 |
+
"prefetch": False,
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def flatten(config, except_keys=('bin_conf',)):
    """Flatten a nested config dict into a single-level dict.

    Nested dicts are expanded recursively and their leaf entries are hoisted
    to the top level; the keys that held the nested dicts are dropped. When
    the same leaf key occurs more than once, the last one visited wins.

    Args:
        config (dict): Possibly nested configuration mapping. Anything that
            is not a dict produces an empty result.
        except_keys (str or iterable of str): Keys whose values are kept
            as-is and never descended into. A bare string is treated as a
            single key name. Defaults to ('bin_conf',).

    Returns:
        dict: Flat mapping of leaf keys to values.
    """
    # A bare string would turn `key in except_keys` into a substring test
    # (e.g. "bin" or "conf" would match "bin_conf"), so normalise to a tuple.
    if isinstance(except_keys, str):
        except_keys = (except_keys,)
    else:
        except_keys = tuple(except_keys)

    def recurse(inp):
        if not isinstance(inp, dict):
            return
        for key, value in inp.items():
            if key in except_keys:
                # Excluded keys are emitted untouched and not flattened.
                yield (key, value)
            elif isinstance(value, dict):
                yield from recurse(value)
            else:
                yield (key, value)

    return dict(recurse(config))
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def split_combined_args(kwargs):
    """Expand '__'-combined arguments into individual key-value pairs.

    A key that begins with '__' packs several argument names separated by
    '__'; its value packs the matching values separated by ';'. For example
    the pair ('__n_bins__lr', '256;0.001') expands to n_bins='256' and
    lr='0.001'. Expanded values stay strings, and the combined entry itself
    is kept in the result.

    Args:
        kwargs (dict): Arguments, some of which may be combined as above.

    Returns:
        dict: A copy of ``kwargs`` with the expanded pairs added.

    Raises:
        AssertionError: If a combined entry has differing numbers of keys
            and values.
    """
    expanded = dict(kwargs)
    for combined_key, combined_value in kwargs.items():
        if not combined_key.startswith("__"):
            continue
        # The leading '__' yields an empty first piece, which is dropped.
        keys = combined_key.split("__")[1:]
        values = combined_value.split(";")
        assert len(keys) == len(
            values), f"Combined arguments should have equal number of keys and values. Keys are separated by '__' and Values are separated with ';'. For example, '__n_bins__lr=256;0.001. Given (keys,values) is ({keys}, {values})"
        expanded.update(zip(keys, values))
    return expanded
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def parse_list(config, key, dtype=int):
    """Turn a comma-separated string stored under ``key`` into a typed list.

    Nothing happens when ``key`` is absent. A string value is split on ','
    and every piece is converted with ``dtype``; the result is written back
    into ``config``. Whatever ends up stored must be a list whose elements
    are all instances of ``dtype``.

    Args:
        config (dict): Configuration mapping, modified in place.
        key (str): Entry to parse.
        dtype (type): Element type, also used as the converter. Defaults to int.

    Raises:
        AssertionError: If the stored value is not a list of ``dtype``.
    """
    if key not in config:
        return
    if isinstance(config[key], str):
        config[key] = [dtype(piece) for piece in config[key].split(',')]
    is_typed_list = isinstance(config[key], list) and all(
        isinstance(e, dtype) for e in config[key])
    assert is_typed_list, f"{key} should be a list of values dtype {dtype}. Given {config[key]} of type {type(config[key])} with values of type {[type(e) for e in config[key]]}."
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def get_model_config(model_name, model_version=None):
    """Load a model's JSON config and resolve training-config inheritance.

    The file is looked up under ROOT/models/{model_name}/ and is named
    config_{model_name}.json, or config_{model_name}_{model_version}.json
    when a version is given.

    Args:
        model_name (str): Name of the model; also the directory name.
        model_version (str, optional): Specific config version. Defaults to None.

    Returns:
        easydict or None: The parsed config, or None when the file does not
        exist.
    """
    if model_version is None:
        config_fname = f"config_{model_name}.json"
    else:
        config_fname = f"config_{model_name}_{model_version}.json"
    config_file = os.path.join(ROOT, "models", model_name, config_fname)
    if not os.path.exists(config_file):
        return None

    with open(config_file, "r") as f:
        config = edict(json.load(f))

    # Inheritance is supported for the "train" section only: entries of the
    # parent model's train config fill in keys this config does not define.
    train_cfg = config.train
    if "inherit" in train_cfg and train_cfg.inherit is not None:
        parent_train = get_model_config(train_cfg["inherit"]).train
        for key, value in parent_train.items():
            if key not in train_cfg:
                train_cfg[key] = value
    return edict(config)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def update_model_config(config, mode, model_name, model_version=None, strict=False):
    """Overlay a model's config file onto ``config``.

    The model's "model" section and its ``mode`` section are merged (the
    mode section wins on clashes), flattened, and layered over ``config``.

    Args:
        config (dict): Current flat configuration; not modified.
        mode (str): Name of the mode-specific section to merge in.
        model_name (str): Model whose config file is loaded.
        model_version (str, optional): Config version. Defaults to None.
        strict (bool): Raise when the config file is missing instead of
            returning ``config`` unchanged. Defaults to False.

    Returns:
        dict: A new merged dict, or ``config`` itself when no file was found.

    Raises:
        ValueError: If ``strict`` is set and the config file was not found.
    """
    model_config = get_model_config(model_name, model_version)
    if model_config is None:
        if strict:
            raise ValueError(f"Config file for model {model_name} not found.")
        return config
    merged_sections = {**model_config.model, **model_config[mode]}
    return {**config, **flatten(merged_sections)}
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def check_choices(name, value, choices):
    """Validate that ``value`` is one of ``choices``.

    Args:
        name (str): Label of the option, used in the error message.
        value: The value to check.
        choices: Container of accepted values.

    Raises:
        ValueError: If ``value`` is not contained in ``choices``.
    """
    if value in choices:
        return
    raise ValueError(f"{name} {value} not in supported choices {choices}")
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
KEYS_TYPE_BOOL = ["use_amp", "distributed", "use_shared_dict", "same_lr", "aug", "three_phase",
|
| 351 |
+
"prefetch", "cycle_momentum"] # Casting is not necessary as their int casted values in config are 0 or 1
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def get_config(model_name, mode='train', dataset=None, **overwrite_kwargs):
    """Main entry point to get the config for the model.

    Args:
        model_name (str): name of the desired model ("zoedepth" or "zoedepth_nk").
        mode (str, optional): "train", "infer" or "eval". Defaults to 'train'.
        dataset (str, optional): If specified, the corresponding dataset configuration is loaded as well.
            In train mode only "nyu", "kitti", "mix" or None are accepted. Defaults to None.

    Keyword Args: key-value pairs of arguments to overwrite the default config.

    The order of precedence for overwriting the config is (Higher precedence first):
        1. overwrite_kwargs
        2. "config_version": Config file version if specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{config_version}.json
        3. "version_name": Default Model version specific config specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{version_name}.json
        4. common_config: Default config for all models specified in COMMON_CONFIG

    Dataset defaults from DATASETS_CONFIG have the lowest precedence: they only
    fill in keys that none of the above has set.

    Returns:
        easydict: The config dictionary for the model. It also carries the
        "model" name and the "hostname" of the current machine.

    Raises:
        ValueError: If model_name, mode or (in train mode) dataset is not a supported choice.
        KeyError: If "version_name" is neither passed nor defined by the model config.
    """

    check_choices("Model", model_name, ["zoedepth", "zoedepth_nk"])
    check_choices("Mode", mode, ["train", "infer", "eval"])
    if mode == "train":
        check_choices("Dataset", dataset, ["nyu", "kitti", "mix", None])

    config = flatten({**COMMON_CONFIG, **COMMON_TRAINING_CONFIG})
    config = update_model_config(config, mode, model_name)

    # update with model version specific config
    # Only fall back to the config's own version_name when the caller did not
    # pass one, so a missing default cannot raise for an explicit version.
    if "version_name" in overwrite_kwargs:
        version_name = overwrite_kwargs["version_name"]
    else:
        version_name = config["version_name"]
    config = update_model_config(config, mode, model_name, version_name)

    # update with config version if specified
    config_version = overwrite_kwargs.get("config_version", None)
    if config_version is not None:
        print("Overwriting config with config_version", config_version)
        config = update_model_config(config, mode, model_name, config_version)

    # update with overwrite_kwargs
    # Combined args are useful for hyperparameter search
    overwrite_kwargs = split_combined_args(overwrite_kwargs)
    config = {**config, **overwrite_kwargs}

    # Casting to bool   # TODO: Not necessary. Remove and test
    for key in KEYS_TYPE_BOOL:
        if key in config:
            config[key] = bool(config[key])

    # Model specific post processing of config
    parse_list(config, "n_attractors")

    # adjust n_bins for each bin configuration if bin_conf is given and n_bins is passed in overwrite_kwargs
    if 'bin_conf' in config and 'n_bins' in overwrite_kwargs:
        bin_conf = config['bin_conf']  # list of dicts
        n_bins = overwrite_kwargs['n_bins']
        new_bin_conf = []
        for conf in bin_conf:
            conf['n_bins'] = n_bins
            new_bin_conf.append(conf)
        config['bin_conf'] = new_bin_conf

    if mode == "train":
        orig_dataset = dataset
        if dataset == "mix":
            dataset = 'nyu'  # Use nyu as default for mix. Dataset config is changed accordingly while loading the dataloader
        if dataset is not None:
            config['project'] = f"MonoDepth3-{orig_dataset}"  # Set project for wandb

    if dataset is not None:
        config['dataset'] = dataset
        config = {**DATASETS_CONFIG[dataset], **config}

    config['model'] = model_name
    typed_config = {k: infer_type(v) for k, v in config.items()}
    # add hostname to the returned config. It is written to typed_config
    # (not config) so that it actually reaches the caller, and after type
    # inference so that the host name is stored verbatim.
    typed_config['hostname'] = platform.node()
    return edict(typed_config)
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
def change_dataset(config, new_dataset):
    """Overlay the settings of another dataset onto an existing config.

    Args:
        config: Config object with an ``update`` method; modified in place.
        new_dataset (str): Key into DATASETS_CONFIG.

    Returns:
        The same ``config`` object, after the update.

    Raises:
        KeyError: If ``new_dataset`` is not a key of DATASETS_CONFIG.
    """
    dataset_settings = DATASETS_CONFIG[new_dataset]
    config.update(dataset_settings)
    return config
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/easydict/__init__.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
EasyDict
|
| 3 |
+
Copy/pasted from https://github.com/makinacorpus/easydict
|
| 4 |
+
Original author: Mathieu Leplatre <mathieu.leplatre@makina-corpus.com>
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
class EasyDict(dict):
    """
    A dict whose items are also reachable as attributes, recursively.

    Nested dicts (also inside lists and tuples) are converted to EasyDict on
    assignment; lists and tuples are stored as lists.

    Get attributes

    >>> d = EasyDict({'foo':3})
    >>> d['foo']
    3
    >>> d.foo
    3
    >>> d.bar
    Traceback (most recent call last):
    ...
    AttributeError: 'EasyDict' object has no attribute 'bar'

    Works recursively

    >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
    >>> isinstance(d.bar, dict)
    True
    >>> d.bar.x
    1

    Bullet-proof

    >>> EasyDict({})
    {}
    >>> EasyDict(d={})
    {}
    >>> EasyDict(None)
    {}
    >>> d = {'a': 1}
    >>> EasyDict(**d)
    {'a': 1}
    >>> EasyDict((('a', 1), ('b', 2)))
    {'a': 1, 'b': 2}

    Set attributes

    >>> d = EasyDict()
    >>> d.foo = 3
    >>> d.foo
    3
    >>> d.bar = {'prop': 'value'}
    >>> d.bar.prop
    'value'
    >>> d
    {'foo': 3, 'bar': {'prop': 'value'}}
    >>> d.bar.prop = 'newer'
    >>> d.bar.prop
    'newer'


    Values extraction

    >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
    >>> isinstance(d.bar, list)
    True
    >>> from operator import attrgetter
    >>> list(map(attrgetter('x'), d.bar))
    [1, 3]
    >>> list(map(attrgetter('y'), d.bar))
    [2, 4]
    >>> d = EasyDict()
    >>> list(d.keys())
    []
    >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
    >>> d.foo
    3
    >>> d.bar.x
    1

    Still like a dict though

    >>> o = EasyDict({'clean':True})
    >>> list(o.items())
    [('clean', True)]

    And like a class

    >>> class Flower(EasyDict):
    ...     power = 1
    ...
    >>> f = Flower()
    >>> f.power
    1
    >>> f = Flower({'height': 12})
    >>> f.height
    12
    >>> f['power']
    1
    >>> sorted(f.keys())
    ['height', 'power']

    update and pop items
    >>> d = EasyDict(a=1, b='2')
    >>> e = EasyDict(c=3.0, a=9.0)
    >>> d.update(e)
    >>> d.c
    3.0
    >>> d['c']
    3.0
    >>> d.get('c')
    3.0
    >>> d.update(a=4, b=4)
    >>> d.b
    4
    >>> d.pop('a')
    4
    >>> d.a
    Traceback (most recent call last):
    ...
    AttributeError: 'EasyDict' object has no attribute 'a'
    >>> d.pop('a', 'gone')
    'gone'
    """
    def __init__(self, d=None, **kwargs):
        # Work on a private copy so the caller's mapping is never modified.
        if d is None:
            d = {}
        else:
            d = dict(d)
        if kwargs:
            d.update(**kwargs)
        for k, v in d.items():
            setattr(self, k, v)
        # Class attributes
        # Non-dunder attributes declared on a subclass are mirrored as items;
        # 'update' and 'pop' are skipped because they are this class's methods.
        for k in self.__class__.__dict__.keys():
            if not (k.startswith('__') and k.endswith('__')) and k not in ('update', 'pop'):
                setattr(self, k, getattr(self, k))

    def __setattr__(self, name, value):
        # Wrap nested dicts so attribute access keeps working at any depth.
        if isinstance(value, (list, tuple)):
            value = [self.__class__(x)
                     if isinstance(x, dict) else x for x in value]
        elif isinstance(value, dict) and not isinstance(value, self.__class__):
            value = self.__class__(value)
        # Keep the attribute view and the item view in sync.
        super(EasyDict, self).__setattr__(name, value)
        super(EasyDict, self).__setitem__(name, value)

    __setitem__ = __setattr__

    def update(self, e=None, **f):
        """Set every pair from ``e`` (mapping or iterable of pairs) and ``f``."""
        # Copy first: merging the keyword arguments must not modify the
        # caller's mapping, and an iterable of pairs has no .update().
        d = dict(e) if e else {}
        d.update(f)
        for k in d:
            setattr(self, k, d[k])

    def pop(self, k, d=None):
        """Remove ``k`` and return its value, or ``d`` when ``k`` is absent."""
        # Drop the attribute only if it exists, so that a missing key falls
        # through to the default instead of raising AttributeError.
        self.__dict__.pop(k, None)
        return super(EasyDict, self).pop(k, d)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
if __name__ == "__main__":
|
| 157 |
+
import doctest
|
| 158 |
+
doctest.testmod()
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/geometry.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
import numpy as np
|
| 26 |
+
|
| 27 |
+
def get_intrinsics(H, W, fov=55.0):
    """
    Intrinsics for a pinhole camera model.
    Assume a central principal point and, by default, a fov of 55 degrees.

    The focal length is derived from the field of view and the image width,
    and is used for both axes.

    Args:
        H (int): Image height in pixels.
        W (int): Image width in pixels.
        fov (float): Field of view in degrees. Defaults to 55.

    Returns:
        numpy.ndarray: The 3x3 matrix [[f, 0, cx], [0, f, cy], [0, 0, 1]].
    """
    f = 0.5 * W / np.tan(0.5 * fov * np.pi / 180.0)
    cx = 0.5 * W
    cy = 0.5 * H
    return np.array([[f, 0, cx],
                     [0, f, cy],
                     [0, 0, 1]])
|
| 38 |
+
|
| 39 |
+
def depth_to_points(depth, R=None, t=None):
    """Back-project a depth map into 3D points.

    Every pixel (x, y, 1) is multiplied by the inverse intrinsics scaled by
    its depth, the x and y axes are flipped, and finally the rotation ``R``
    and translation ``t`` are applied.

    Args:
        depth (numpy.ndarray): Depth values indexed as (batch, height, width).
        R (numpy.ndarray, optional): 3x3 rotation. Identity when None.
        t (numpy.ndarray, optional): Length-3 translation. Zeros when None.

    Returns:
        numpy.ndarray: Points of the first batch element, indexed as
        (height, width, 3).
    """
    height, width = depth.shape[1:3]
    inv_K = np.linalg.inv(get_intrinsics(height, width))
    rotation = np.eye(3) if R is None else R
    translation = np.zeros(3) if t is None else t

    # Flips x and y: converts from your coordinate to PyTorch3D's coordinate system
    flip_xy = np.diag([-1.0, -1.0, 1.0])

    # Homogeneous pixel coordinates (x, y, 1) for every pixel.
    xs, ys = np.meshgrid(np.arange(width), np.arange(height))
    pixels = np.stack((xs, ys, np.ones_like(xs)), -1).astype(np.float32)
    pixels = pixels[None]  # bs, h, w, 3

    # The depth scales the inverse intrinsics before the matrix product.
    scaled_inv_K = depth[:, :, :, None, None] * inv_K[None, None, None, ...]
    cam_points = scaled_inv_K @ pixels[:, :, :, :, None]
    # cam_points live in your coordinate system. Convert them to Py3D's
    cam_points = flip_xy[None, None, None, ...] @ cam_points
    # from reference to target viewpoint
    moved_points = rotation[None, None, None, ...] @ cam_points + translation[None, None, None, :, None]
    return moved_points[:, :, :, :3, 0][0]
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def create_triangles(h, w, mask=None):
    """
    Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68
    Creates mesh triangle indices from a given pixel grid size.
        This function is not and need not be differentiable as triangle indices are
        fixed.
    Each grid cell yields two triangles, (top-left, bottom-left, top-right)
    and (bottom-right, top-right, bottom-left), over row-major pixel indices.
    Args:
    h: (int) denoting the height of the image.
    w: (int) denoting the width of the image.
    mask: optional array with h*w elements; when given, only triangles whose
        three vertices are all truthy in the mask are kept.
    Returns:
    triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3)
    """
    cols, rows = np.meshgrid(range(w - 1), range(h - 1))
    top_left = rows * w + cols
    top_right = rows * w + cols + 1
    bottom_left = (rows + 1) * w + cols
    bottom_right = (rows + 1) * w + cols + 1
    # Six corner indices per cell, i.e. two consecutive triangles.
    corners = np.stack(
        [top_left, bottom_left, top_right, bottom_right, top_right, bottom_left],
        axis=-1)
    triangles = corners.reshape(((w - 1) * (h - 1) * 2, 3))
    if mask is None:
        return triangles
    flat_mask = mask.reshape(-1)
    keep = flat_mask[triangles].all(1)
    return triangles[keep]
|
microsoftexcel-controlnet/annotator/zoe/zoedepth/utils/misc.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MIT License
|
| 2 |
+
|
| 3 |
+
# Copyright (c) 2022 Intelligent Systems Lab Org
|
| 4 |
+
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
# in the Software without restriction, including without limitation the rights
|
| 8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
# furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
# copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
# SOFTWARE.
|
| 22 |
+
|
| 23 |
+
# File author: Shariq Farooq Bhat
|
| 24 |
+
|
| 25 |
+
"""Miscellaneous utility functions."""
|
| 26 |
+
|
| 27 |
+
from scipy import ndimage
|
| 28 |
+
|
| 29 |
+
import base64
|
| 30 |
+
import math
|
| 31 |
+
import re
|
| 32 |
+
from io import BytesIO
|
| 33 |
+
|
| 34 |
+
import matplotlib
|
| 35 |
+
import matplotlib.cm
|
| 36 |
+
import numpy as np
|
| 37 |
+
import requests
|
| 38 |
+
import torch
|
| 39 |
+
import torch.distributed as dist
|
| 40 |
+
import torch.nn
|
| 41 |
+
import torch.nn as nn
|
| 42 |
+
import torch.utils.data.distributed
|
| 43 |
+
from PIL import Image
|
| 44 |
+
from torchvision.transforms import ToTensor
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class RunningAverage:
    """Arithmetic mean that is updated one sample at a time."""

    def __init__(self):
        # Current mean and the number of samples folded into it.
        self.avg = 0
        self.count = 0

    def append(self, value):
        """Fold `value` into the mean and increase the sample count by one."""
        seen = self.count
        self.avg = (value + seen * self.avg) / (seen + 1)
        self.count = seen + 1

    def get_value(self):
        """Return the mean of everything appended so far (0 if nothing was)."""
        return self.avg
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def denormalize(x):
    """Undo the ImageNet mean/std normalization of an image batch.

    Args:
        x (torch.Tensor - shape(N,3,H,W)): normalized input tensor

    Returns:
        torch.Tensor - shape(N,3,H,W): `x * std + mean`, computed per channel
    """
    device = x.device
    imagenet_mean = torch.Tensor([0.485, 0.456, 0.406]).to(device)
    imagenet_std = torch.Tensor([0.229, 0.224, 0.225]).to(device)
    # Reshape to (1, 3, 1, 1) so the statistics broadcast over batch and pixels.
    return x * imagenet_std.view(1, 3, 1, 1) + imagenet_mean.view(1, 3, 1, 1)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class RunningAverageDict:
    """A dictionary of running averages, one `RunningAverage` per key."""

    def __init__(self):
        # Stays None until the first non-None update so that get_value() can
        # report "nothing recorded yet" as None.
        self._dict = None

    def update(self, new_dict):
        """Append every value of `new_dict` to the running average of its key.

        Args:
            new_dict (dict or None): Mapping of key -> value to accumulate.
                None is ignored.
        """
        if new_dict is None:
            return

        if self._dict is None:
            self._dict = dict()

        for key, value in new_dict.items():
            # Create the average on first sight of a key; keys that only show
            # up in a later update must not raise KeyError.
            if key not in self._dict:
                self._dict[key] = RunningAverage()
            self._dict[key].append(value)

    def get_value(self):
        """Return {key: current average}, or None if update() never stored anything."""
        if self._dict is None:
            return None
        return {key: value.get_value() for key, value in self._dict.items()}
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _resolve_cmap(cmap):
    """Return a colormap callable for a colormap name, a callable, or None."""
    if callable(cmap):
        # Colormap instances (and any other value -> RGBA callable) are used as-is.
        return cmap
    registry = getattr(matplotlib, "colormaps", None)
    if registry is None:
        # Older matplotlib without the registry: only the legacy accessor exists.
        return matplotlib.cm.get_cmap(cmap)
    if cmap is None:
        # The legacy accessor mapped None to the configured default colormap.
        cmap = matplotlib.rcParams["image.cmap"]
    try:
        return registry[cmap]
    except KeyError as err:
        # Keep the exception type the legacy accessor raised for unknown names.
        raise ValueError(f"{cmap!r} is not a known colormap name") from err


def colorize(value, vmin=None, vmax=None, cmap='gray_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None):
    """Converts a depth map to a color image.

    Args:
        value (torch.Tensor, numpy.ndarray): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed
        vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, the 2nd percentile of the valid pixels is used. Defaults to None.
        vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, the 85th percentile of the valid pixels is used. Defaults to None.
        cmap (str or callable, optional): matplotlib colormap name, or a colormap callable accepting `(values, bytes=True)`. Defaults to 'gray_r'.
        invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Only used when invalid_mask is None. Defaults to -99.
        invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None.
        background_color (tuple[int], optional): 4-tuple RGBA color to give to invalid pixels. Defaults to (128, 128, 128, 255).
        gamma_corrected (bool, optional): Apply gamma correction (exponent 2.2) to colored image. Defaults to False.
        value_transform (Callable, optional): Applied to the whole normalized map (invalid pixels are NaN at that point) before coloring. Defaults to None.

    Returns:
        numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4)
    """
    if isinstance(value, torch.Tensor):
        value = value.detach().cpu().numpy()

    value = value.squeeze()
    if invalid_mask is None:
        invalid_mask = value == invalid_val
    mask = np.logical_not(invalid_mask)

    # normalize
    if vmin is None or vmax is None:
        valid = value[mask]
        # With no valid pixel there is nothing to take a percentile of; fall
        # back to 0 so the all-invalid map is rendered as pure background.
        if vmin is None:
            vmin = np.percentile(valid, 2) if valid.size else 0.0
        if vmax is None:
            vmax = np.percentile(valid, 85) if valid.size else 0.0
    if vmin != vmax:
        value = (value - vmin) / (vmax - vmin)  # vmin..vmax
    else:
        # Avoid 0-division
        value = value * 0.

    # Both branches above produce a new float array, so writing NaN here never
    # touches the caller's data.
    value[invalid_mask] = np.nan
    cmapper = _resolve_cmap(cmap)
    if value_transform:
        value = value_transform(value)
    img = cmapper(value, bytes=True)  # (H, W, 4) uint8

    # grey out the invalid values
    img[invalid_mask] = background_color

    if gamma_corrected:
        # gamma correction
        img = img / 255
        img = np.power(img, 2.2)
        img = img * 255
        img = img.astype(np.uint8)
    return img
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def count_parameters(model, include_all=False):
    """Count the scalar parameters of `model`.

    Only parameters with `requires_grad` set are counted unless
    `include_all` is true, in which case every parameter is counted.
    """
    total = 0
    for param in model.parameters():
        if include_all or param.requires_grad:
            total += param.numel()
    return total
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def compute_errors(gt, pred):
    """Compute depth-estimation metrics for 'pred' against 'gt'.

    Args:
        gt (numpy.ndarray): Ground truth values
        pred (numpy.ndarray): Predicted values

    gt.shape should be equal to pred.shape

    Returns:
        dict: Dictionary containing the following metrics:
            'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25
            'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2
            'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3
            'abs_rel': Absolute relative error
            'rmse': Root mean squared error
            'log_10': Absolute log10 error
            'sq_rel': Squared relative error
            'rmse_log': Root mean squared error on the log scale
            'silog': Scale invariant log error
    """
    # Threshold accuracies: the larger of the two ratios, per element.
    ratio = np.maximum((gt / pred), (pred / gt))
    a1, a2, a3 = ((ratio < 1.25 ** power).mean() for power in (1, 2, 3))

    # Linear-space errors.
    residual = gt - pred
    abs_rel = np.mean(np.abs(residual) / gt)
    sq_rel = np.mean((residual ** 2) / gt)
    rmse = np.sqrt((residual ** 2).mean())

    # Log-space errors; the squared difference is the same in either order.
    log_err = np.log(pred) - np.log(gt)
    sq_log_err = log_err ** 2
    rmse_log = np.sqrt(sq_log_err.mean())
    silog = np.sqrt(np.mean(sq_log_err) - np.mean(log_err) ** 2) * 100

    log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean()

    return {
        'a1': a1,
        'a2': a2,
        'a3': a3,
        'abs_rel': abs_rel,
        'rmse': rmse,
        'log_10': log_10,
        'rmse_log': rmse_log,
        'silog': silog,
        'sq_rel': sq_rel,
    }
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, **kwargs):
    """Compute metrics of predicted depth maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics.

    Args:
        gt (torch.Tensor): Ground truth depth. After squeezing it must be 2D when a crop is enabled.
        pred (torch.Tensor): Predicted depth. It is not modified.
        interpolate (bool, optional): Bilinearly resize pred to gt's spatial size when the two differ. Defaults to True.
        garg_crop (bool, optional): Evaluate only inside the Garg crop. Takes precedence over eigen_crop. Defaults to False.
        eigen_crop (bool, optional): Evaluate only inside the Eigen crop. Defaults to True.
        dataset (str, optional): 'kitti' selects the proportional Eigen crop; any other value uses the fixed window [45:471, 41:601]. Defaults to 'nyu'.
        min_depth_eval (float, optional): Lower clamp for pred; gt must be strictly greater to count as valid. Defaults to 0.1.
        max_depth_eval (float, optional): Upper clamp for pred; gt must be strictly smaller to count as valid. Defaults to 10.
        **kwargs: If 'config' is present, its garg_crop, eigen_crop, min_depth_eval and max_depth_eval attributes override the arguments of the same name.

    Returns:
        dict: Output of compute_errors evaluated on the valid, cropped pixels.
    """
    if 'config' in kwargs:
        config = kwargs['config']
        garg_crop = config.garg_crop
        eigen_crop = config.eigen_crop
        min_depth_eval = config.min_depth_eval
        max_depth_eval = config.max_depth_eval

    if gt.shape[-2:] != pred.shape[-2:] and interpolate:
        pred = nn.functional.interpolate(
            pred, gt.shape[-2:], mode='bilinear', align_corners=True)

    # .numpy() on a CPU tensor shares its memory, so clamping in place would
    # overwrite the caller's prediction; work on a copy instead. detach() lets
    # tensors that still require grad be converted.
    pred = pred.detach().squeeze().cpu().numpy().copy()
    pred[pred < min_depth_eval] = min_depth_eval
    pred[pred > max_depth_eval] = max_depth_eval
    pred[np.isinf(pred)] = max_depth_eval
    pred[np.isnan(pred)] = min_depth_eval

    gt_depth = gt.detach().squeeze().cpu().numpy()
    valid_mask = np.logical_and(
        gt_depth > min_depth_eval, gt_depth < max_depth_eval)

    if garg_crop or eigen_crop:
        gt_height, gt_width = gt_depth.shape
        eval_mask = np.zeros(valid_mask.shape)

        if garg_crop:
            eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
                      int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1

        elif eigen_crop:
            if dataset == 'kitti':
                eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
                          int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
            else:
                # NOTE(review): fixed window, presumably meant for (480, 640)
                # images - confirm before using other resolutions.
                eval_mask[45:471, 41:601] = 1
    else:
        eval_mask = np.ones(valid_mask.shape)
    valid_mask = np.logical_and(valid_mask, eval_mask)
    return compute_errors(gt_depth[valid_mask], pred[valid_mask])
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
#################################### Model utils ################################################
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def parallelize(config, model, find_unused_parameters=True):
    """Move `model` to CUDA and wrap it for multi-GPU use as `config` requests.

    Reads `config.gpu` and `config.distributed` and always sets
    `config.multigpu`. In the distributed case it also initialises the process
    group and rewrites `config.rank` and `config.batch_size`, and sets
    `config.workers`.

    Args:
        config: Settings object with the attributes described above (plus
            ngpus_per_node, dist_backend, dist_url, world_size and num_workers
            when distributed).
        model (torch.nn.Module): Model to place and wrap.
        find_unused_parameters (bool, optional): Forwarded to
            DistributedDataParallel. Defaults to True.

    Returns:
        torch.nn.Module: The model, wrapped in DistributedDataParallel when
        `config.distributed` is set, in DataParallel when no GPU is pinned,
        and unwrapped otherwise.
    """
    gpu = config.gpu

    if gpu is not None:
        torch.cuda.set_device(gpu)
        model = model.cuda(gpu)

    config.multigpu = False

    if config.distributed:
        # Use DDP
        per_node = config.ngpus_per_node
        config.multigpu = True
        # Global rank = node rank * GPUs per node + local GPU index.
        config.rank = config.rank * per_node + gpu
        dist.init_process_group(
            backend=config.dist_backend,
            init_method=config.dist_url,
            world_size=config.world_size,
            rank=config.rank,
        )
        # Split the batch across the node's GPUs; workers are rounded up.
        config.batch_size = int(config.batch_size / per_node)
        config.workers = int((config.num_workers + per_node - 1) / per_node)
        print("Device", gpu, "Rank", config.rank, "batch size",
              config.batch_size, "Workers", config.workers)
        torch.cuda.set_device(gpu)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.cuda(gpu)
        return torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[gpu],
            output_device=gpu,
            find_unused_parameters=find_unused_parameters,
        )

    if gpu is None:
        # Use DP
        config.multigpu = True
        model = torch.nn.DataParallel(model.cuda())

    return model
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
#################################################################################################
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
#####################################################################################################
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
class colors:
    '''ANSI escape sequences for styling terminal output.

    Text attributes live directly on the class (colors.bold, colors.underline,
    colors.reverse, ...); colors.reset clears every attribute and color.
    Foreground colors are on the nested class fg and background colors on the
    nested class bg, e.g. colors.fg.red or colors.bg.green.
    '''

    # Text attributes.
    reset = '\033[0m'
    bold = '\033[01m'
    disable = '\033[02m'
    underline = '\033[04m'
    reverse = '\033[07m'
    strikethrough = '\033[09m'
    invisible = '\033[08m'

    class fg:
        # Foreground (text) colors.
        black = '\033[30m'
        red = '\033[31m'
        green = '\033[32m'
        orange = '\033[33m'
        blue = '\033[34m'
        purple = '\033[35m'
        cyan = '\033[36m'
        lightgrey = '\033[37m'
        darkgrey = '\033[90m'
        lightred = '\033[91m'
        lightgreen = '\033[92m'
        yellow = '\033[93m'
        lightblue = '\033[94m'
        pink = '\033[95m'
        lightcyan = '\033[96m'

    class bg:
        # Background colors.
        black = '\033[40m'
        red = '\033[41m'
        green = '\033[42m'
        orange = '\033[43m'
        blue = '\033[44m'
        purple = '\033[45m'
        cyan = '\033[46m'
        lightgrey = '\033[47m'
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def printc(text, color):
    """Print `text` preceded by the escape sequence `color`, then reset styling."""
    print("{}{}{}".format(color, text, colors.reset))
|
| 339 |
+
|
| 340 |
+
############################################
|
| 341 |
+
|
| 342 |
+
def get_image_from_url(url, timeout=30):
    """Download an image and return it as an RGB PIL image.

    Args:
        url (str): Address of the image.
        timeout (float or tuple, optional): Seconds to wait for the server,
            passed to requests.get. Without a timeout an unresponsive server
            would block the call forever. Defaults to 30.

    Returns:
        PIL.Image.Image: The downloaded image converted to mode "RGB".

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the image decoder.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content)).convert("RGB")
    return img
|
| 346 |
+
|
| 347 |
+
def url_to_torch(url, size=(384, 384)):
    """Download an image and return it as a float tensor scaled to [0, 1].

    Args:
        url (str): Address of the image.
        size (tuple[int, int], optional): Target size passed to PIL's
            Image.resize. Defaults to (384, 384).

    Returns:
        torch.Tensor: Float tensor with the channel axis first (the RGB image
        array permuted with (2, 0, 1)) and values divided by 255.
    """
    img = get_image_from_url(url)
    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in Pillow 10.
    # Newer Pillow exposes the filter on Image.Resampling, older on Image itself.
    resampling = getattr(Image, "Resampling", Image)
    img = img.resize(size, resampling.LANCZOS)
    # np.array copies, giving torch a writable buffer.
    img = torch.from_numpy(np.array(img)).float()
    img = img.permute(2, 0, 1)
    img.div_(255)
    return img
|
| 354 |
+
|
| 355 |
+
def pil_to_batched_tensor(img):
    """Convert `img` with torchvision's ToTensor and prepend a batch axis of size 1."""
    to_tensor = ToTensor()
    tensor = to_tensor(img)
    return tensor.unsqueeze(0)
|
| 357 |
+
|
| 358 |
+
def save_raw_16bit(depth, fpath="raw.png"):
    """Write a 2D depth map to `fpath` as a 16-bit image.

    Args:
        depth (torch.Tensor or numpy.ndarray): Depth map. Tensors are squeezed
            and moved to the CPU first; the resulting array must be 2D.
        fpath (str, optional): Output path. Defaults to "raw.png".

    The values are multiplied by 256 and cast to uint16 before saving.
    """
    if isinstance(depth, torch.Tensor):
        depth = depth.squeeze().cpu().numpy()

    assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array"
    assert depth.ndim == 2, "Depth must be 2D"

    scaled = depth * 256  # scale for 16-bit png
    as_uint16 = scaled.astype(np.uint16)
    Image.fromarray(as_uint16).save(fpath)
    print("Saved raw depth to", fpath)
|