File size: 6,630 Bytes
194b4ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import torch, gc
import cv2
import requests
import os.path
import contextlib
from PIL import Image
from modules.shared import opts, cmd_opts
from modules import processing, images, shared, devices
from torchvision.transforms import Compose
from repositories.midas.midas.dpt_depth import DPTDepthModel
from repositories.midas.midas.midas_net import MidasNet
from repositories.midas.midas.midas_net_custom import MidasNet_small
from repositories.midas.midas.transforms import Resize, NormalizeImage, PrepareForNet
import numpy as np
class SimpleDepthMapGenerator(object):
    """Generate a depth map for an image using a MiDaS model.

    Model weights are downloaded on first use into ``./models/midas``.
    """

    def calculate_depth_maps(self, image, img_x, img_y, model_type, invert_depth):
        """Run MiDaS depth estimation on ``image`` and return a PIL image.

        Args:
            image: input image (PIL.Image or anything ``np.asarray`` accepts
                as an H x W x 3 array).
            img_x: target width passed to the MiDaS ``Resize`` transform.
            img_y: target height passed to the MiDaS ``Resize`` transform.
            model_type: 0 = "dpt_large", 1 = "midas_v21", 2 = "midas_v21_small".
            invert_depth: if True, invert the 16-bit depth map before output.

        Returns:
            PIL.Image.Image: three-channel 8-bit-per-channel grayscale depth map.

        Raises:
            ValueError: if ``model_type`` is not 0, 1 or 2.
            requests.HTTPError: if downloading the model weights fails.
        """
        model = None  # initialized first so the finally-block cleanup is safe if setup fails
        try:
            def download_file(filename, url):
                # Stream the model weights to disk in 4 KiB chunks.
                with open(filename, 'wb') as fout:
                    response = requests.get(url, stream=True)
                    response.raise_for_status()
                    for block in response.iter_content(4096):
                        fout.write(block)

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            # Weights live under ./models/midas; create the directory if absent.
            model_dir = "./models/midas"
            os.makedirs(model_dir, exist_ok=True)

            if model_type == 0:  # "dpt_large"
                model_path = f"{model_dir}/dpt_large-midas-2f21e586.pt"
                if not os.path.exists(model_path):
                    download_file(model_path, "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt")
                model = DPTDepthModel(
                    path=model_path,
                    backbone="vitl16_384",
                    non_negative=True,
                )
                resize_mode = "minimal"
                normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            elif model_type == 1:  # "midas_v21"
                model_path = f"{model_dir}/midas_v21-f6b98070.pt"
                if not os.path.exists(model_path):
                    download_file(model_path, "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt")
                model = MidasNet(model_path, non_negative=True)
                resize_mode = "upper_bound"
                normalization = NormalizeImage(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                )
            elif model_type == 2:  # "midas_v21_small"
                model_path = f"{model_dir}/midas_v21_small-70d6b9c8.pt"
                if not os.path.exists(model_path):
                    download_file(model_path, "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt")
                model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True})
                resize_mode = "upper_bound"
                normalization = NormalizeImage(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                )
            else:
                # Previously an unknown type fell through and later crashed with
                # UnboundLocalError on resize_mode; fail fast with a clear error.
                raise ValueError(f"Unknown model_type: {model_type}")

            # Preprocessing pipeline: aspect-preserving resize to a multiple of 32,
            # per-model normalization, then HWC float -> CHW float32 for the net.
            transform = Compose(
                [
                    Resize(
                        img_x,
                        img_y,
                        resize_target=None,
                        keep_aspect_ratio=True,
                        ensure_multiple_of=32,
                        resize_method=resize_mode,
                        image_interpolation_method=cv2.INTER_CUBIC,
                    ),
                    normalization,
                    PrepareForNet(),
                ]
            )

            model.eval()
            # Optimize for CUDA: channels-last layout, and half precision unless
            # the user disabled it with --no-half.
            if device == torch.device("cuda"):
                model = model.to(memory_format=torch.channels_last)
                if not cmd_opts.no_half:
                    model = model.half()
            model.to(device)

            # NOTE(review): np.asarray on a PIL image already yields RGB, so
            # COLOR_BGR2RGB actually swaps to BGR here. Kept as-is to preserve
            # existing output — confirm the intended channel order.
            img = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB) / 255.0
            img_input = transform({"image": img})["image"]

            precision_scope = (
                torch.autocast
                if shared.cmd_opts.precision == "autocast" and device == torch.device("cuda")
                else contextlib.nullcontext
            )
            with torch.no_grad(), precision_scope("cuda"):
                sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                if device == torch.device("cuda"):
                    sample = sample.to(memory_format=torch.channels_last)
                    if not cmd_opts.no_half:
                        sample = sample.half()
                prediction = model.forward(sample)
                # Upscale the prediction back to the preprocessed image size.
                prediction = (
                    torch.nn.functional.interpolate(
                        prediction.unsqueeze(1),
                        size=img.shape[:2],
                        mode="bicubic",
                        align_corners=False,
                    )
                    .squeeze()
                    .cpu()
                    .numpy()
                )

            # Normalize the raw depth to the full 16-bit range, guarding against
            # a flat (constant-depth) prediction to avoid division by ~zero.
            depth = prediction
            numbytes = 2
            depth_min = depth.min()
            depth_max = depth.max()
            max_val = (2 ** (8 * numbytes)) - 1
            if depth_max - depth_min > np.finfo("float").eps:
                out = max_val * (depth - depth_min) / (depth_max - depth_min)
            else:
                out = np.zeros(depth.shape)

            # Single-channel 16-bit depth image.
            img_output = out.astype("uint16")
            if invert_depth:
                img_output = cv2.bitwise_not(img_output)

            # Replicate the 16-bit map (scaled to 8 bits) into three channels.
            img_output2 = np.zeros_like(image)
            img_output2[:, :, 0] = img_output / 256.0
            img_output2[:, :, 1] = img_output / 256.0
            img_output2[:, :, 2] = img_output / 256.0
            return Image.fromarray(img_output2)
        finally:
            # Release model memory even on failure; guard against setup errors
            # that occurred before `model` was assigned (previously a NameError).
            if model is not None:
                del model
            gc.collect()
            devices.torch_gc()
|