File size: 6,630 Bytes
194b4ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import torch, gc
import cv2
import requests
import os.path
import contextlib
from PIL import Image
from modules.shared import opts, cmd_opts
from modules import processing, images, shared, devices

from torchvision.transforms import Compose
from repositories.midas.midas.dpt_depth import DPTDepthModel
from repositories.midas.midas.midas_net import MidasNet
from repositories.midas.midas.midas_net_custom import MidasNet_small
from repositories.midas.midas.transforms import Resize, NormalizeImage, PrepareForNet

import numpy as np

class SimpleDepthMapGenerator(object):
    """Estimate a depth map for an image with one of the MiDaS networks.

    Model weights are downloaded on first use into ``./models/midas``
    (relative to the current working directory). The network is loaded,
    run and released again on every call.
    """

    @staticmethod
    def _download_file(filename, url):
        """Stream ``url`` into ``filename`` without leaving a partial file behind.

        The data is written to ``<filename>.part`` first and only moved into
        place once the download finished, so an interrupted download is never
        mistaken for valid weights by a later ``os.path.exists`` check.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-success HTTP status.
            OSError: if the file cannot be written or moved into place.
        """
        partial = f"{filename}.part"
        try:
            # The timeout applies to connecting and to each socket read (not to
            # the whole transfer); it prevents hanging forever on a dead link.
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(partial, "wb") as fout:
                    for block in response.iter_content(4096):
                        fout.write(block)
            os.replace(partial, filename)
        except BaseException:
            # Remove whatever was written so far, then report the real error.
            with contextlib.suppress(OSError):
                os.remove(partial)
            raise

    def _load_model(self, model_type, model_dir):
        """Build the network selected by ``model_type``, downloading weights if missing.

        Returns:
            tuple: ``(model, resize_mode, normalization)`` where ``resize_mode``
            is the ``resize_method`` string for the ``Resize`` transform and
            ``normalization`` is the matching ``NormalizeImage`` instance.

        Raises:
            ValueError: if ``model_type`` is not 0, 1 or 2.
        """
        # "dpt_large"
        if model_type == 0:
            model_path = f"{model_dir}/dpt_large-midas-2f21e586.pt"
            if not os.path.exists(model_path):
                self._download_file(model_path, "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt")
            model = DPTDepthModel(
                path=model_path,
                backbone="vitl16_384",
                non_negative=True,
            )
            resize_mode = "minimal"
            normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        # "midas_v21"
        elif model_type == 1:
            model_path = f"{model_dir}/midas_v21-f6b98070.pt"
            if not os.path.exists(model_path):
                self._download_file(model_path, "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt")
            model = MidasNet(model_path, non_negative=True)
            resize_mode = "upper_bound"
            normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )

        # "midas_v21_small"
        elif model_type == 2:
            model_path = f"{model_dir}/midas_v21_small-70d6b9c8.pt"
            if not os.path.exists(model_path):
                self._download_file(model_path, "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt")
            model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True})
            resize_mode = "upper_bound"
            normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )

        else:
            raise ValueError(f"unsupported MiDaS model_type: {model_type!r} (expected 0, 1 or 2)")

        return model, resize_mode, normalization

    def calculate_depth_maps(self, image, img_x, img_y, model_type, invert_depth):
        """Run a MiDaS model on ``image`` and return the depth map as an image.

        Args:
            image: Source image; it is converted with ``np.asarray`` and must
                have at least three channels (presumably an RGB PIL image -
                verify against the caller).
            img_x: First size argument handed to the MiDaS ``Resize`` transform
                (presumably the network input width - confirm against ``Resize``).
            img_y: Second size argument handed to ``Resize`` (presumably height).
            model_type: 0 = dpt_large, 1 = midas_v21, 2 = midas_v21_small.
            invert_depth: When truthy, the 16-bit depth map is bitwise inverted
                before it is reduced to 8 bits.

        Returns:
            PIL.Image.Image: Image built from an array shaped like ``image``
            whose first three channels all hold the 8-bit depth values.

        Raises:
            ValueError: if ``model_type`` is not 0, 1 or 2.
            requests.RequestException: if missing weights cannot be downloaded.
        """
        # Fail fast with a clear message instead of a late, unrelated NameError.
        if model_type not in (0, 1, 2):
            raise ValueError(f"unsupported MiDaS model_type: {model_type!r} (expected 0, 1 or 2)")

        # Bound up front so the cleanup in ``finally`` can never fail (and mask
        # the real exception) when loading the model did not get that far.
        model = None
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            on_cuda = device.type == "cuda"

            # create path to model if not present
            model_dir = "./models/midas"
            os.makedirs(model_dir, exist_ok=True)

            model, resize_mode, normalization = self._load_model(model_type, model_dir)

            # init transform
            transform = Compose(
                [
                    Resize(
                        img_x,
                        img_y,
                        resize_target=None,
                        keep_aspect_ratio=True,
                        ensure_multiple_of=32,
                        resize_method=resize_mode,
                        image_interpolation_method=cv2.INTER_CUBIC,
                    ),
                    normalization,
                    PrepareForNet(),
                ]
            )
            model.eval()
            # optimize
            if on_cuda:
                model = model.to(memory_format=torch.channels_last)
                if not cmd_opts.no_half:
                    model = model.half()
            model.to(device)

            # NOTE(review): if ``image`` is an RGB PIL image this swaps it to
            # BGR before it reaches the network; kept as-is to preserve the
            # existing output - confirm whether the conversion is intended.
            img = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB) / 255.0
            img_input = transform({"image": img})["image"]
            precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" and on_cuda else contextlib.nullcontext
            # compute
            with torch.no_grad(), precision_scope("cuda"):
                sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                if on_cuda:
                    sample = sample.to(memory_format=torch.channels_last)
                    if not cmd_opts.no_half:
                        sample = sample.half()
                prediction = model.forward(sample)
                # scale the prediction back to the source image resolution
                prediction = (
                    torch.nn.functional.interpolate(
                        prediction.unsqueeze(1),
                        size=img.shape[:2],
                        mode="bicubic",
                        align_corners=False,
                    )
                    .squeeze()
                    .cpu()
                    .numpy()
                )

            # output
            depth = prediction
            numbytes = 2
            depth_min = depth.min()
            depth_max = depth.max()
            max_val = (2 ** (8 * numbytes)) - 1

            # check output before normalizing and mapping to 16 bit
            if depth_max - depth_min > np.finfo("float").eps:
                out = max_val * (depth - depth_min) / (depth_max - depth_min)
            else:
                # flat prediction: avoid dividing by (almost) zero
                out = np.zeros(depth.shape)
            # single channel, 16 bit image
            img_output = out.astype("uint16")

            # invert depth map
            if invert_depth:
                img_output = cv2.bitwise_not(img_output)

            # three channel, 8 bits per channel image
            img_output2 = np.zeros_like(image)
            depth_8bit = img_output / 256.0
            for channel in range(3):
                img_output2[:, :, channel] = depth_8bit
            return Image.fromarray(img_output2)
        finally:
            # Drop the only reference to the network before collecting so its
            # memory can actually be reclaimed.
            model = None
            gc.collect()
            devices.torch_gc()