File size: 5,929 Bytes
5db43ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import cv2
import numpy as np
# from util.garment_heatmap import HeatmapGenerator
import torch
import torchvision.transforms as transforms
import ffmpeg
#from OpticalFlow.optical_flow import OpticalFlow

class VideoLoader:
    """Decode a whole video into memory and serve square, fixed-size frames.

    All frames are read eagerly in ``__init__`` and kept in ``self.frames``
    as one array of shape ``(N, H, W, 3)``.  Frames are returned after three
    steps: crop to the bounding box (``min_h/max_h/min_w/max_w``), zero-pad
    by ``l/r/u/d`` pixels (left/right/up/down), then resize to
    ``OUTPUT_SIZE x OUTPUT_SIZE``.

    By default a landscape video is centre-cropped to a square and a
    portrait video is zero-padded on the left/right to a square.
    """

    # Side length, in pixels, of every frame returned by get_numpy_image /
    # get_image.
    OUTPUT_SIZE = 512
    # Videos taller than this are downscaled (aspect preserved) while loading.
    MAX_HEIGHT = 2048

    def __init__(self, path: str):
        """Load the video at ``path`` and set the default crop and padding.

        Raises:
            AssertionError: if the video cannot be opened.
            ValueError: if no frame could be decoded.
        """
        self.path = path
        # load_video() also sets frameCount / frameWidth / frameHeight.
        self.frames = self.load_video()

        # Default bounding box: the full frame ...
        self.min_h = 0
        self.min_w = 0
        self.max_h = self.frameHeight
        self.max_w = self.frameWidth
        # ... narrowed to a centred square when the video is landscape.
        self.crop2square()

        # Default padding: none, except left/right bars for portrait video.
        self.l = 0
        self.r = 0
        self.u = 0
        self.d = 0
        self._pad2square()

        # Heatmap generation and optical flow are currently disabled:
        # self.heatmap_gen = HeatmapGenerator()
        # self.optical_flow = OpticalFlow()
        self.post_transform = transforms.Resize(
            (self.OUTPUT_SIZE, self.OUTPUT_SIZE)
        )
        self.opt_flow = None

    def crop2square(self):
        """Restrict the bounding box to a centred square for landscape video.

        Does nothing when the frame is not wider than it is tall.
        """
        if self.frameWidth > self.frameHeight:
            offset = (self.frameWidth - self.frameHeight) // 2
            self.min_w = offset
            self.max_w = offset + self.frameHeight

    def _pad2square(self):
        """Set left/right padding so a portrait frame becomes exactly square."""
        if self.frameHeight > self.frameWidth:
            missing = self.frameHeight - self.frameWidth
            self.l = missing // 2
            # Give the odd leftover column to the right side; using the same
            # floor-divided value on both sides would leave the padded frame
            # one pixel narrower than it is tall.
            self.r = missing - self.l

    def __getitem__(self, idx):
        """Return frame ``idx`` as a ``1 x 3 x S x S`` float tensor.

        Same result as :meth:`get_image`.  Heatmap inference used to be
        chained here but is disabled, so only the image is returned.
        """
        return self.get_image(idx)

    def __len__(self) -> int:
        """Return the number of frames that were decoded."""
        return self.frames.shape[0]

    def set_bbox(self, min_h, min_w, max_h, max_w):
        """Set the crop window (pixel indices, upper bounds exclusive)."""
        self.min_h = min_h
        self.min_w = min_w
        self.max_h = max_h
        self.max_w = max_w

    def set_padding(self, l, r, u, d):
        """Set the zero padding, in pixels, added left/right/up/down."""
        self.l = l
        self.r = r
        self.u = u
        self.d = d

    def get_image(self, idx):
        """Return frame ``idx`` as a batched, channel-first tensor in [0, 1].

        The tensor has shape ``1 x 3 x S x S`` and is moved to CUDA when a
        GPU is available.  Channel order is whatever OpenCV delivered
        (BGR by default); no colour conversion is performed.
        """
        frame = self.get_numpy_image(idx)
        img = torch.from_numpy(frame) / 255.0
        img = img.permute(2, 0, 1)  # HWC -> CHW, still BGR
        if torch.cuda.is_available():
            img = img.cuda()
        img = img.unsqueeze(0)
        img = self.post_transform(img)
        return img

    def _crop_and_pad(self, frame: np.ndarray) -> np.ndarray:
        """Crop ``frame`` to the bounding box, then zero-pad it.

        Non-positive padding amounts are treated as "no padding".
        """
        frame = frame[self.min_h:self.max_h, self.min_w:self.max_w, :]
        pad_rows = (max(self.u, 0), max(self.d, 0))
        pad_cols = (max(self.l, 0), max(self.r, 0))
        if any(pad_rows) or any(pad_cols):
            # Constant mode pads with zeros and keeps the frame's dtype.
            frame = np.pad(frame, (pad_rows, pad_cols, (0, 0)), mode="constant")
        return frame

    def get_numpy_image(self, idx):
        """Return frame ``idx`` cropped, padded and resized to S x S x 3."""
        frame = self._crop_and_pad(self.frames[idx])
        return cv2.resize(
            frame,
            dsize=(self.OUTPUT_SIZE, self.OUTPUT_SIZE),
            interpolation=cv2.INTER_CUBIC,
        )

    def get_raw_numpy_image(self, idx):
        """Return frame ``idx`` exactly as stored (no crop, pad or resize)."""
        return self.frames[idx]

    def get_heatmap(self, idx):
        """Return the garment heatmaps for frame ``idx``.

        Raises:
            NotImplementedError: always, because heatmap generation is
                disabled and ``__getitem__`` only returns the image.
        """
        # Unpacking a second value from __getitem__ could never succeed, so
        # fail with an explicit message instead of an opaque unpacking error.
        raise NotImplementedError(
            "Heatmap generation is disabled: HeatmapGenerator is not loaded, "
            "so no heatmaps are available for frame %r" % (idx,)
        )

    def get_motor(self, idx):
        """Return a placeholder 6-element zero tensor (on CUDA if available).

        ``idx`` is accepted but not used.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return torch.zeros(6, device=device)

    def check_rotation(self, path_video_file):
        """Return the rotation tag of the first stream, snapped to 90 degrees.

        Reads ``streams[0]['tags']['rotate']`` from the ffmpeg probe result;
        a missing tag (or a file with no streams) yields 0.
        """
        meta_dict = ffmpeg.probe(path_video_file)
        streams = meta_dict.get('streams') or [{}]
        rotate = streams[0].get('tags', {}).get('rotate', 0)
        return round(int(rotate) / 90.0) * 90

    def load_video(self):
        """Decode every frame of ``self.path`` into one ``(N, H, W, 3)`` array.

        Also sets ``self.frameCount``, ``self.frameWidth`` and
        ``self.frameHeight``.  Videos taller than ``MAX_HEIGHT`` are
        downscaled frame by frame, and the stored width/height reflect the
        downscaled size.

        Raises:
            AssertionError: if the video cannot be opened.
            ValueError: if no frame could be decoded.
        """
        # rotateCode = self.check_rotation(self.path)
        cap = cv2.VideoCapture(self.path)
        try:
            if not cap.isOpened():
                # Raised explicitly (rather than via `assert`) so the check
                # survives `python -O`; the exception type and message are
                # unchanged for existing callers.
                raise AssertionError(f"{self.path}:video load failed!")

            self.frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            need_resize = self.frameHeight > self.MAX_HEIGHT
            if need_resize:
                original_height = self.frameHeight
                self.frameHeight = self.MAX_HEIGHT
                self.frameWidth = int(
                    self.frameWidth * self.frameHeight / original_height
                )

            frame_list = []
            while len(frame_list) < self.frameCount:
                ret, frame = cap.read()
                if not ret or frame is None:
                    break
                if need_resize:
                    frame = cv2.resize(
                        frame, (self.frameWidth, self.frameHeight)
                    )
                frame_list.append(frame)
        finally:
            # Always free the decoder, including when opening or reading fails.
            cap.release()

        if not frame_list:
            raise ValueError(f"{self.path}:video contains no readable frames!")
        return np.stack(frame_list, 0)

    # Disabled optical-flow precomputation, kept for reference:
    #
    # def compute_opt_flow(self):
    #     print("Start computing optical flow")
    #     opt_flow_list = []
    #     for i in range(self.__len__()-1):
    #         with torch.no_grad():
    #             opt_flow = self.optical_flow(self.frames[i],self.frames[i+1]).cpu()
    #         opt_flow_list.append(opt_flow)
    #     self.opt_flow=opt_flow_list
    #     print("Finish computing optical flow:",self.opt_flow.__len__())


if __name__ == '__main__':
    # Manual smoke test: load a sample clip and display one processed frame.
    import matplotlib.pyplot as plt

    path = './videos/garment_test.mov'
    video_loader = VideoLoader(path)
    print(video_loader.frames.shape)
    print(len(video_loader))

    # Centre 720x920 crop; these numbers assume a 1280x720 clip.
    video_loader.set_bbox(0, 180, 720, 1280 - 180)

    # Clamp the index so clips shorter than 201 frames do not raise IndexError.
    frame_idx = min(200, len(video_loader) - 1)

    # video_loader[idx] yields a 1x3x512x512 tensor (possibly on the GPU),
    # which imshow cannot render.  Show the HxWx3 numpy frame instead,
    # converted from OpenCV's BGR order to the RGB order matplotlib expects.
    frame = video_loader.get_numpy_image(frame_idx)
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.show()