File size: 5,929 Bytes
5db43ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import cv2
import numpy as np
# from util.garment_heatmap import HeatmapGenerator
import torch
import torchvision.transforms as transforms
import ffmpeg
#from OpticalFlow.optical_flow import OpticalFlow
class VideoLoader:
    """Load a whole video into memory and serve square 512x512 frames.

    Frames are decoded once with OpenCV (BGR channel order) and kept as a
    single uint8 array of shape (num_frames, H, W, 3).  A crop box and
    zero-padding margins can be adjusted after construction with
    ``set_bbox()`` / ``set_padding()``.
    """

    # Frames taller than this are downscaled on load to bound memory use.
    MAX_LOAD_HEIGHT = 2048

    def __init__(self, path):
        """Decode the video at *path* and set up a centered square crop.

        Args:
            path: filesystem path to a video file readable by OpenCV.
        """
        self.path = path
        self.frames = self.load_video()  # also sets frameCount/Width/Height

        # Default crop box covers the full frame; crop2square() narrows
        # the width for landscape input.
        self.min_h = 0
        self.min_w = 0
        self.max_h = self.frameHeight
        self.max_w = self.frameWidth
        self.crop2square()

        # Zero-padding margins (left/right/up/down), applied after cropping.
        self.l = 0
        self.r = 0
        self.u = 0
        self.d = 0
        if self.frameHeight > self.frameWidth:
            # Portrait video: pad left/right up to a square.
            diff = self.frameHeight - self.frameWidth
            self.l = diff // 2
            # BUGFIX: put the remainder on the right so l + r always equals
            # the full difference (old code used r = l and lost one column
            # whenever the difference was odd).
            self.r = diff - self.l

        self.post_transform = transforms.Resize((512, 512))
        self.opt_flow = None

    def crop2square(self):
        """Center-crop landscape frames to a square by trimming the width."""
        if self.frameWidth > self.frameHeight:
            offset = (self.frameWidth - self.frameHeight) // 2
            self.min_w = offset
            self.max_w = offset + self.frameHeight

    def __getitem__(self, idx):
        """Return frame *idx* as a 1x3x512x512 float tensor (BGR, [0, 1])."""
        # Heatmap generation (HeatmapGenerator) is disabled; the old body
        # also built normalize/resize transforms it never applied, so this
        # reduces to the preprocessed image tensor.
        return self.get_image(idx)

    def __len__(self):
        """Number of decoded frames."""
        return self.frames.shape[0]

    def set_bbox(self, min_h, min_w, max_h, max_w):
        """Set the crop box (half-open pixel ranges) applied to raw frames."""
        self.min_h = min_h
        self.min_w = min_w
        self.max_h = max_h
        self.max_w = max_w

    def set_padding(self, l, r, u, d):
        """Set the zero-padding margins (left, right, up, down) in pixels."""
        self.l = l
        self.r = r
        self.u = u
        self.d = d

    def get_image(self, idx):
        """Return frame *idx* as a float tensor, shape (1, 3, 512, 512).

        Channel order stays BGR (inherited from OpenCV); values are scaled
        to [0, 1].  Moved to GPU when CUDA is available.
        """
        frame = self.get_numpy_image(idx)
        img = torch.from_numpy(frame) / 255.0
        img = img.permute(2, 0, 1)  # HWC -> CHW, still BGR
        if torch.cuda.is_available():
            img = img.cuda()
        img = img.unsqueeze(0)  # add batch dimension
        # get_numpy_image already outputs 512x512; keep the resize as a
        # safety net in case post_transform is reconfigured externally.
        return self.post_transform(img)

    def get_numpy_image(self, idx):
        """Crop, zero-pad and resize frame *idx*; returns uint8 HWC BGR 512x512."""
        frame = self.frames[idx][self.min_h:self.max_h, self.min_w:self.max_w, :]
        pads = ((self.u, self.d), (self.l, self.r), (0, 0))
        if any(p for pair in pads for p in pair):
            # np.pad with constant zeros matches the old per-side
            # np.zeros + np.concatenate sequence, dtype preserved.
            frame = np.pad(frame, pads, mode='constant')
        return cv2.resize(frame, dsize=(512, 512), interpolation=cv2.INTER_CUBIC)

    def get_raw_numpy_image(self, idx):
        """Return frame *idx* exactly as decoded (no crop/pad/resize)."""
        return self.frames[idx]

    def get_heatmap(self, idx):
        """Garment heatmaps for frame *idx* — currently unavailable.

        BUGFIX: the old body did ``_, heatmaps = self.__getitem__(idx)``,
        but ``__getitem__`` returns a single tensor, so this always crashed
        with an unpacking ValueError.  Fail explicitly until the
        HeatmapGenerator path is restored.
        """
        raise NotImplementedError(
            "heatmap generation is disabled (HeatmapGenerator is commented out)"
        )

    def get_motor(self, idx):
        """Placeholder motor state: six zeros (on GPU when available)."""
        motor = torch.zeros(6)
        return motor.cuda() if torch.cuda.is_available() else motor

    def check_rotation(self, path_video_file):
        """Probe video metadata and return its rotation, rounded to 90 degrees.

        Returns 0 when no 'rotate' tag is present in the first stream.
        """
        meta_dict = ffmpeg.probe(path_video_file)
        streams = meta_dict.get('streams', [{}])
        rotate = streams[0].get('tags', {}).get('rotate', 0)
        return round(int(rotate) / 90.0) * 90

    def load_video(self):
        """Decode every frame of ``self.path`` into one uint8 array.

        Sets ``frameCount`` / ``frameWidth`` / ``frameHeight`` as side
        effects and returns an array of shape (num_frames, H, W, 3).
        Frames taller than ``MAX_LOAD_HEIGHT`` are downscaled,
        preserving aspect ratio.
        """
        cap = cv2.VideoCapture(self.path)
        # NOTE(review): assert is stripped under -O; kept for caller
        # compatibility (existing code may expect AssertionError).
        assert cap.isOpened(), self.path + ":video load failed!"
        self.frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Cap decoded height to bound memory; width scales with the aspect.
        need_resize = self.frameHeight > self.MAX_LOAD_HEIGHT
        if need_resize:
            orig_h = self.frameHeight
            self.frameHeight = self.MAX_LOAD_HEIGHT
            self.frameWidth = int(self.frameWidth * self.frameHeight / orig_h)

        # BUGFIX: the old loop allocated an unused np.empty buffer every
        # iteration; collect frames and stack them once instead.
        frame_list = []
        fc = 0
        while fc < self.frameCount:
            ret, frame = cap.read()
            if not ret or frame is None:
                # CAP_PROP_FRAME_COUNT can over-report; stop on decode failure.
                break
            if need_resize:
                frame = cv2.resize(frame, (self.frameWidth, self.frameHeight))
            frame_list.append(frame)
            fc += 1
        cap.release()
        return np.stack(frame_list, axis=0)
if __name__ == '__main__':
    # Smoke test: load a clip, crop to a centered square, display one frame.
    path = './videos/garment_test.mov'
    video_loader = VideoLoader(path)
    print(video_loader.frames.shape)
    print(len(video_loader))

    import matplotlib.pyplot as plt

    video_loader.set_bbox(0, 180, 720, 1280 - 180)
    # BUGFIX: __getitem__ returns a 1x3x512x512 float tensor (possibly on
    # GPU, BGR order) which plt.imshow cannot display.  Show the uint8 HWC
    # frame instead, flipping BGR -> RGB for correct colors.
    frame_bgr = video_loader.get_numpy_image(200)
    plt.imshow(frame_bgr[:, :, ::-1])
    plt.show()
|