# -*- coding: utf-8 -*-

# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de

import os
import cv2
import torch
import random
import numpy as np
import torchvision.transforms as transforms

from skimage.util.shape import view_as_windows

# Per-channel statistics applied by get_default_transform() and undone by
# torch_inv_normal(), torch2numpy() and torch_vid2numpy(). Keeping them in one
# place guarantees the forward and inverse normalisation stay in sync.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)


def get_image(filename):
    """Read an image file with OpenCV and return it with channels swapped.

    Args:
        filename: Path handed to ``cv2.imread``.

    Returns:
        The decoded image after a BGR -> RGB channel swap.
    """
    image = cv2.imread(filename)
    # cv2.imread decodes to BGR, so BGR2RGB is the conversion that describes
    # what happens here (it performs the same channel swap as RGB2BGR).
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def do_augmentation(scale_factor=0.3, color_factor=0.2):
    """Draw random augmentation parameters.

    Args:
        scale_factor: Width of the interval ``[1.2, 1.2 + scale_factor]`` the
            scale is drawn from.
        color_factor: Half-width of the interval around 1.0 each colour
            multiplier is drawn from.

    Returns:
        Tuple ``(scale, rot, do_flip, color_scale)``. ``rot`` is always 0 and
        ``do_flip`` is always False; ``color_scale`` is a list of three floats.
    """
    scale = random.uniform(1.2, 1.2 + scale_factor)
    # Rotation and flip augmentation are disabled: fixed neutral values.
    rot = 0
    do_flip = False
    c_up = 1.0 + color_factor
    c_low = 1.0 - color_factor
    color_scale = [random.uniform(c_low, c_up) for _ in range(3)]
    return scale, rot, do_flip, color_scale


def trans_point2d(pt_2d, trans):
    """Apply a 2x3 affine matrix to a single 2D point.

    Args:
        pt_2d: Indexable whose first two entries are x and y; further entries
            are ignored.
        trans: Affine matrix that is multiplied with ``[x, y, 1]``.

    Returns:
        The first two components of ``trans @ [x, y, 1]``.
    """
    src_pt = np.array([pt_2d[0], pt_2d[1], 1.])
    dst_pt = np.dot(trans, src_pt)
    return dst_pt[0:2]


def rotate_2d(pt_2d, rot_rad):
    """Rotate a 2D point about the origin by ``rot_rad`` radians.

    Returns:
        float32 array ``[x', y']``.
    """
    x = pt_2d[0]
    y = pt_2d[1]
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    xx = x * cs - y * sn
    yy = x * sn + y * cs
    return np.array([xx, yy], dtype=np.float32)


def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width,
                            dst_height, scale, rot, inv=False):
    """Build the affine transform between a source box and a target patch.

    The transform is defined by three point pairs: the box centre, the
    midpoint of its (rotated) bottom edge and the midpoint of its (rotated)
    right edge, mapped to the corresponding points of the patch.

    Args:
        c_x, c_y: Centre of the source box.
        src_width, src_height: Size of the source box before scaling.
        dst_width, dst_height: Size of the target patch.
        scale: Multiplier applied to the source box size.
        rot: Rotation of the source box in degrees.
        inv: If True, return the patch -> source transform instead.

    Returns:
        The 2x3 matrix produced by ``cv2.getAffineTransform``.
    """
    # augment size with scale
    src_w = src_width * scale
    src_h = src_height * scale
    src_center = np.zeros(2)
    src_center[0] = c_x
    src_center[1] = c_y

    # augment rotation
    rot_rad = np.pi * rot / 180
    src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
    src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)

    dst_w = dst_width
    dst_h = dst_height
    dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
    dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32)
    dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = src_center
    src[1, :] = src_center + src_downdir
    src[2, :] = src_center + src_rightdir

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = dst_center
    dst[1, :] = dst_center + dst_downdir
    dst[2, :] = dst_center + dst_rightdir

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def generate_patch_image_cv(cvimg, c_x, c_y, bb_width, bb_height, patch_width,
                            patch_height, do_flip, scale, rot):
    """Warp the box around ``(c_x, c_y)`` into a fixed-size patch.

    Args:
        cvimg: Image array of shape (H, W, C); it is copied, not modified.
        c_x, c_y: Box centre in image coordinates.
        bb_width, bb_height: Box size before ``scale`` is applied.
        patch_width, patch_height: Output patch size.
        do_flip: If True, mirror the image horizontally (and the centre's x
            coordinate with it) before warping.
        scale: Multiplier applied to the box size.
        rot: Rotation in degrees.

    Returns:
        Tuple ``(img_patch, trans)``: the warped patch and the 2x3 affine
        matrix that was used.
    """
    img = cvimg.copy()
    img_height, img_width, img_channels = img.shape

    if do_flip:
        # Materialise the mirrored view as a contiguous array before it is
        # handed to OpenCV.
        img = np.ascontiguousarray(img[:, ::-1, :])
        c_x = img_width - c_x - 1

    trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height,
                                    patch_width, patch_height, scale, rot,
                                    inv=False)

    img_patch = cv2.warpAffine(img, trans, (int(patch_width), int(patch_height)),
                               flags=cv2.INTER_LINEAR,
                               borderMode=cv2.BORDER_CONSTANT)

    return img_patch, trans


def _apply_trans_to_keypoints(kp_2d, trans):
    """Transform the x/y columns of ``kp_2d`` in place with ``trans``.

    Only the first two columns are written, so any extra columns (for example
    a per-joint score) are left untouched.
    """
    for n_jt in range(kp_2d.shape[0]):
        kp_2d[n_jt, :2] = trans_point2d(kp_2d[n_jt], trans)
    return kp_2d


def _load_image(image):
    """Turn a file path, torch tensor or numpy array into a numpy image.

    Args:
        image: Path to an image file, a ``torch.Tensor`` or an ``np.ndarray``.

    Returns:
        A numpy array. Files are read with OpenCV and converted BGR -> RGB;
        tensors are detached, moved to the CPU and converted; arrays are
        returned unchanged.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        TypeError: ``image`` is none of the supported types.
    """
    if isinstance(image, str):
        if not os.path.isfile(image):
            raise FileNotFoundError(f'{image} is not a valid file!')
        return cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB)
    if isinstance(image, torch.Tensor):
        # detach/cpu so tensors that require grad or live on a GPU also work
        return image.detach().cpu().numpy()
    if isinstance(image, np.ndarray):
        return image
    raise TypeError(f'Unknown type for object {type(image)}')


def crop_image(image, kp_2d, center_x, center_y, width, height, patch_width,
               patch_height, do_augment):
    """Crop a patch from ``image`` and map ``kp_2d`` into patch coordinates.

    Args:
        image: Image array of shape (H, W, C).
        kp_2d: Keypoint array indexed as ``kp_2d[joint]``; its x/y columns are
            overwritten IN PLACE with the transformed coordinates.
        center_x, center_y: Box centre.
        width, height: Box size before scaling.
        patch_width, patch_height: Output patch size.
        do_augment: If True, draw the scale from ``do_augmentation()``;
            otherwise use a fixed scale of 1.3.

    Returns:
        Tuple ``(image_patch, kp_2d, trans)``.
    """
    # get augmentation params (the colour scale is drawn but not applied)
    if do_augment:
        scale, rot, do_flip, _ = do_augmentation()
    else:
        scale, rot, do_flip = 1.3, 0, False

    # generate image patch
    image, trans = generate_patch_image_cv(
        image,
        center_x,
        center_y,
        width,
        height,
        patch_width,
        patch_height,
        do_flip,
        scale,
        rot
    )

    kp_2d = _apply_trans_to_keypoints(kp_2d, trans)

    return image, kp_2d, trans


def transfrom_keypoints(kp_2d, center_x, center_y, width, height, patch_width,
                        patch_height, do_augment):
    """Map ``kp_2d`` into the coordinates of a patch without cropping pixels.

    Args:
        kp_2d: Keypoint array indexed as ``kp_2d[joint]``; its x/y columns are
            overwritten IN PLACE with the transformed coordinates.
        center_x, center_y: Box centre.
        width, height: Box size before scaling.
        patch_width, patch_height: Target patch size.
        do_augment: If True, draw the scale from ``do_augmentation()``;
            otherwise use a fixed scale of 1.2.

    Returns:
        Tuple ``(kp_2d, trans)``.
    """
    if do_augment:
        scale, rot, _, _ = do_augmentation()
    else:
        scale, rot = 1.2, 0

    # generate transformation
    trans = gen_trans_from_patch_cv(
        center_x,
        center_y,
        width,
        height,
        patch_width,
        patch_height,
        scale,
        rot,
        inv=False,
    )

    kp_2d = _apply_trans_to_keypoints(kp_2d, trans)

    return kp_2d, trans


def get_image_crops(image_file, bboxes, scale=1.3, crop_size=224):
    """Read an image file and return one normalised square crop per box.

    Args:
        image_file: Path of the image to read.
        bboxes: Iterable of boxes. Each box is read as two corner points with
            the row coordinate first, i.e. ``(y0, x0, y1, x1)``.
        scale: Multiplier applied to each (squared) box before cropping.
        crop_size: Side length of the output crops.

    Returns:
        Tensor stacking the crops along a new first dimension.
    """
    image = cv2.cvtColor(cv2.imread(image_file), cv2.COLOR_BGR2RGB)
    crops = []
    for bb in bboxes:
        c_y, c_x = (bb[0] + bb[2]) // 2, (bb[1] + bb[3]) // 2
        h, w = bb[2] - bb[0], bb[3] - bb[1]
        # Square box with the longer side; max() avoids dividing by a zero
        # height the way a w / h comparison would.
        w = h = max(w, h)
        patch, _ = generate_patch_image_cv(
            cvimg=image,
            c_x=c_x,
            c_y=c_y,
            bb_width=w,
            bb_height=h,
            patch_width=crop_size,
            patch_height=crop_size,
            do_flip=False,
            scale=scale,
            rot=0,
        )
        crops.append(convert_cvimg_to_tensor(patch))

    batch_image = torch.stack(crops)
    return batch_image


def get_single_image_crop(image, bbox, scale=1.3, crop_size=224):
    """Crop one box from an image and return it as a normalised tensor.

    Args:
        image: File path, ``torch.Tensor`` or ``np.ndarray``.
        bbox: Indexable read as ``(c_x, c_y, width, height)``.
        scale: Multiplier applied to the box size before cropping.
        crop_size: Side length of the output crop.

    Returns:
        The crop after ``get_default_transform()``.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        TypeError: ``image`` has an unsupported type.
    """
    image = _load_image(image)

    patch, _ = generate_patch_image_cv(
        cvimg=image,
        c_x=bbox[0],
        c_y=bbox[1],
        bb_width=bbox[2],
        bb_height=bbox[3],
        patch_width=crop_size,
        patch_height=crop_size,
        do_flip=False,
        scale=scale,
        rot=0,
    )

    return convert_cvimg_to_tensor(patch)


def get_single_image_crop_demo(image, bbox, kp_2d, scale=1.2, crop_size=224):
    """Crop one box and also return the raw crop and transformed keypoints.

    Args:
        image: File path, ``torch.Tensor`` or ``np.ndarray``.
        bbox: Indexable read as ``(c_x, c_y, width, height)``.
        kp_2d: Optional keypoint array; when given, its x/y columns are
            overwritten IN PLACE with patch coordinates.
        scale: Multiplier applied to the box size before cropping.
        crop_size: Side length of the output crop.

    Returns:
        Tuple ``(crop_tensor, raw_crop, kp_2d)`` where ``raw_crop`` is the
        un-normalised numpy patch.

    Raises:
        FileNotFoundError: ``image`` is a string that is not an existing file.
        TypeError: ``image`` has an unsupported type.
    """
    image = _load_image(image)

    patch, trans = generate_patch_image_cv(
        cvimg=image,
        c_x=bbox[0],
        c_y=bbox[1],
        bb_width=bbox[2],
        bb_height=bbox[3],
        patch_width=crop_size,
        patch_height=crop_size,
        do_flip=False,
        scale=scale,
        rot=0,
    )

    if kp_2d is not None:
        kp_2d = _apply_trans_to_keypoints(kp_2d, trans)

    raw_image = patch.copy()
    crop_tensor = convert_cvimg_to_tensor(patch)

    return crop_tensor, raw_image, kp_2d


def read_image(filename):
    """Read an image, resize it to 224x224 and return a normalised tensor."""
    image = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    return convert_cvimg_to_tensor(image)


def convert_cvimg_to_tensor(image):
    """Apply ``get_default_transform()`` to ``image`` and return the result."""
    transform = get_default_transform()
    return transform(image)


def torch_inv_normal(image):
    """Undo the default normalisation on a batch of images.

    Args:
        image: Tensor whose second dimension holds the 3 channels (the
            statistics are broadcast with shape ``(1, 3, 1, 1)``).

    Returns:
        De-normalised tensor clamped to ``[0, 1]``.
    """
    std = torch.tensor(list(_NORM_STD), device=image.device).reshape(1, 3, 1, 1)
    mean = torch.tensor(list(_NORM_MEAN), device=image.device).reshape(1, 3, 1, 1)
    image = image * std
    image = image + mean
    image = image.clamp(0., 1.)
    return image


def torch2numpy(image):
    """Convert one normalised image tensor to a uint8 numpy image.

    Args:
        image: Tensor with the channel dimension first; it is transposed with
            ``(1, 2, 0)`` on output.

    Returns:
        uint8 array with the channel dimension last, values in ``[0, 255]``.
    """
    image = image.detach().cpu()

    # Normalising with mean = -m/s and std = 1/s inverts normalising with
    # (m, s). The statistics must match get_default_transform() exactly.
    inv_normalize = transforms.Normalize(
        mean=[-m / s for m, s in zip(_NORM_MEAN, _NORM_STD)],
        std=[1 / s for s in _NORM_STD]
    )
    image = inv_normalize(image)
    image = image.clamp(0., 1.)
    image = image.numpy() * 255.
    image = np.transpose(image, (1, 2, 0))
    return image.astype(np.uint8)


def torch_vid2numpy(video):
    """Convert a normalised video tensor to a uint8 numpy array.

    Args:
        video: Tensor whose third dimension holds the 3 channels (the
            statistics are broadcast with shape ``(1, 1, 3, 1, 1)``).

    Returns:
        uint8 array of the same shape with values in ``[0, 255]``.
    """
    video = video.detach().cpu().numpy()

    # Denormalize: same inverse statistics as torch2numpy().
    mean = np.array([-m / s for m, s in zip(_NORM_MEAN, _NORM_STD)])
    std = np.array([1 / s for s in _NORM_STD])

    mean = mean.reshape(1, 1, 3, 1, 1)
    std = std.reshape(1, 1, 3, 1, 1)

    video = (video - mean) / std
    video = video.clip(0., 1.) * 255
    video = video.astype(np.uint8)
    return video


def get_bbox_from_kp2d(kp_2d):
    """Compute a square bounding box around 2D keypoints.

    Args:
        kp_2d: Array indexed either as ``[joint, coord]`` or, when it has more
            than two dimensions, as ``[sample, joint, coord]``. Coordinate 0
            is x and coordinate 1 is y.

    Returns:
        Array ``[c_x, c_y, w, h]`` with ``w == h`` equal to 1.1 times the
        longer side of the tight box. For batched input each entry is an
        array over samples, giving shape ``(4, N)``.
    """
    if len(kp_2d.shape) > 2:
        ul = np.array([kp_2d[:, :, 0].min(axis=1), kp_2d[:, :, 1].min(axis=1)])  # upper left
        lr = np.array([kp_2d[:, :, 0].max(axis=1), kp_2d[:, :, 1].max(axis=1)])  # lower right
    else:
        ul = np.array([kp_2d[:, 0].min(), kp_2d[:, 1].min()])  # upper left
        lr = np.array([kp_2d[:, 0].max(), kp_2d[:, 1].max()])  # lower right

    w = lr[0] - ul[0]
    h = lr[1] - ul[1]
    c_x, c_y = ul[0] + w / 2, ul[1] + h / 2

    # Keep the aspect ratio: square box with the longer side, plus a 10 %
    # margin. np.maximum avoids the division by zero of a w / h comparison.
    w = h = np.maximum(w, h) * 1.1

    bbox = np.array([c_x, c_y, w, h])  # shape = (4,N)
    return bbox


def normalize_2d_kp(kp_2d, crop_size=224, inv=False):
    """Map keypoints between pixel coordinates and the range [-1, 1].

    Args:
        kp_2d: Keypoint coordinates.
        crop_size: Patch size that corresponds to the value range.
        inv: If False, map ``[0, crop_size]`` to ``[-1, 1]``; if True, apply
            the inverse mapping.

    Returns:
        The rescaled keypoints (a new object; the input is not modified).
    """
    ratio = 1.0 / crop_size
    if not inv:
        kp_2d = 2.0 * kp_2d * ratio - 1.0
    else:
        kp_2d = (kp_2d + 1.0) / (2 * ratio)

    return kp_2d


def get_default_transform():
    """Return the ``ToTensor`` + ``Normalize`` pipeline used for all crops."""
    normalize = transforms.Normalize(
        mean=list(_NORM_MEAN), std=list(_NORM_STD)
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    return transform


def split_into_chunks(vid_names, seqlen, stride):
    """List the (start, end) frame indices of fixed-length windows per video.

    The index range is split at the first occurrence of each distinct name,
    so the frames of one video are expected to be stored contiguously in
    ``vid_names``.

    Args:
        vid_names: 1-D array with one video name per frame.
        seqlen: Window length in frames; videos with fewer frames are skipped.
        stride: Step between the starts of consecutive windows.

    Returns:
        List of ``[start, end]`` pairs (``end`` inclusive) indexing into
        ``vid_names``.
    """
    video_start_end_indices = []

    # First-occurrence index of every name, re-sorted into file order.
    video_names, group = np.unique(vid_names, return_index=True)
    perm = np.argsort(group)
    video_names, group = video_names[perm], group[perm]

    indices = np.split(np.arange(0, vid_names.shape[0]), group[1:])

    for _, indexes in zip(video_names, indices):
        if indexes.shape[0] < seqlen:
            continue
        chunks = view_as_windows(indexes, (seqlen,), step=stride)
        # keep only the first and last frame index of every window
        start_finish = chunks[:, [0, -1]].tolist()
        video_start_end_indices += start_finish

    return video_start_end_indices