Spaces:
Running
Running
| from __future__ import division | |
| import torch | |
| import numpy as np | |
| from PIL import Image | |
| import torchvision.transforms.functional as F | |
| import random | |
| import cv2 | |
class Compose(object):
    """Chain several sample transforms and apply them in order.

    Each transform is a callable taking and returning a sample dict.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, sample):
        for transform in self.transforms:
            sample = transform(sample)
        return sample
class ToTensor(object):
    """Convert the numpy images in a sample to CHW torch tensors.

    'left'/'right' arrays are transposed from [H, W, 3] to [3, H, W];
    unless ``no_normalize`` is set, pixel values are divided by 255.
    An optional 'disp' entry is converted to a tensor without scaling.
    """

    def __init__(self, no_normalize=False):
        # When True, keep raw pixel values instead of scaling to [0, 1].
        self.no_normalize = no_normalize

    def __call__(self, sample):
        for key in ('left', 'right'):
            chw = torch.from_numpy(np.transpose(sample[key], (2, 0, 1)))  # [3, H, W]
            sample[key] = chw if self.no_normalize else chw / 255.
        if 'disp' in sample:
            sample['disp'] = torch.from_numpy(sample['disp'])  # [H, W]
        return sample
class Normalize(object):
    """Channel-wise in-place normalization of the 'left'/'right' tensors.

    Expects images already converted to tensors with shape [C, H, W];
    each channel c is updated in place as (x - mean[c]) / std[c].
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        for key in ('left', 'right'):
            for channel, m, s in zip(sample[key], self.mean, self.std):
                channel.sub_(m).div_(s)
        return sample
class RandomCrop(object):
    """Randomly crop a sample to a fixed (img_height, img_width) window.

    If the requested size exceeds the source image, the images are first
    edge-padded on top/right (disparity is zero-padded), then a random
    window is drawn and applied identically to all sample entries.
    """

    def __init__(self, img_height, img_width):
        self.img_height = img_height
        self.img_width = img_width

    def __call__(self, sample):
        cur_h, cur_w = sample['left'].shape[:2]

        # Pad first when the requested crop is larger than the image
        # (supports padding a single side).
        if self.img_height > cur_h or self.img_width > cur_w:
            top_pad = max(self.img_height - cur_h, 0)
            right_pad = max(self.img_width - cur_w, 0)
            img_pad = ((top_pad, 0), (0, right_pad), (0, 0))
            # Edge padding avoids introducing artificial black borders.
            sample['left'] = np.lib.pad(sample['left'], img_pad, mode='edge')
            sample['right'] = np.lib.pad(sample['right'], img_pad, mode='edge')
            if 'disp' in sample:
                # Zero disparity marks padded pixels as invalid.
                sample['disp'] = np.lib.pad(sample['disp'],
                                            ((top_pad, 0), (0, right_pad)),
                                            mode='constant',
                                            constant_values=0)
            # Refresh resolution after padding.
            cur_h, cur_w = sample['left'].shape[:2]

        assert self.img_height <= cur_h and self.img_width <= cur_w

        # Training: draw a random crop origin (x first, then y, keeping
        # the RNG call order stable).
        self.offset_x = np.random.randint(cur_w - self.img_width + 1)
        start_height = 0
        assert cur_h - start_height >= self.img_height
        self.offset_y = np.random.randint(start_height, cur_h - self.img_height + 1)

        for key in ('left', 'right'):
            sample[key] = self.crop_img(sample[key])
        if 'disp' in sample:
            sample['disp'] = self.crop_img(sample['disp'])
        return sample

    def crop_img(self, img):
        y0, x0 = self.offset_y, self.offset_x
        return img[y0:y0 + self.img_height, x0:x0 + self.img_width]
class RandomVerticalFlip(object):
    """Randomly flip a sample upside-down with probability 0.5.

    Flipping left/right/disp identically preserves the horizontal
    disparity relation between the stereo pair. ``np.copy`` removes the
    negative strides produced by ``flipud`` (required later by
    ``torch.from_numpy``).
    """

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = np.copy(np.flipud(sample['left']))
            sample['right'] = np.copy(np.flipud(sample['right']))
            # Fix: guard like the other transforms — test-time samples
            # may carry no ground-truth disparity (previously KeyError).
            if 'disp' in sample:
                sample['disp'] = np.copy(np.flipud(sample['disp']))
        return sample
class ToPILImage(object):
    """Wrap the 'left'/'right' numpy arrays as uint8 PIL images."""

    def __call__(self, sample):
        for key in ('left', 'right'):
            sample[key] = Image.fromarray(sample[key].astype('uint8'))
        return sample
class ToNumpyArray(object):
    """Convert the 'left'/'right' images back to float32 numpy arrays."""

    def __call__(self, sample):
        for key in ('left', 'right'):
            sample[key] = np.array(sample[key]).astype(np.float32)
        return sample
| # Random coloring | |
# Random coloring
class RandomContrast(object):
    """Apply a random contrast jitter to a PIL-image sample.

    The left view is jittered with probability 0.5; when
    ``asymmetric_color_aug`` is on, the right view independently draws
    its own factor (also with probability 0.5).
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = F.adjust_contrast(
                sample['left'], np.random.uniform(0.8, 1.2))
        if self.asymmetric_color_aug and np.random.random() < 0.5:
            sample['right'] = F.adjust_contrast(
                sample['right'], np.random.uniform(0.8, 1.2))
        return sample
class RandomGamma(object):
    """Apply a random gamma adjustment to a PIL-image sample.

    Left and (when ``asymmetric_color_aug``) right views each get an
    independent gamma in [0.7, 1.5] with probability 0.5.
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        if np.random.random() < 0.5:
            # Range adopted from FlowNet.
            sample['left'] = F.adjust_gamma(
                sample['left'], np.random.uniform(0.7, 1.5))
        if self.asymmetric_color_aug and np.random.random() < 0.5:
            sample['right'] = F.adjust_gamma(
                sample['right'], np.random.uniform(0.7, 1.5))
        return sample
class RandomBrightness(object):
    """Apply a random brightness jitter to a PIL-image sample.

    Left and (when ``asymmetric_color_aug``) right views each get an
    independent factor in [0.5, 2.0] with probability 0.5.
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = F.adjust_brightness(
                sample['left'], np.random.uniform(0.5, 2.0))
        if self.asymmetric_color_aug and np.random.random() < 0.5:
            sample['right'] = F.adjust_brightness(
                sample['right'], np.random.uniform(0.5, 2.0))
        return sample
class RandomHue(object):
    """Apply a random hue shift to a PIL-image sample.

    Left and (when ``asymmetric_color_aug``) right views each get an
    independent shift in [-0.1, 0.1] with probability 0.5.
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = F.adjust_hue(
                sample['left'], np.random.uniform(-0.1, 0.1))
        if self.asymmetric_color_aug and np.random.random() < 0.5:
            sample['right'] = F.adjust_hue(
                sample['right'], np.random.uniform(-0.1, 0.1))
        return sample
class RandomSaturation(object):
    """Apply a random saturation jitter to a PIL-image sample.

    Left and (when ``asymmetric_color_aug``) right views each get an
    independent factor in [0.8, 1.2] with probability 0.5.
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        if np.random.random() < 0.5:
            sample['left'] = F.adjust_saturation(
                sample['left'], np.random.uniform(0.8, 1.2))
        if self.asymmetric_color_aug and np.random.random() < 0.5:
            sample['right'] = F.adjust_saturation(
                sample['right'], np.random.uniform(0.8, 1.2))
        return sample
class RandomColor(object):
    """Apply a random photometric augmentation pipeline.

    With probability 0.5 a single randomly chosen color transform is
    applied; otherwise all of them are applied in shuffled order.
    Works on PIL images internally; numpy arrays in, numpy arrays out.
    """

    def __init__(self, asymmetric_color_aug=True):
        self.asymmetric_color_aug = asymmetric_color_aug

    def __call__(self, sample):
        candidates = [cls(asymmetric_color_aug=self.asymmetric_color_aug)
                      for cls in (RandomContrast, RandomGamma, RandomBrightness,
                                  RandomHue, RandomSaturation)]
        sample = ToPILImage()(sample)
        if np.random.random() < 0.5:
            # A single transform
            sample = random.choice(candidates)(sample)
        else:
            # Combination of transforms, applied in random order
            random.shuffle(candidates)
            for transform in candidates:
                sample = transform(sample)
        return ToNumpyArray()(sample)
class RandomScale(object):
    """Randomly stretch the sample along the x axis (probability 0.5).

    The factor is 2**U(min_scale, max_scale), clipped from below so the
    scaled width still fits a crop of ``crop_width``. Disparity values
    are multiplied by the same factor, since disparity is a horizontal
    pixel offset. ``nearest_interp`` selects nearest-neighbor resampling
    for (sparse) ground-truth disparity; a dense 'pseudo_disp' entry is
    always resampled bilinearly.
    """

    def __init__(self,
                 min_scale=-0.4,
                 max_scale=0.4,
                 crop_width=512,
                 nearest_interp=False,  # for sparse gt
                 ):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.crop_width = crop_width
        self.nearest_interp = nearest_interp

    def __call__(self, sample):
        if np.random.rand() < 0.5:
            h, w = sample['disp'].shape
            scale_x = 2 ** np.random.uniform(self.min_scale, self.max_scale)
            # Never shrink below the training crop width.
            scale_x = np.clip(scale_x, self.crop_width / float(w), None)

            def stretch(img, interp):
                # Only the x axis is randomly scaled.
                return cv2.resize(img, None, fx=scale_x, fy=1., interpolation=interp)

            disp_interp = cv2.INTER_NEAREST if self.nearest_interp else cv2.INTER_LINEAR
            sample['left'] = stretch(sample['left'], cv2.INTER_LINEAR)
            sample['right'] = stretch(sample['right'], cv2.INTER_LINEAR)
            sample['disp'] = stretch(sample['disp'], disp_interp) * scale_x
            if 'pseudo_disp' in sample and sample['pseudo_disp'] is not None:
                sample['pseudo_disp'] = stretch(sample['pseudo_disp'],
                                                cv2.INTER_LINEAR) * scale_x
        return sample
class Resize(object):
    """Deterministically resize a sample by fixed per-axis factors.

    Resize low-resolution data to high-res for mixed dataset training.
    Disparity values are multiplied by ``scale_x`` only, because
    disparity measures a horizontal offset. ``nearest_interp`` selects
    nearest-neighbor resampling for (sparse) ground-truth disparity.
    """

    def __init__(self,
                 scale_x=1,
                 scale_y=1,
                 nearest_interp=True,  # for sparse gt
                 ):
        self.scale_x = scale_x
        self.scale_y = scale_y
        self.nearest_interp = nearest_interp

    def __call__(self, sample):
        fx, fy = self.scale_x, self.scale_y
        for key in ('left', 'right'):
            sample[key] = cv2.resize(sample[key], None, fx=fx, fy=fy,
                                     interpolation=cv2.INTER_LINEAR)
        disp_interp = cv2.INTER_NEAREST if self.nearest_interp else cv2.INTER_LINEAR
        sample['disp'] = cv2.resize(sample['disp'], None, fx=fx, fy=fy,
                                    interpolation=disp_interp) * fx
        return sample
class RandomGrayscale(object):
    """With probability p, convert both views to 3-channel grayscale."""

    def __init__(self, p=0.2):
        self.p = p

    def __call__(self, sample):
        if np.random.random() < self.p:
            sample = ToPILImage()(sample)
            # F.rgb_to_grayscale is only supported in newer torchvision;
            # keep three output channels (default is 1) so downstream
            # code still sees RGB-shaped images.
            for key in ('left', 'right'):
                sample[key] = F.rgb_to_grayscale(sample[key], num_output_channels=3)
            sample = ToNumpyArray()(sample)
        return sample
class RandomRotateShiftRight(object):
    """Perturb only the right view with a tiny rotation and a vertical
    shift (probability p), simulating slight rectification error."""

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        if np.random.random() < self.p:
            max_angle, max_pixel = 0.1, 2
            shift = np.random.uniform(-max_pixel, max_pixel)
            angle = np.random.uniform(-max_angle, max_angle)
            right = sample['right']
            # NOTE(review): the center is drawn as (rand(H), rand(W)),
            # i.e. (row, col), while cv2 expects (x, y) order — confirm
            # this swap is intended for non-square images.
            center = (np.random.uniform(0, right.shape[0]),
                      np.random.uniform(0, right.shape[1]))
            out_size = right.shape[1::-1]  # (width, height)
            rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
            right = cv2.warpAffine(right, rotation, out_size,
                                   flags=cv2.INTER_LINEAR)
            # Pure vertical translation by `shift` pixels.
            translation = np.float32([[1, 0, 0], [0, 1, shift]])
            right = cv2.warpAffine(right, translation, out_size,
                                   flags=cv2.INTER_LINEAR)
            sample['right'] = right
        return sample
class RandomOcclusion(object):
    """With probability p, paint a random rectangle over the right view.

    The rectangle (50-100 px per side) is filled with the image's mean
    color, or with zeros when ``occlusion_mask_zero`` is set. The right
    image array is modified in place.
    """

    def __init__(self, p=0.5, occlusion_mask_zero=False):
        self.p = p
        self.occlusion_mask_zero = occlusion_mask_zero

    def __call__(self, sample):
        lo, hi = 50, 100
        if np.random.random() < self.p:
            right = sample['right']
            height, width = right.shape[:2]
            if self.occlusion_mask_zero:
                fill = 0
            else:
                fill = np.mean(right.reshape(-1, 3), axis=0)
            # Top-left corner anywhere inside the image; the slice
            # silently clips at the borders.
            x0 = np.random.randint(0, width)
            y0 = np.random.randint(0, height)
            dx = np.random.randint(lo, hi)
            dy = np.random.randint(lo, hi)
            right[y0:y0 + dy, x0:x0 + dx, :] = fill
            sample['right'] = right
        return sample