import random
from math import pi

import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image

from lanet_utils import image_grid

def filter_dict(dictionary, keywords):
    """
    Returns only the keywords that are keys of a dictionary.

    Parameters
    ----------
    dictionary : dict
        Dictionary used for filtering
    keywords : list of str
        Keywords that will be filtered

    Returns
    -------
    keywords : list of str
        List containing the keywords that are keys in dictionary
    """
    return [key for key in keywords if key in dictionary]

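# Usage sketch (hypothetical sample keys): keep only the keys a sample
# actually provides before iterating over them.
#
#   sample = {"image": None, "pose": None}
#   filter_dict(sample, ["image", "depth", "pose"])  # -> ["image", "pose"]
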
def resize_sample(sample, image_shape, image_interpolation=Image.LANCZOS):
    """
    Resizes a sample, which contains an input image.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values (output from a dataset's __getitem__ method)
    image_shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode (Image.LANCZOS is the same filter previously
        exposed via the deprecated Image.ANTIALIAS alias)

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(image_shape, interpolation=image_interpolation)
    sample["image"] = image_transform(sample["image"])
    return sample

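# Usage sketch: resize the "image" entry of a sample in place. Assumes the
# sample stores a PIL image, as the torchvision Resize transform expects.
#
#   sample = {"image": Image.new("RGB", (640, 480))}
#   sample = resize_sample(sample, (240, 320))
#   assert sample["image"].size == (320, 240)  # PIL reports (W, H)
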
def spatial_augment_sample(sample):
    """Apply spatial augmentation to an image (flipping and random affine transformation)."""
    augment_image = transforms.Compose(
        [
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        ]
    )
    sample["image"] = augment_image(sample["image"])
    return sample

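# Usage sketch: the transform operates on PIL images, so it composes directly
# with resize_sample above.
#
#   sample = {"image": Image.new("RGB", (320, 240))}
#   sample = spatial_augment_sample(sample)  # randomly flipped / affine-warped
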
def unnormalize_image(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Counterpart method of torchvision.transforms.Normalize."""
    # Normalize computes (x - mean) / std per channel; invert it in place
    # with x * std + mean.
    for t, m, s in zip(tensor, mean, std):
        t.mul_(s).add_(m)
    return tensor

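# Round-trip sketch: unnormalize_image undoes a Normalize with the same
# statistics (up to floating-point error).
#
#   normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
#   x = torch.rand(3, 8, 8)
#   assert torch.allclose(unnormalize_image(normalize(x.clone())), x, atol=1e-6)
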
def sample_homography(
    shape,
    perspective=True,
    scaling=True,
    rotation=True,
    translation=True,
    n_scales=100,
    n_angles=100,
    scaling_amplitude=0.1,
    perspective_amplitude=0.4,
    patch_ratio=0.8,
    max_angle=pi / 4,
):
    """Sample a random homography that includes perspective, scale, translation and rotation operations."""
    hw_ratio = float(shape[0]) / float(shape[1])

    # Source corners in normalized [-1, 1] coordinates, and the target patch:
    # shrunk by patch_ratio with y rescaled by the aspect ratio.
    pts1 = np.stack([[-1.0, -1.0], [-1.0, 1.0], [1.0, -1.0], [1.0, 1.0]], axis=0)
    pts2 = pts1.copy() * patch_ratio
    pts2[:, 1] *= hw_ratio

    if perspective:
        # Perturb each target corner with clipped Gaussian offsets.
        perspective_amplitude_x = np.random.normal(0.0, perspective_amplitude / 2, (2))
        perspective_amplitude_y = np.random.normal(
            0.0, hw_ratio * perspective_amplitude / 2, (2)
        )

        perspective_amplitude_x = np.clip(
            perspective_amplitude_x,
            -perspective_amplitude / 2,
            perspective_amplitude / 2,
        )
        perspective_amplitude_y = np.clip(
            perspective_amplitude_y,
            hw_ratio * -perspective_amplitude / 2,
            hw_ratio * perspective_amplitude / 2,
        )

        pts2[0, 0] -= perspective_amplitude_x[1]
        pts2[0, 1] -= perspective_amplitude_y[1]

        pts2[1, 0] -= perspective_amplitude_x[0]
        pts2[1, 1] += perspective_amplitude_y[1]

        pts2[2, 0] += perspective_amplitude_x[1]
        pts2[2, 1] -= perspective_amplitude_y[0]

        pts2[3, 0] += perspective_amplitude_x[0]
        pts2[3, 1] += perspective_amplitude_y[0]

    if scaling:
        # Draw candidate scales around 1 and pick one at random; index 0 of
        # the concatenated array keeps the identity scale available.
        random_scales = np.random.normal(1, scaling_amplitude / 2, (n_scales))
        random_scales = np.clip(
            random_scales, 1 - scaling_amplitude / 2, 1 + scaling_amplitude / 2
        )

        scales = np.concatenate([[1.0], random_scales], 0)
        center = np.mean(pts2, axis=0, keepdims=True)
        scaled = (
            np.expand_dims(pts2 - center, axis=0)
            * np.expand_dims(np.expand_dims(scales, 1), 1)
            + center
        )
        valid = np.arange(n_scales)
        idx = valid[np.random.randint(valid.shape[0])]
        pts2 = scaled[idx]

    if translation:
        # Translate uniformly within the margins that keep the patch inside
        # the normalized image bounds.
        t_min, t_max = np.min(pts2 - [-1.0, -hw_ratio], axis=0), np.min(
            [1.0, hw_ratio] - pts2, axis=0
        )
        pts2 += np.expand_dims(
            np.stack(
                [
                    np.random.uniform(-t_min[0], t_max[0]),
                    np.random.uniform(-t_min[1], t_max[1]),
                ]
            ),
            axis=0,
        )

    if rotation:
        # Sample candidate rotations (plus the identity at index 0) and keep
        # only those that leave all corners inside the image bounds.
        angles = np.linspace(-max_angle, max_angle, n_angles)
        angles = np.concatenate([[0.0], angles], axis=0)

        center = np.mean(pts2, axis=0, keepdims=True)
        rot_mat = np.reshape(
            np.stack(
                [np.cos(angles), -np.sin(angles), np.sin(angles), np.cos(angles)],
                axis=1,
            ),
            [-1, 2, 2],
        )
        rotated = (
            np.matmul(
                np.tile(np.expand_dims(pts2 - center, axis=0), [n_angles + 1, 1, 1]),
                rot_mat,
            )
            + center
        )

        valid = np.where(
            np.all(
                (rotated >= [-1.0, -hw_ratio]) & (rotated < [1.0, hw_ratio]),
                axis=(1, 2),
            )
        )[0]

        idx = valid[np.random.randint(valid.shape[0])]
        pts2 = rotated[idx]

    # Undo the aspect-ratio scaling on the y coordinates.
    pts2[:, 1] /= hw_ratio

    # Direct linear transform: each of the four correspondences contributes
    # two rows to the 8x8 system whose least-squares solution gives the first
    # eight homography entries.
    def ax(p, q):
        return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]]

    def ay(p, q):
        return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]]

    a_mat = np.stack([f(pts1[i], pts2[i]) for i in range(4) for f in (ax, ay)], axis=0)
    p_mat = np.transpose(
        np.stack([[pts2[i][j] for i in range(4) for j in range(2)]], axis=0)
    )

    homography = np.matmul(np.linalg.pinv(a_mat), p_mat).squeeze()
    homography = np.concatenate([homography, [1.0]]).reshape(3, 3)
    return homography

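# Usage sketch: homographies are expressed in normalized [-1, 1] coordinates,
# matching warp_homography below.
#
#   H_mat = sample_homography([240, 320], patch_ratio=0.7)
#   print(H_mat.shape)  # (3, 3), with H_mat[2, 2] == 1
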
def warp_homography(sources, homography):
    """Warp features given a homography.

    Parameters
    ----------
    sources: torch.Tensor (1,H,W,2)
        Keypoint vector.
    homography: torch.Tensor (3,3)
        Homography.

    Returns
    -------
    warped_sources: torch.Tensor (1,H,W,2)
        Warped feature vector.
    """
    _, H, W, _ = sources.shape
    warped_sources = sources.clone().squeeze()
    warped_sources = warped_sources.view(-1, 2)
    # addmm computes homography[:, 2] + [x, y] @ homography[:, :2]^T, i.e. the
    # product H @ [x, y, 1]^T for every source point at once.
    warped_sources = torch.addmm(
        homography[:, 2], warped_sources, homography[:, :2].t()
    )
    # Normalize by the homogeneous coordinate.
    warped_sources.mul_(1 / warped_sources[:, 2].unsqueeze(1))
    warped_sources = warped_sources[:, :2].contiguous().view(1, H, W, 2)
    return warped_sources

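# Sanity-check sketch: warping with the identity homography returns the grid
# unchanged (indexing="xy" assumes torch >= 1.10).
#
#   grid = torch.stack(
#       torch.meshgrid(torch.linspace(-1, 1, 4), torch.linspace(-1, 1, 4),
#                      indexing="xy"),
#       dim=-1,
#   ).unsqueeze(0)                      # (1, 4, 4, 2)
#   warped = warp_homography(grid, torch.eye(3))
#   assert torch.allclose(warped, grid)
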
def add_noise(img, mode="gaussian", percent=0.02):
    """Add image noise.

    Parameters
    ----------
    img : np.array
        Input image
    mode: str
        Type of noise, from ['gaussian','salt','pepper','s&p']
    percent: float
        Percentage of image points to add noise to.

    Returns
    -------
    image : np.array
        Image plus noise.
    """
    original_dtype = img.dtype
    if mode == "gaussian":
        mean = 0
        var = 0.1
        sigma = var ** 0.5  # standard deviation is the square root of the variance

        if img.ndim == 2:
            h, w = img.shape
            gauss = np.random.normal(mean, sigma, (h, w))
        else:
            h, w, c = img.shape
            gauss = np.random.normal(mean, sigma, (h, w, c))

        # Scale the noise to the dtype's range for integer images, then clip.
        if img.dtype not in [np.float32, np.float64]:
            gauss = gauss * np.iinfo(img.dtype).max
            img = np.clip(img.astype(float) + gauss, 0, np.iinfo(img.dtype).max)
        else:
            img = np.clip(img.astype(float) + gauss, 0, 1)

    elif mode == "salt":
        s_vs_p = 1
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape])

        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max
    elif mode == "pepper":
        s_vs_p = 0
        num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p))
        coords = tuple(
            [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape]
        )
        img[coords] = 0

    elif mode == "s&p":
        s_vs_p = 0.5

        # Salt noise
        num_salt = np.ceil(percent * img.size * s_vs_p)
        coords = tuple([np.random.randint(0, i - 1, int(num_salt)) for i in img.shape])
        if img.dtype in [np.float32, np.float64]:
            img[coords] = 1
        else:
            img[coords] = np.iinfo(img.dtype).max

        # Pepper noise
        num_pepper = np.ceil(percent * img.size * (1.0 - s_vs_p))
        coords = tuple(
            [np.random.randint(0, i - 1, int(num_pepper)) for i in img.shape]
        )
        img[coords] = 0
    else:
        raise ValueError("Unsupported noise mode: {}".format(mode))

    noisy = img.astype(original_dtype)
    return noisy

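# Usage sketch: corrupt a uint8 image with salt-and-pepper noise.
#
#   img = np.full((64, 64, 3), 128, dtype=np.uint8)
#   noisy = add_noise(img, mode="s&p", percent=0.02)
#   assert noisy.dtype == np.uint8
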
def non_spatial_augmentation(
    img_warp_ori, jitter_parameters, color_order=[0, 1, 2], to_gray=False
):
    """Apply non-spatial augmentation to an image (jittering, color swap, conversion to gray scale, Gaussian blur)."""
    brightness, contrast, saturation, hue = jitter_parameters
    color_augmentation = transforms.ColorJitter(brightness, contrast, saturation, hue)

    B = img_warp_ori.shape[0]
    img_warp = []
    kernel_sizes = [0, 1, 3, 5]
    for b in range(B):
        img_warp_sub = img_warp_ori[b].cpu()
        img_warp_sub = torchvision.transforms.functional.to_pil_image(img_warp_sub)

        # Swap color channels according to color_order.
        img_warp_sub_np = np.array(img_warp_sub)
        img_warp_sub_np = img_warp_sub_np[:, :, color_order]

        # Randomly add image noise.
        if np.random.rand() > 0.5:
            img_warp_sub_np = add_noise(img_warp_sub_np)

        # Randomly blur with one of the predefined kernel sizes (0 = no blur).
        rand_index = np.random.randint(4)
        kernel_size = kernel_sizes[rand_index]
        if kernel_size > 0:
            img_warp_sub_np = cv2.GaussianBlur(
                img_warp_sub_np, (kernel_size, kernel_size), sigmaX=0
            )

        # Optionally convert to gray scale while keeping three channels.
        if to_gray:
            img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_RGB2GRAY)
            img_warp_sub_np = cv2.cvtColor(img_warp_sub_np, cv2.COLOR_GRAY2RGB)

        img_warp_sub = Image.fromarray(img_warp_sub_np)
        img_warp_sub = color_augmentation(img_warp_sub)

        img_warp_sub = torchvision.transforms.functional.to_tensor(img_warp_sub).to(
            img_warp_ori.device
        )

        img_warp.append(img_warp_sub)

    img_warp = torch.stack(img_warp, dim=0)
    return img_warp

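# Usage sketch: augment a batch of float image tensors in [0, 1].
#
#   batch = torch.rand(2, 3, 64, 64)
#   augmented = non_spatial_augmentation(
#       batch, jitter_parameters=[0.5, 0.5, 0.2, 0.05], to_gray=True
#   )
#   assert augmented.shape == batch.shape
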
def ha_augment_sample(
    data,
    jitter_parameters=[0.5, 0.5, 0.2, 0.05],
    patch_ratio=0.7,
    scaling_amplitude=0.2,
    max_angle=pi / 4,
):
    """Apply Homography Adaptation image augmentation."""
    input_img = data["image"].unsqueeze(0)
    _, _, H, W = input_img.shape
    device = input_img.device

    # Sample a random homography and its inverse.
    homography = (
        torch.from_numpy(
            sample_homography(
                [H, W],
                patch_ratio=patch_ratio,
                scaling_amplitude=scaling_amplitude,
                max_angle=max_angle,
            )
        )
        .float()
        .to(device)
    )
    homography_inv = torch.inverse(homography)

    # Build a normalized sampling grid and warp it with the homography.
    source = (
        image_grid(
            1, H, W, dtype=input_img.dtype, device=device, ones=False, normalized=True
        )
        .clone()
        .permute(0, 2, 3, 1)
    )

    target_warped = warp_homography(source, homography)
    img_warp = torch.nn.functional.grid_sample(input_img, target_warped)

    # Randomly swap the color channels.
    color_order = [0, 1, 2]
    if np.random.rand() > 0.5:
        random.shuffle(color_order)

    # Randomly convert to gray scale.
    to_gray = False
    if np.random.rand() > 0.5:
        to_gray = True

    # Apply the same non-spatial augmentation to both views.
    input_img = non_spatial_augmentation(
        input_img,
        jitter_parameters=jitter_parameters,
        color_order=color_order,
        to_gray=to_gray,
    )
    img_warp = non_spatial_augmentation(
        img_warp,
        jitter_parameters=jitter_parameters,
        color_order=color_order,
        to_gray=to_gray,
    )

    data["image"] = input_img.squeeze()
    data["image_aug"] = img_warp.squeeze()
    data["homography"] = homography
    data["homography_inv"] = homography_inv
    return data

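# Usage sketch: produce an augmented pair plus the relating homography. Note
# image_grid comes from lanet_utils, so this assumes that module is importable.
#
#   data = {"image": torch.rand(3, 240, 320)}
#   data = ha_augment_sample(data)
#   data["image_aug"].shape        # torch.Size([3, 240, 320])
#   data["homography"].shape       # torch.Size([3, 3])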