# snap2scene/utils/data_transforms.py
# -*- coding: utf-8 -*-
#
# Developed by Haozhe Xie <cshzxie@gmail.com>
# References:
# - https://github.com/xiumingzhang/GenRe-ShapeHD
import cv2
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches
import numpy as np
import os
import random
import torch
class Compose(object):
    """Chain several transforms and apply them in order.

    For example:
        >>> transforms.Compose([
        >>>     transforms.RandomBackground(),
        >>>     transforms.CenterCrop(127, 127, 3),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, rendering_images, bounding_box=None):
        for transform in self.transforms:
            # Only the crop transforms accept a bounding box; everything
            # else is called with the images alone.
            if type(transform).__name__ in ('RandomCrop', 'CenterCrop'):
                rendering_images = transform(rendering_images, bounding_box)
            else:
                rendering_images = transform(rendering_images)
        return rendering_images
class ToTensor(object):
    """Convert a numpy image batch to a float tensor.

    Takes a numpy.ndarray of shape (N x H x W x C) and returns a
    torch.FloatTensor of shape (N x C x H x W); values are not rescaled.
    """

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        # Move the channel axis in front of the spatial axes (NHWC -> NCHW).
        channels_first = rendering_images.transpose(0, 3, 1, 2)
        return torch.from_numpy(channels_first).float()
class Normalize(object):
    """Standardize an image batch: ``(images - mean) / std``.

    Args:
        mean: value (or per-channel array broadcastable to the images)
            subtracted from every pixel.
        std: value (or broadcastable array) the result is divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        # BUGFIX: the old in-place `-=` / `/=` clobbered the caller's array
        # and raised TypeError on integer inputs. Out-of-place arithmetic
        # returns a new (float-promoted) array and leaves the input alone.
        return (rendering_images - self.mean) / self.std
class RandomPermuteRGB(object):
    """Shuffle the color channels of every image with one shared random
    permutation, writing the result back into the input array."""

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        channel_order = np.random.permutation(3)
        # Apply the same channel order to the whole batch at once; the
        # in-place write keeps the original mutate-and-return contract.
        rendering_images[...] = rendering_images[..., channel_order]
        return rendering_images
class CenterCrop(object):
    """Center-crop a batch of images and resize them to a fixed output size.

    If a normalized bounding box is supplied, a square window around the box
    center is cropped (edge-padded where the window leaves the image);
    otherwise a plain center crop of ``crop_size`` is taken. Every crop is
    resized to ``img_size``.
    """

    def __init__(self, img_size, crop_size):
        """Set the output height/width (img_size) and the height/width of the
        plain center-crop window (crop_size)."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """
        Args:
            rendering_images: sequence of H x W x C numpy images.
            bounding_box: optional [x_min, y_min, x_max, y_max] with
                coordinates normalized to [0, 1], shared by all images.
        Returns:
            numpy array of shape (N, img_size_h, img_size_w, C).
        """
        if len(rendering_images) == 0:
            return rendering_images
        crop_size_c = rendering_images[0].shape[2]
        processed_images = np.empty(shape=(0, self.img_size_h, self.img_size_w, crop_size_c))
        for img_idx, img in enumerate(rendering_images):
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # BUGFIX: scale into a local variable. The old code overwrote
                # ``bounding_box`` with pixel coordinates, so every image
                # after the first re-scaled an already-scaled box.
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]  # yapf: disable
                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5
                # Make the crop area a square around the box center
                square_object_size = max(bbox_width, bbox_height)
                x_left = int(bbox_x_mid - square_object_size * .5)
                x_right = int(bbox_x_mid + square_object_size * .5)
                y_top = int(bbox_y_mid - square_object_size * .5)
                y_bottom = int(bbox_y_mid + square_object_size * .5)
                # If the crop window leaves the image, clamp it and remember
                # how much edge padding each side needs
                pad_x_left = 0
                if x_left < 0:
                    pad_x_left = -x_left
                    x_left = 0
                pad_x_right = 0
                if x_right >= img_width:
                    pad_x_right = x_right - img_width + 1
                    x_right = img_width - 1
                pad_y_top = 0
                if y_top < 0:
                    pad_y_top = -y_top
                    y_top = 0
                pad_y_bottom = 0
                if y_bottom >= img_height:
                    pad_y_bottom = y_bottom - img_height + 1
                    y_bottom = img_height - 1
                # Pad with edge values, then resize to the output size
                processed_image = np.pad(img[y_top:y_bottom + 1, x_left:x_right + 1],
                                         ((pad_y_top, pad_y_bottom), (pad_x_left, pad_x_right), (0, 0)),
                                         mode='edge')
                processed_image = cv2.resize(processed_image, (self.img_size_w, self.img_size_h))
            else:
                # No bounding box: plain center crop when the image is larger
                # than the crop window, otherwise use the whole image
                if img_height > self.crop_size_h and img_width > self.crop_size_w:
                    x_left = int(img_width - self.crop_size_w) // 2
                    x_right = int(x_left + self.crop_size_w)
                    y_top = int(img_height - self.crop_size_h) // 2
                    y_bottom = int(y_top + self.crop_size_h)
                else:
                    x_left = 0
                    x_right = img_width
                    y_top = 0
                    y_bottom = img_height
                processed_image = cv2.resize(img[y_top:y_bottom, x_left:x_right], (self.img_size_w, self.img_size_h))
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomCrop(object):
    """Randomly-jittered crop of a batch of images, resized to a fixed size.

    If a normalized bounding box is supplied, a roughly-square window around
    the box center — with randomized size (0.8x–1.2x) and offsets — is
    cropped (edge-padded where it leaves the image); otherwise a plain
    center crop of ``crop_size`` is taken. Every crop is resized to
    ``img_size``.
    """

    def __init__(self, img_size, crop_size):
        """Set the output height/width (img_size) and the height/width of the
        plain center-crop window (crop_size)."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """
        Args:
            rendering_images: sequence of H x W x C numpy images.
            bounding_box: optional [x_min, y_min, x_max, y_max] with
                coordinates normalized to [0, 1], shared by all images.
        Returns:
            numpy array of shape (N, img_size_h, img_size_w, C).
        """
        if len(rendering_images) == 0:
            return rendering_images
        crop_size_c = rendering_images[0].shape[2]
        processed_images = np.empty(shape=(0, self.img_size_h, self.img_size_w, crop_size_c))
        for img_idx, img in enumerate(rendering_images):
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # BUGFIX: scale into a local variable. The old code overwrote
                # ``bounding_box`` with pixel coordinates, so every image
                # after the first re-scaled an already-scaled box.
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]  # yapf: disable
                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5
                # Make the crop area roughly square, with randomized size and
                # randomized left/right/top/bottom extents for augmentation
                square_object_size = max(bbox_width, bbox_height)
                square_object_size = square_object_size * random.uniform(0.8, 1.2)
                x_left = int(bbox_x_mid - square_object_size * random.uniform(.4, .6))
                x_right = int(bbox_x_mid + square_object_size * random.uniform(.4, .6))
                y_top = int(bbox_y_mid - square_object_size * random.uniform(.4, .6))
                y_bottom = int(bbox_y_mid + square_object_size * random.uniform(.4, .6))
                # If the crop window leaves the image, clamp it and remember
                # how much edge padding each side needs
                pad_x_left = 0
                if x_left < 0:
                    pad_x_left = -x_left
                    x_left = 0
                pad_x_right = 0
                if x_right >= img_width:
                    pad_x_right = x_right - img_width + 1
                    x_right = img_width - 1
                pad_y_top = 0
                if y_top < 0:
                    pad_y_top = -y_top
                    y_top = 0
                pad_y_bottom = 0
                if y_bottom >= img_height:
                    pad_y_bottom = y_bottom - img_height + 1
                    y_bottom = img_height - 1
                # Pad with edge values, then resize to the output size
                processed_image = np.pad(img[y_top:y_bottom + 1, x_left:x_right + 1],
                                         ((pad_y_top, pad_y_bottom), (pad_x_left, pad_x_right), (0, 0)),
                                         mode='edge')
                processed_image = cv2.resize(processed_image, (self.img_size_w, self.img_size_h))
            else:
                # No bounding box: plain center crop when the image is larger
                # than the crop window, otherwise use the whole image
                if img_height > self.crop_size_h and img_width > self.crop_size_w:
                    x_left = int(img_width - self.crop_size_w) // 2
                    x_right = int(x_left + self.crop_size_w)
                    y_top = int(img_height - self.crop_size_h) // 2
                    y_bottom = int(y_top + self.crop_size_h)
                else:
                    x_left = 0
                    x_right = img_width
                    y_top = 0
                    y_bottom = img_height
                processed_image = cv2.resize(img[y_top:y_bottom, x_left:x_right], (self.img_size_w, self.img_size_h))
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomFlip(object):
    """Mirror each image horizontally with probability 1/2, in place."""

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        for idx in range(len(rendering_images)):
            # One independent coin flip per image; random.randint is kept so
            # seeded random streams match the previous implementation.
            if random.randint(0, 1):
                rendering_images[idx] = np.fliplr(rendering_images[idx])
        return rendering_images
class ColorJitter(object):
    """Randomly jitter brightness, contrast and saturation of a BGR batch.

    One multiplicative factor per attribute is drawn for the whole batch,
    and the three adjustments are applied to every image in a random order.
    """

    def __init__(self, brightness, contrast, saturation):
        # Each value d is the half-width of the random factor range [1-d, 1+d].
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation

    def __call__(self, rendering_images):
        if len(rendering_images) == 0:
            return rendering_images
        img_height, img_width, img_channels = rendering_images[0].shape
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels))
        # Draw one factor per attribute. The three uniform draws happen in
        # the same order as before so seeded random streams are reproducible.
        factors = {
            'brightness': 1 + np.random.uniform(low=-self.brightness, high=self.brightness),
            'contrast': 1 + np.random.uniform(low=-self.contrast, high=self.contrast),
            'saturation': 1 + np.random.uniform(low=-self.saturation, high=self.saturation),
        }
        names = list(factors)
        # Randomize the order in which the three adjustments are applied.
        order = np.array(range(len(names)))
        np.random.shuffle(order)
        for img in rendering_images:
            adjusted = img
            for k in order:
                adjusted = self._adjust_image_attr(adjusted, names[k], factors[names[k]])
            processed_images = np.append(processed_images, [adjusted], axis=0)
        return processed_images

    def _adjust_image_attr(self, img, attr_name, attr_value):
        """
        Adjust one attribute of the image by alpha-blending it toward a
        reference.

        Args:
            img: Image in BGR format, numpy array of shape (h, w, 3).
            attr_name: 'brightness', 'saturation', or 'contrast'.
            attr_value: blending weight of the original image.
        Returns:
            Output image in BGR format, same shape as the input.
        """
        gs = self._bgr_to_gray(img)
        if attr_name == 'contrast':
            # Blend toward the mean gray level of the whole image.
            reference = np.mean(gs[:, :, 0])
        elif attr_name == 'saturation':
            # Blend toward the per-pixel grayscale image.
            reference = gs
        elif attr_name == 'brightness':
            # Blend toward black.
            reference = 0
        else:
            raise NotImplementedError(attr_name)
        return self._alpha_blend(img, reference, attr_value)

    def _bgr_to_gray(self, bgr):
        """
        Convert a BGR image to a three-channel grayscale image.

        Differences from cv2.cvtColor():
        1. The input image can be float.
        2. The output has three identical channels instead of one.

        Args:
            bgr: Image in BGR format, numpy array of shape (h, w, 3).
        Returns:
            Grayscale image of the same shape; all three channels equal.
        """
        blue, green, red = bgr[:, :, 0], bgr[:, :, 1], bgr[:, :, 2]
        luma = 0.114 * blue + 0.587 * green + 0.299 * red
        return np.dstack((luma, luma, luma))

    def _alpha_blend(self, im1, im2, alpha):
        """
        Alpha blending of two images, or of one image and a scalar.

        Args:
            im1, im2: two arrays of the same shape, or an array and a scalar.
            alpha: weight of im1, usually in [0, 1].
        Returns:
            alpha * im1 + (1 - alpha) * im2, same shape as the input image.
        """
        return alpha * im1 + (1 - alpha) * im2
class RandomNoise(object):
    """Add AlexNet-style PCA lighting noise to a batch of 3-channel BGR images.

    A random weight per eigenvector is drawn once per batch; the resulting
    per-channel offset is added to every image.
    """

    def __init__(self,
                 noise_std,
                 eigvals=(0.2175, 0.0188, 0.0045),
                 eigvecs=((-0.5675, 0.7192, 0.4009), (-0.5808, -0.0045, -0.8140), (-0.5836, -0.6948, 0.4203))):
        # noise_std scales the normal distribution the eigenvector weights are
        # drawn from; the defaults look like ImageNet RGB PCA statistics
        # (NOTE(review): presumably — confirm against the training data).
        self.noise_std = noise_std
        self.eigvals = np.array(eigvals)
        self.eigvecs = np.array(eigvecs)

    def __call__(self, rendering_images):
        # One RGB offset for the whole batch: sum_i alpha_i * eigval_i * eigvec_i
        alpha = np.random.normal(loc=0, scale=self.noise_std, size=3)
        noise_rgb = np.sum(self.eigvecs * np.tile(alpha, (3, 1)) * np.tile(self.eigvals, (3, 1)), axis=1)
        # Allocate new space for storing processed images
        img_height, img_width, img_channels = rendering_images[0].shape
        assert (img_channels == 3), "Please use RandomBackground to normalize image channels"
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels))
        # The offset is in RGB order while the images are BGR, so reverse it
        # once instead of flipping every image to RGB and back.
        noise_bgr = noise_rgb[::-1]
        for img in rendering_images:
            # BUGFIX: the old code added the noise through a reversed-slice
            # *view* of the input, silently mutating the caller's array (and
            # raising on integer dtypes). Out-of-place addition leaves the
            # input untouched.
            processed_image = img + noise_bgr
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomBackground(object):
    """Composite RGBA images over a random background.

    Pixels whose alpha channel equals 0 are replaced by either a uniform
    random color drawn from ``random_bg_color_range`` or, when a folder of
    background images was supplied, sometimes by a random file from it.
    Images without an alpha channel pass through unchanged.
    """

    def __init__(self, random_bg_color_range, random_bg_folder_path=None):
        self.random_bg_color_range = random_bg_color_range
        self.random_bg_files = []
        if random_bg_folder_path is not None:
            file_names = os.listdir(random_bg_folder_path)
            self.random_bg_files = [os.path.join(random_bg_folder_path, name) for name in file_names]

    def __call__(self, rendering_images):
        if len(rendering_images) == 0:
            return rendering_images
        img_height, img_width, img_channels = rendering_images[0].shape
        # Only RGBA images carry the alpha mask; anything else passes through.
        if not img_channels == 4:
            return rendering_images
        # One random background color for the whole batch, scaled to [0, 1].
        r, g, b = np.array([
            np.random.randint(self.random_bg_color_range[i][0], self.random_bg_color_range[i][1] + 1) for i in range(3)
        ]) / 255.
        solid_color = np.array([[[r, g, b]]])
        random_bg = None
        if len(self.random_bg_files) > 0:
            chosen_file = random.choice(self.random_bg_files)
            random_bg = cv2.imread(chosen_file).astype(np.float32) / 255.
        # Composite each image over the chosen background.
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels - 1))
        for img in rendering_images:
            # Mask is 1 where the alpha channel is exactly 0 (background).
            mask = (np.expand_dims(img[:, :, 3], axis=2) == 0).astype(np.float32)
            rgb_part = img[:, :, :3]
            # The coin flip happens every iteration, matching the original
            # random stream; the file background wins only when one exists.
            use_file_bg = random.randint(0, 1) and random_bg is not None
            background = random_bg if use_file_bg else solid_color
            composited = mask * background + (1 - mask) * rgb_part
            processed_images = np.append(processed_images, [composited], axis=0)
        return processed_images