# snap2scene/utils/data_transforms.py
# -*- coding: utf-8 -*-
#
# Developed by Haozhe Xie <cshzxie@gmail.com>
# References:
# - https://github.com/xiumingzhang/GenRe-ShapeHD
import cv2
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches
import numpy as np
import os
import random
import torch
class Compose(object):
    """Chain several transforms and apply them in order.

    For example:
        >>> transforms.Compose([
        >>>     transforms.RandomBackground(),
        >>>     transforms.CenterCrop(127, 127, 3),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, rendering_images, bounding_box=None):
        for transform in self.transforms:
            # Only the crop transforms accept a bounding box; everything
            # else is called with the images alone.
            if type(transform).__name__ in ('RandomCrop', 'CenterCrop'):
                rendering_images = transform(rendering_images, bounding_box)
            else:
                rendering_images = transform(rendering_images)
        return rendering_images
class ToTensor(object):
    """Convert a numpy image batch to a float tensor.

    Takes a numpy.ndarray of shape (N x H x W x C) and returns a
    torch.FloatTensor of shape (N x C x H x W); values are not rescaled.
    """

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        # Move the channel axis in front of the spatial axes (NHWC -> NCHW).
        channels_first = rendering_images.transpose(0, 3, 1, 2)
        return torch.from_numpy(channels_first).float()
class Normalize(object):
    """Standardize an image batch: ``(images - mean) / std``.

    Args:
        mean: value (or per-channel array broadcastable to the images)
            subtracted from every pixel.
        std: value (or broadcastable array) the result is divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        # BUGFIX: the old in-place `-=` / `/=` clobbered the caller's array
        # and raised TypeError on integer inputs. Out-of-place arithmetic
        # returns a new (float-promoted) array and leaves the input alone.
        return (rendering_images - self.mean) / self.std
class RandomPermuteRGB(object):
    """Shuffle the color channels of every image with one shared random
    permutation, writing the result back into the input array."""

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        channel_order = np.random.permutation(3)
        # Apply the same channel order to the whole batch at once; the
        # in-place write keeps the original mutate-and-return contract.
        rendering_images[...] = rendering_images[..., channel_order]
        return rendering_images
class CenterCrop(object):
    """Center-crop a batch of images and resize them to a fixed output size.

    If a normalized bounding box is supplied, a square window around the box
    center is cropped (edge-padded where the window leaves the image);
    otherwise a plain center crop of ``crop_size`` is taken. Every crop is
    resized to ``img_size``.
    """

    def __init__(self, img_size, crop_size):
        """Set the output height/width (img_size) and the height/width of the
        plain center-crop window (crop_size)."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """
        Args:
            rendering_images: sequence of H x W x C numpy images.
            bounding_box: optional [x_min, y_min, x_max, y_max] with
                coordinates normalized to [0, 1], shared by all images.
        Returns:
            numpy array of shape (N, img_size_h, img_size_w, C).
        """
        if len(rendering_images) == 0:
            return rendering_images
        crop_size_c = rendering_images[0].shape[2]
        processed_images = np.empty(shape=(0, self.img_size_h, self.img_size_w, crop_size_c))
        for img_idx, img in enumerate(rendering_images):
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # BUGFIX: scale into a local variable. The old code overwrote
                # ``bounding_box`` with pixel coordinates, so every image
                # after the first re-scaled an already-scaled box.
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]  # yapf: disable
                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5
                # Make the crop area a square around the box center
                square_object_size = max(bbox_width, bbox_height)
                x_left = int(bbox_x_mid - square_object_size * .5)
                x_right = int(bbox_x_mid + square_object_size * .5)
                y_top = int(bbox_y_mid - square_object_size * .5)
                y_bottom = int(bbox_y_mid + square_object_size * .5)
                # If the crop window leaves the image, clamp it and remember
                # how much edge padding each side needs
                pad_x_left = 0
                if x_left < 0:
                    pad_x_left = -x_left
                    x_left = 0
                pad_x_right = 0
                if x_right >= img_width:
                    pad_x_right = x_right - img_width + 1
                    x_right = img_width - 1
                pad_y_top = 0
                if y_top < 0:
                    pad_y_top = -y_top
                    y_top = 0
                pad_y_bottom = 0
                if y_bottom >= img_height:
                    pad_y_bottom = y_bottom - img_height + 1
                    y_bottom = img_height - 1
                # Pad with edge values, then resize to the output size
                processed_image = np.pad(img[y_top:y_bottom + 1, x_left:x_right + 1],
                                         ((pad_y_top, pad_y_bottom), (pad_x_left, pad_x_right), (0, 0)),
                                         mode='edge')
                processed_image = cv2.resize(processed_image, (self.img_size_w, self.img_size_h))
            else:
                # No bounding box: plain center crop when the image is larger
                # than the crop window, otherwise use the whole image
                if img_height > self.crop_size_h and img_width > self.crop_size_w:
                    x_left = int(img_width - self.crop_size_w) // 2
                    x_right = int(x_left + self.crop_size_w)
                    y_top = int(img_height - self.crop_size_h) // 2
                    y_bottom = int(y_top + self.crop_size_h)
                else:
                    x_left = 0
                    x_right = img_width
                    y_top = 0
                    y_bottom = img_height
                processed_image = cv2.resize(img[y_top:y_bottom, x_left:x_right], (self.img_size_w, self.img_size_h))
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomCrop(object):
    """Randomly-jittered crop of a batch of images, resized to a fixed size.

    If a normalized bounding box is supplied, a roughly-square window around
    the box center — with randomized size (0.8x–1.2x) and offsets — is
    cropped (edge-padded where it leaves the image); otherwise a plain
    center crop of ``crop_size`` is taken. Every crop is resized to
    ``img_size``.
    """

    def __init__(self, img_size, crop_size):
        """Set the output height/width (img_size) and the height/width of the
        plain center-crop window (crop_size)."""
        self.img_size_h = img_size[0]
        self.img_size_w = img_size[1]
        self.crop_size_h = crop_size[0]
        self.crop_size_w = crop_size[1]

    def __call__(self, rendering_images, bounding_box=None):
        """
        Args:
            rendering_images: sequence of H x W x C numpy images.
            bounding_box: optional [x_min, y_min, x_max, y_max] with
                coordinates normalized to [0, 1], shared by all images.
        Returns:
            numpy array of shape (N, img_size_h, img_size_w, C).
        """
        if len(rendering_images) == 0:
            return rendering_images
        crop_size_c = rendering_images[0].shape[2]
        processed_images = np.empty(shape=(0, self.img_size_h, self.img_size_w, crop_size_c))
        for img_idx, img in enumerate(rendering_images):
            img_height, img_width, _ = img.shape
            if bounding_box is not None:
                # BUGFIX: scale into a local variable. The old code overwrote
                # ``bounding_box`` with pixel coordinates, so every image
                # after the first re-scaled an already-scaled box.
                bbox = [
                    bounding_box[0] * img_width,
                    bounding_box[1] * img_height,
                    bounding_box[2] * img_width,
                    bounding_box[3] * img_height
                ]  # yapf: disable
                # Calculate the size and center of the bounding box
                bbox_width = bbox[2] - bbox[0]
                bbox_height = bbox[3] - bbox[1]
                bbox_x_mid = (bbox[2] + bbox[0]) * .5
                bbox_y_mid = (bbox[3] + bbox[1]) * .5
                # Make the crop area roughly square, with randomized size and
                # randomized left/right/top/bottom extents for augmentation
                square_object_size = max(bbox_width, bbox_height)
                square_object_size = square_object_size * random.uniform(0.8, 1.2)
                x_left = int(bbox_x_mid - square_object_size * random.uniform(.4, .6))
                x_right = int(bbox_x_mid + square_object_size * random.uniform(.4, .6))
                y_top = int(bbox_y_mid - square_object_size * random.uniform(.4, .6))
                y_bottom = int(bbox_y_mid + square_object_size * random.uniform(.4, .6))
                # If the crop window leaves the image, clamp it and remember
                # how much edge padding each side needs
                pad_x_left = 0
                if x_left < 0:
                    pad_x_left = -x_left
                    x_left = 0
                pad_x_right = 0
                if x_right >= img_width:
                    pad_x_right = x_right - img_width + 1
                    x_right = img_width - 1
                pad_y_top = 0
                if y_top < 0:
                    pad_y_top = -y_top
                    y_top = 0
                pad_y_bottom = 0
                if y_bottom >= img_height:
                    pad_y_bottom = y_bottom - img_height + 1
                    y_bottom = img_height - 1
                # Pad with edge values, then resize to the output size
                processed_image = np.pad(img[y_top:y_bottom + 1, x_left:x_right + 1],
                                         ((pad_y_top, pad_y_bottom), (pad_x_left, pad_x_right), (0, 0)),
                                         mode='edge')
                processed_image = cv2.resize(processed_image, (self.img_size_w, self.img_size_h))
            else:
                # No bounding box: plain center crop when the image is larger
                # than the crop window, otherwise use the whole image
                if img_height > self.crop_size_h and img_width > self.crop_size_w:
                    x_left = int(img_width - self.crop_size_w) // 2
                    x_right = int(x_left + self.crop_size_w)
                    y_top = int(img_height - self.crop_size_h) // 2
                    y_bottom = int(y_top + self.crop_size_h)
                else:
                    x_left = 0
                    x_right = img_width
                    y_top = 0
                    y_bottom = img_height
                processed_image = cv2.resize(img[y_top:y_bottom, x_left:x_right], (self.img_size_w, self.img_size_h))
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomFlip(object):
    """Mirror each image horizontally with probability 1/2, in place."""

    def __call__(self, rendering_images):
        assert isinstance(rendering_images, np.ndarray)
        for idx in range(len(rendering_images)):
            # One independent coin flip per image; random.randint is kept so
            # seeded random streams match the previous implementation.
            if random.randint(0, 1):
                rendering_images[idx] = np.fliplr(rendering_images[idx])
        return rendering_images
class ColorJitter(object):
    """Randomly jitter brightness, contrast and saturation of a BGR batch.

    One multiplicative factor per attribute is drawn for the whole batch,
    and the three adjustments are applied to every image in a random order.
    """

    def __init__(self, brightness, contrast, saturation):
        # Each value d is the half-width of the random factor range [1-d, 1+d].
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation

    def __call__(self, rendering_images):
        if len(rendering_images) == 0:
            return rendering_images
        img_height, img_width, img_channels = rendering_images[0].shape
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels))
        # Draw one factor per attribute. The three uniform draws happen in
        # the same order as before so seeded random streams are reproducible.
        factors = {
            'brightness': 1 + np.random.uniform(low=-self.brightness, high=self.brightness),
            'contrast': 1 + np.random.uniform(low=-self.contrast, high=self.contrast),
            'saturation': 1 + np.random.uniform(low=-self.saturation, high=self.saturation),
        }
        names = list(factors)
        # Randomize the order in which the three adjustments are applied.
        order = np.array(range(len(names)))
        np.random.shuffle(order)
        for img in rendering_images:
            adjusted = img
            for k in order:
                adjusted = self._adjust_image_attr(adjusted, names[k], factors[names[k]])
            processed_images = np.append(processed_images, [adjusted], axis=0)
        return processed_images

    def _adjust_image_attr(self, img, attr_name, attr_value):
        """
        Adjust one attribute of the image by alpha-blending it toward a
        reference.

        Args:
            img: Image in BGR format, numpy array of shape (h, w, 3).
            attr_name: 'brightness', 'saturation', or 'contrast'.
            attr_value: blending weight of the original image.
        Returns:
            Output image in BGR format, same shape as the input.
        """
        gs = self._bgr_to_gray(img)
        if attr_name == 'contrast':
            # Blend toward the mean gray level of the whole image.
            reference = np.mean(gs[:, :, 0])
        elif attr_name == 'saturation':
            # Blend toward the per-pixel grayscale image.
            reference = gs
        elif attr_name == 'brightness':
            # Blend toward black.
            reference = 0
        else:
            raise NotImplementedError(attr_name)
        return self._alpha_blend(img, reference, attr_value)

    def _bgr_to_gray(self, bgr):
        """
        Convert a BGR image to a three-channel grayscale image.

        Differences from cv2.cvtColor():
        1. The input image can be float.
        2. The output has three identical channels instead of one.

        Args:
            bgr: Image in BGR format, numpy array of shape (h, w, 3).
        Returns:
            Grayscale image of the same shape; all three channels equal.
        """
        blue, green, red = bgr[:, :, 0], bgr[:, :, 1], bgr[:, :, 2]
        luma = 0.114 * blue + 0.587 * green + 0.299 * red
        return np.dstack((luma, luma, luma))

    def _alpha_blend(self, im1, im2, alpha):
        """
        Alpha blending of two images, or of one image and a scalar.

        Args:
            im1, im2: two arrays of the same shape, or an array and a scalar.
            alpha: weight of im1, usually in [0, 1].
        Returns:
            alpha * im1 + (1 - alpha) * im2, same shape as the input image.
        """
        return alpha * im1 + (1 - alpha) * im2
class RandomNoise(object):
    """Add AlexNet-style PCA lighting noise to a batch of 3-channel BGR images.

    A random weight per eigenvector is drawn once per batch; the resulting
    per-channel offset is added to every image.
    """

    def __init__(self,
                 noise_std,
                 eigvals=(0.2175, 0.0188, 0.0045),
                 eigvecs=((-0.5675, 0.7192, 0.4009), (-0.5808, -0.0045, -0.8140), (-0.5836, -0.6948, 0.4203))):
        # noise_std scales the normal distribution the eigenvector weights are
        # drawn from; the defaults look like ImageNet RGB PCA statistics
        # (NOTE(review): presumably — confirm against the training data).
        self.noise_std = noise_std
        self.eigvals = np.array(eigvals)
        self.eigvecs = np.array(eigvecs)

    def __call__(self, rendering_images):
        # One RGB offset for the whole batch: sum_i alpha_i * eigval_i * eigvec_i
        alpha = np.random.normal(loc=0, scale=self.noise_std, size=3)
        noise_rgb = np.sum(self.eigvecs * np.tile(alpha, (3, 1)) * np.tile(self.eigvals, (3, 1)), axis=1)
        # Allocate new space for storing processed images
        img_height, img_width, img_channels = rendering_images[0].shape
        assert (img_channels == 3), "Please use RandomBackground to normalize image channels"
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels))
        # The offset is in RGB order while the images are BGR, so reverse it
        # once instead of flipping every image to RGB and back.
        noise_bgr = noise_rgb[::-1]
        for img in rendering_images:
            # BUGFIX: the old code added the noise through a reversed-slice
            # *view* of the input, silently mutating the caller's array (and
            # raising on integer dtypes). Out-of-place addition leaves the
            # input untouched.
            processed_image = img + noise_bgr
            processed_images = np.append(processed_images, [processed_image], axis=0)
        return processed_images
class RandomBackground(object):
    """Composite RGBA images over a random background.

    Pixels whose alpha channel equals 0 are replaced by either a uniform
    random color drawn from ``random_bg_color_range`` or, when a folder of
    background images was supplied, sometimes by a random file from it.
    Images without an alpha channel pass through unchanged.
    """

    def __init__(self, random_bg_color_range, random_bg_folder_path=None):
        self.random_bg_color_range = random_bg_color_range
        self.random_bg_files = []
        if random_bg_folder_path is not None:
            file_names = os.listdir(random_bg_folder_path)
            self.random_bg_files = [os.path.join(random_bg_folder_path, name) for name in file_names]

    def __call__(self, rendering_images):
        if len(rendering_images) == 0:
            return rendering_images
        img_height, img_width, img_channels = rendering_images[0].shape
        # Only RGBA images carry the alpha mask; anything else passes through.
        if not img_channels == 4:
            return rendering_images
        # One random background color for the whole batch, scaled to [0, 1].
        r, g, b = np.array([
            np.random.randint(self.random_bg_color_range[i][0], self.random_bg_color_range[i][1] + 1) for i in range(3)
        ]) / 255.
        solid_color = np.array([[[r, g, b]]])
        random_bg = None
        if len(self.random_bg_files) > 0:
            chosen_file = random.choice(self.random_bg_files)
            random_bg = cv2.imread(chosen_file).astype(np.float32) / 255.
        # Composite each image over the chosen background.
        processed_images = np.empty(shape=(0, img_height, img_width, img_channels - 1))
        for img in rendering_images:
            # Mask is 1 where the alpha channel is exactly 0 (background).
            mask = (np.expand_dims(img[:, :, 3], axis=2) == 0).astype(np.float32)
            rgb_part = img[:, :, :3]
            # The coin flip happens every iteration, matching the original
            # random stream; the file background wins only when one exists.
            use_file_bg = random.randint(0, 1) and random_bg is not None
            background = random_bg if use_file_bg else solid_color
            composited = mask * background + (1 - mask) * rgb_part
            processed_images = np.append(processed_images, [composited], axis=0)
        return processed_images