Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

UserControllableLT-Latent-Transformer / expansion /dataloader /depth_transforms.py

endo-yuki-t

initial commit

d7dbcdd over 3 years ago

20.1 kB

	from __future__ import division
	import torch
	import random
	import numpy as np
	import numbers
	import types
	import scipy.ndimage as ndimage
	import pdb
	import torchvision
	import PIL.Image as Image
	import cv2
	from torch.nn import functional as F


	class Compose(object):
	    """Chain several co-transforms into one callable.

	    Every transform is called as ``t(input, target, intr)`` and must return
	    the (possibly modified) ``(input, target, intr)`` triple, which is then
	    handed to the next transform in the list.

	    For example:
	    >>> pipeline = Compose([
	    >>>     RandomCrop(256),
	    >>>     ChromaticAug(),
	    >>> ])
	    >>> inputs, target, intr = pipeline(inputs, target, intr)
	    """

	    def __init__(self, co_transforms):
	        # Sequence of callables applied in order by __call__.
	        self.co_transforms = co_transforms

	    def __call__(self, input, target, intr):
	        """Run every transform in order and return the final triple."""
	        for transform in self.co_transforms:
	            input, target, intr = transform(input, target, intr)
	        return input, target, intr


	class Scale(object):
	    """Rescale both input images and the target by a constant factor.

	    Despite its name, ``size`` is used as a multiplicative scale factor: it
	    is passed to ``cv2.resize`` as ``fx``/``fy``. A factor of 1 is a no-op.

	    Args:
	        size: scale factor applied to both image axes.
	        order: interpolation used for the target: 0 nearest, 1 linear
	            (default), 2 cubic. The input images are always resized with
	            linear interpolation.

	    NOTE: ``__call__`` takes and returns ``(inputs, target)`` only, so this
	    transform does not follow the three-argument ``(inputs, target, intr)``
	    protocol that ``Compose`` uses.
	    """

	    def __init__(self, size, order=1):
	        self.ratio = size
	        self.order = order
	        interpolation_codes = {
	            0: cv2.INTER_NEAREST,
	            1: cv2.INTER_LINEAR,
	            2: cv2.INTER_CUBIC,
	        }
	        # None marks an unsupported order; it is reported when a resize is
	        # actually requested, so a no-op Scale(1, order=...) keeps working.
	        self.code = interpolation_codes.get(order)

	    def __call__(self, inputs, target):
	        """Resize ``inputs[0]``, ``inputs[1]`` and ``target`` by the factor.

	        ``inputs`` is modified in place and returned. The target values are
	        multiplied by the factor, except channel 2, which is resized with
	        nearest-neighbour interpolation and left unscaled.

	        Raises:
	            ValueError: if a resize is needed but ``order`` is not 0, 1 or 2.
	        """
	        if self.ratio == 1:
	            return inputs, target
	        if getattr(self, 'code', None) is None:
	            raise ValueError(
	                'unsupported interpolation order: %r (expected 0, 1 or 2)' % (self.order,))
	        ratio = self.ratio

	        for idx in (0, 1):
	            inputs[idx] = cv2.resize(inputs[idx], None, fx=ratio, fy=ratio,
	                                     interpolation=cv2.INTER_LINEAR)
	        # keep the mask same: resize it separately so it is neither blended
	        # nor multiplied by the scale factor
	        mask = cv2.resize(target[:, :, 2], None, fx=ratio, fy=ratio,
	                          interpolation=cv2.INTER_NEAREST)
	        target = cv2.resize(target, None, fx=ratio, fy=ratio,
	                            interpolation=self.code) * ratio
	        target[:, :, 2] = mask

	        return inputs, target


	class RandomCrop(object):
	    """Crop the image pair and the target at one random location.

	    The inputs are indexed as arrays (``inputs[0].shape`` must be
	    ``(height, width, channels)``).

	    Args:
	        size: ``(target_height, target_width)``, or a single number for a
	            square crop. A crop larger than the image is clamped to the
	            image size.
	    """

	    def __init__(self, size):
	        if isinstance(size, numbers.Number):
	            self.size = (int(size), int(size))
	        else:
	            self.size = size

	    def __call__(self, inputs, target, intr):
	        """Crop ``inputs[0]``, ``inputs[1]`` and ``target`` to the same window.

	        ``inputs`` is modified in place and returned; ``intr`` is NOT modified.

	        Returns:
	            ``(inputs, target, intr_out)`` where the arrays are float crops and
	            ``intr_out`` is a list of floats: a copy of ``intr`` with the crop
	            offset subtracted from entries 1 (x) and 2 (y), followed by the
	            six numbers ``[1, 0, 0, 1, 0, 0]``.
	        """
	        h, w, _ = inputs[0].shape
	        th, tw = self.size
	        tw = min(tw, w)
	        th = min(th, h)

	        x1 = random.randint(0, w - tw)
	        y1 = random.randint(0, h - th)

	        # Work on a copy: subtracting in place would shift the caller's
	        # intrinsics a little further on every call that reuses them.
	        shifted = np.array(intr, dtype=float)
	        shifted[1] -= x1
	        shifted[2] -= y1

	        rows = slice(y1, y1 + th)
	        cols = slice(x1, x1 + tw)
	        inputs[0] = inputs[0][rows, cols].astype(float)
	        inputs[1] = inputs[1][rows, cols].astype(float)
	        identity = np.asarray([1., 0., 0., 1., 0., 0.]).astype(float)
	        return inputs, target[rows, cols].astype(float), list(shifted) + list(identity)



	class SpatialAug(object):
	    """Random spatial augmentation of an image pair and its flow target.

	    A separate affine map is sampled for each of the two images (mirror,
	    rotation, translation, squeeze, scale); both images are resampled to the
	    crop size and the target is rewritten to match.

	    The current affine map is stored in ``self.t`` as six numbers
	    ``[a, b, c, d, e, f]`` meaning ``x' = a*x + c*y + e`` and
	    ``y' = b*x + d*y + f``.

	    Args:
	        crop: ``(height, width)`` of the output.
	        scale: ``[range0, range1, center]`` or None. The log-scale of the
	            first image is uniform in ``center +/- range0``; the second image
	            adds a uniform log-offset in ``+/- range1 * schedule_coeff``.
	        rot: ``[range0, range1]`` in radians, or None. Same scheme as scale.
	        trans: ``[range0, range1]`` as a fraction of the crop size, or None.
	        squeeze: ``[range0, range1]`` log-squeeze ranges, or None.
	        schedule_coeff: multiplier on every second-image (relative) range.
	        order: 0 resamples the target and mask with nearest-neighbour;
	            any other value uses grid_sample's default interpolation.
	        black: if True, accept the first sampled transform even when it
	            reads outside the source image.
	    """

	    def __init__(self, crop, scale=None, rot=None, trans=None, squeeze=None, schedule_coeff=1, order=1, black=False):
	        self.crop = crop
	        self.scale = scale
	        self.rot = rot
	        self.trans = trans
	        self.squeeze = squeeze
	        self.t = np.zeros(6)
	        self.schedule_coeff = schedule_coeff
	        self.order = order
	        self.black = black

	    def to_identity(self):
	        """Reset ``self.t`` to the identity map."""
	        self.t = np.array([1., 0., 0., 1., 0., 0.])

	    def left_multiply(self, u0, u1, u2, u3, u4, u5):
	        """Compose ``self.t`` with the map ``u``: apply ``self.t``, then ``u``.

	        ``u`` uses the same six-number layout as ``self.t``.
	        """
	        t = self.t
	        self.t = np.array([
	            t[0] * u0 + t[1] * u2,
	            t[0] * u1 + t[1] * u3,
	            t[2] * u0 + t[3] * u2,
	            t[2] * u1 + t[3] * u3,
	            t[4] * u0 + t[5] * u2 + u4,
	            t[4] * u1 + t[5] * u3 + u5,
	        ], dtype=float)

	    def inverse(self):
	        """Return the six numbers of the inverse of ``self.t`` (``self.t`` is unchanged)."""
	        a, b, c, d, e, f = self.t
	        denom = a * d - b * c

	        result = np.zeros(6)
	        result[0] = d / denom
	        result[1] = -b / denom
	        result[2] = -c / denom
	        result[3] = a / denom
	        result[4] = (c * f - d * e) / denom
	        result[5] = (b * e - a * f) / denom
	        return result

	    def grid_transform(self, meshgrid, t, normalize=True, gridsize=None):
	        """Apply the affine map ``t`` to a coordinate grid.

	        Args:
	            meshgrid: pair ``(x_grid, y_grid)`` of equally shaped 2-D tensors.
	            t: six-number affine map (same layout as ``self.t``).
	            normalize: if True, rescale the result to ``[-1, 1]`` using
	                ``2 * v / (size - 1) - 1``.
	            gridsize: ``(h, w)`` used for normalisation; defaults to the
	                shape of ``meshgrid[0]``.

	        Returns:
	            Tensor of shape ``(H, W, 2)`` holding ``(x, y)`` per grid point.
	        """
	        if gridsize is None:
	            h, w = meshgrid[0].shape
	        else:
	            h, w = gridsize
	        xs = meshgrid[0] * t[0] + meshgrid[1] * t[2] + t[4]
	        ys = meshgrid[0] * t[1] + meshgrid[1] * t[3] + t[5]
	        vgrid = torch.cat([xs[:, :, np.newaxis], ys[:, :, np.newaxis]], -1)
	        if normalize:
	            vgrid[:, :, 0] = 2.0 * vgrid[:, :, 0] / max(w - 1, 1) - 1.0
	            vgrid[:, :, 1] = 2.0 * vgrid[:, :, 1] / max(h - 1, 1) - 1.0
	        return vgrid

	    def __call__(self, inputs, target, intr):
	        """Sample a transform pair and warp ``inputs`` and ``target``.

	        Args:
	            inputs: list of two ``(h, w, c)`` arrays.
	            target: ``(h, w, k)`` float array. Channels 0-1 are treated as a
	                displacement (they are added to pixel positions), channel 2
	                as a validity mask (zeros are excluded from interpolation)
	                and any channels from 3 on are multiplied by
	                ``scale1 / scale0``.
	            intr: list of numbers; returned with the six numbers of the
	                first image's transform appended.

	        Returns:
	            ``(inputs, target, intr_out)`` with float numpy arrays of the
	            crop size and ``intr_out`` a list of floats.
	        """
	        h, w, _ = inputs[0].shape
	        th, tw = self.crop
	        # [::-1] puts the x grid first and the y grid second.
	        meshgrid = torch.meshgrid([torch.Tensor(range(th)), torch.Tensor(range(tw))])[::-1]
	        cornergrid = torch.meshgrid([torch.Tensor([0, th - 1]), torch.Tensor([0, tw - 1])])[::-1]
	        gridsize = [float(h), float(w)]

	        for _ in range(50):
	            # im0
	            self.to_identity()
	            mirror = bool(np.random.binomial(1, 0.5))
	            # Move the crop centre to the origin (optionally flipping x).
	            if mirror:
	                self.left_multiply(-1, 0, 0, 1, .5 * tw, -.5 * th)
	            else:
	                self.left_multiply(1, 0, 0, 1, -.5 * tw, -.5 * th)
	            scale0 = 1
	            scale1 = 1
	            squeeze0 = 1
	            squeeze1 = 1
	            if self.rot is not None:
	                rot0 = np.random.uniform(-self.rot[0], +self.rot[0])
	                rot1 = np.random.uniform(-self.rot[1] * self.schedule_coeff,
	                                         self.rot[1] * self.schedule_coeff) + rot0
	                self.left_multiply(np.cos(rot0), np.sin(rot0), -np.sin(rot0), np.cos(rot0), 0, 0)
	            if self.trans is not None:
	                trans0 = np.random.uniform(-self.trans[0], +self.trans[0], 2)
	                trans1 = np.random.uniform(-self.trans[1] * self.schedule_coeff,
	                                           +self.trans[1] * self.schedule_coeff, 2) + trans0
	                self.left_multiply(1, 0, 0, 1, trans0[0] * tw, trans0[1] * th)
	            if self.squeeze is not None:
	                squeeze0 = np.exp(np.random.uniform(-self.squeeze[0], self.squeeze[0]))
	                squeeze1 = np.exp(np.random.uniform(-self.squeeze[1] * self.schedule_coeff,
	                                                    self.squeeze[1] * self.schedule_coeff)) * squeeze0
	            if self.scale is not None:
	                scale0 = np.exp(np.random.uniform(self.scale[2] - self.scale[0],
	                                                  self.scale[2] + self.scale[0]))
	                scale1 = np.exp(np.random.uniform(-self.scale[1] * self.schedule_coeff,
	                                                  self.scale[1] * self.schedule_coeff)) * scale0
	            self.left_multiply(1.0 / (scale0 * squeeze0), 0, 0, 1.0 / (scale0 / squeeze0), 0, 0)

	            # Move the origin to the centre of the source image.
	            self.left_multiply(1, 0, 0, 1, .5 * w, .5 * h)
	            transmat0 = self.t.copy()

	            # im1
	            self.to_identity()
	            if mirror:
	                self.left_multiply(-1, 0, 0, 1, .5 * tw, -.5 * th)
	            else:
	                self.left_multiply(1, 0, 0, 1, -.5 * tw, -.5 * th)
	            if self.rot is not None:
	                self.left_multiply(np.cos(rot1), np.sin(rot1), -np.sin(rot1), np.cos(rot1), 0, 0)
	            if self.trans is not None:
	                self.left_multiply(1, 0, 0, 1, trans1[0] * tw, trans1[1] * th)
	            self.left_multiply(1.0 / (scale1 * squeeze1), 0, 0, 1.0 / (scale1 / squeeze1), 0, 0)
	            self.left_multiply(1, 0, 0, 1, .5 * w, .5 * h)
	            transmat1 = self.t.copy()
	            transmat1_inv = self.inverse()

	            if self.black:
	                # black augmentation, allowing 0 values in the input images
	                # https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/black_augmentation_layer.cu
	                break
	            # Accept only if all four crop corners of both images fall
	            # inside the source image (normalised coordinates in [-1, 1]).
	            outside = ((self.grid_transform(cornergrid, transmat0, gridsize=gridsize).abs() > 1).sum() +
	                       (self.grid_transform(cornergrid, transmat1, gridsize=gridsize).abs() > 1).sum())
	            if outside == 0:
	                break
	        else:
	            # Every attempt was rejected: fall back to a plain centre crop.
	            print('max_iter in augmentation')
	            self.to_identity()
	            self.left_multiply(1, 0, 0, 1, -.5 * tw, -.5 * th)
	            self.left_multiply(1, 0, 0, 1, .5 * w, .5 * h)
	            transmat0 = self.t.copy()
	            transmat1 = self.t.copy()
	            # Keep everything derived from the transforms consistent with the
	            # fallback; otherwise the flow below would be computed with the
	            # inverse and scales of the last rejected attempt.
	            transmat1_inv = self.inverse()
	            scale0 = 1
	            scale1 = 1

	        # NOTE(review): the grid is normalised with (size - 1), which matches
	        # grid_sample's align_corners=True convention, while grid_sample is
	        # called with the library default - confirm against the installed
	        # torch version.
	        def resample(array, grid, nearest=False):
	            tensor = torch.Tensor(array).permute(2, 0, 1)[np.newaxis]
	            if nearest:
	                out = F.grid_sample(tensor, grid[np.newaxis], mode='nearest')
	            else:
	                out = F.grid_sample(tensor, grid[np.newaxis])
	            return out[0].permute(1, 2, 0)

	        use_nearest = self.order == 0

	        # do the real work
	        vgrid = self.grid_transform(meshgrid, transmat0, gridsize=gridsize)
	        inputs_0 = resample(inputs[0], vgrid)
	        target_0 = resample(target, vgrid, nearest=use_nearest)

	        # Invalid pixels become NaN so that any interpolated value touching
	        # one of them turns into NaN and is then zeroed out.
	        mask_0 = target[:, :, 2:3].copy()
	        mask_0[mask_0 == 0] = np.nan
	        mask_0 = resample(mask_0, vgrid, nearest=use_nearest)
	        mask_0[torch.isnan(mask_0)] = 0

	        vgrid = self.grid_transform(meshgrid, transmat1, gridsize=gridsize)
	        inputs_1 = resample(inputs[1], vgrid)

	        # flow: map each output pixel into the first source image, follow
	        # the original displacement, then map back through the inverse of
	        # the second image's transform.
	        pos = target_0[:, :, :2] + self.grid_transform(meshgrid, transmat0, normalize=False)
	        pos = self.grid_transform(pos.permute(2, 0, 1), transmat1_inv, normalize=False)
	        channels = [(pos[:, :, 0] - meshgrid[0]).unsqueeze(-1),
	                    (pos[:, :, 1] - meshgrid[1]).unsqueeze(-1),
	                    mask_0]
	        if target_0.shape[2] >= 4:
	            # scale
	            channels.append(target_0[:, :, 3:] * scale1 / scale0)
	        target = torch.cat(channels, -1)

	        inputs = [np.asarray(inputs_0).astype(float), np.asarray(inputs_1).astype(float)]
	        target = np.asarray(target).astype(float)
	        # list(intr) keeps this a concatenation even when intr is an array.
	        return inputs, target, list(np.asarray(list(intr) + list(transmat0)).astype(float))



	class pseudoPCAAug(object):
	    """
	    Chromatic Eigen Augmentation: https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/data_augmentation_layer.cu
	    This version is faster.

	    It is implemented with torchvision's ``ColorJitter``. ``schedule_coeff``
	    is accepted but not used.
	    """
	    def __init__(self, schedule_coeff=1):
	        self.augcolor = torchvision.transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.5, hue=0.5/3.14)

	    def __call__(self, inputs, target, intr):
	        """Colour-jitter both images; ``target`` and ``intr`` pass through.

	        ``inputs`` is modified in place. The images are multiplied by 255
	        before conversion to 8-bit, so they are presumably expected in
	        [0, 1] - TODO confirm against the caller. The outputs are floats
	        divided by 255 again.
	        """
	        # Stack the two frames vertically so that a single ColorJitter call
	        # processes both of them.
	        stacked = np.concatenate([inputs[0], inputs[1]], 0)
	        # Split on the first frame's height so each frame gets its own rows
	        # back even if the two heights differ.
	        first_rows = inputs[0].shape[0]
	        # Clip before the 8-bit cast: out-of-range floats would otherwise
	        # wrap around instead of saturating.
	        as_uint8 = np.uint8(np.clip(stacked * 255, 0, 255))
	        jittered = np.asarray(self.augcolor(Image.fromarray(as_uint8))) / 255.
	        inputs[0] = jittered[:first_rows]
	        inputs[1] = jittered[first_rows:]
	        return inputs, target, intr


	class PCAAug(object):
	    """
	    Chromatic Eigen Augmentation: https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/data_augmentation_layer.cu

	    All random coefficients are drawn once, in ``__init__``; the same
	    coefficients are then applied to both images of every call.

	    Each ``*_pow`` / ``*_mult`` / ``*_add`` / ``col_rotate`` argument is a
	    three-element list or None (None keeps the neutral value). Element 0 is
	    the spread and element 2 the centre of the distribution; element 1 is
	    not read by this class. ``schedule_coeff`` is accepted but not used.
	    """
	    def __init__(self, lmult_pow=[0.4, 0, -0.2],
	                 lmult_mult=[0.4, 0, 0, ],
	                 lmult_add=[0.03, 0, 0, ],
	                 sat_pow=[0.4, 0, 0, ],
	                 sat_mult=[0.5, 0, -0.3],
	                 sat_add=[0.03, 0, 0, ],
	                 col_pow=[0.4, 0, 0, ],
	                 col_mult=[0.2, 0, 0, ],
	                 col_add=[0.02, 0, 0, ],
	                 ladd_pow=[0.4, 0, 0, ],
	                 ladd_mult=[0.4, 0, 0, ],
	                 ladd_add=[0.04, 0, 0, ],
	                 col_rotate=[1., 0, 0, ],
	                 schedule_coeff=1):
	        # Neutral coefficients; each is overwritten below when the matching
	        # argument is not None.
	        self.pow_nomean = [1, 1, 1]
	        self.add_nomean = [0, 0, 0]
	        self.mult_nomean = [1, 1, 1]
	        self.pow_withmean = [1, 1, 1]
	        self.add_withmean = [0, 0, 0]
	        self.mult_withmean = [1, 1, 1]
	        self.lmult_pow = 1
	        self.lmult_mult = 1
	        self.lmult_add = 0
	        self.col_angle = 0

	        # no mean: normally distributed, N(spec[2], spec[0])
	        if ladd_pow is not None:
	            self.pow_nomean[0] = np.exp(np.random.normal(ladd_pow[2], ladd_pow[0]))
	        if col_pow is not None:
	            self.pow_nomean[1] = np.exp(np.random.normal(col_pow[2], col_pow[0]))
	            self.pow_nomean[2] = np.exp(np.random.normal(col_pow[2], col_pow[0]))

	        if ladd_add is not None:
	            self.add_nomean[0] = np.random.normal(ladd_add[2], ladd_add[0])
	        if col_add is not None:
	            self.add_nomean[1] = np.random.normal(col_add[2], col_add[0])
	            self.add_nomean[2] = np.random.normal(col_add[2], col_add[0])

	        if ladd_mult is not None:
	            self.mult_nomean[0] = np.exp(np.random.normal(ladd_mult[2], ladd_mult[0]))
	        if col_mult is not None:
	            self.mult_nomean[1] = np.exp(np.random.normal(col_mult[2], col_mult[0]))
	            self.mult_nomean[2] = np.exp(np.random.normal(col_mult[2], col_mult[0]))

	        # with mean: uniformly distributed in spec[2] +/- spec[0]
	        if sat_pow is not None:
	            self.pow_withmean[1] = np.exp(np.random.uniform(sat_pow[2] - sat_pow[0], sat_pow[2] + sat_pow[0]))
	            self.pow_withmean[2] = self.pow_withmean[1]
	        if sat_add is not None:
	            self.add_withmean[1] = np.random.uniform(sat_add[2] - sat_add[0], sat_add[2] + sat_add[0])
	            self.add_withmean[2] = self.add_withmean[1]
	        if sat_mult is not None:
	            self.mult_withmean[1] = np.exp(np.random.uniform(sat_mult[2] - sat_mult[0], sat_mult[2] + sat_mult[0]))
	            self.mult_withmean[2] = self.mult_withmean[1]

	        if lmult_pow is not None:
	            self.lmult_pow = np.exp(np.random.uniform(lmult_pow[2] - lmult_pow[0], lmult_pow[2] + lmult_pow[0]))
	        if lmult_mult is not None:
	            self.lmult_mult = np.exp(np.random.uniform(lmult_mult[2] - lmult_mult[0], lmult_mult[2] + lmult_mult[0]))
	        if lmult_add is not None:
	            self.lmult_add = np.random.uniform(lmult_add[2] - lmult_add[0], lmult_add[2] + lmult_add[0])
	        if col_rotate is not None:
	            self.col_angle = np.random.uniform(col_rotate[2] - col_rotate[0], col_rotate[2] + col_rotate[0])

	        # eigen vectors
	        self.eigvec = np.reshape([0.51, 0.56, 0.65, 0.79, 0.01, -0.62, 0.35, -0.83, 0.44], [3, 3]).transpose()

	    def __call__(self, inputs, target, intr):
	        """Apply ``pca_image`` to both images; ``target``/``intr`` pass through."""
	        inputs[0] = self.pca_image(inputs[0])
	        inputs[1] = self.pca_image(inputs[1])
	        return inputs, target, intr

	    def pca_image(self, rgb):
	        """Return a colour-augmented copy of ``rgb``, clipped to [0, 1].

	        ``rgb`` is an ``(h, w, 3)`` array; it is projected onto
	        ``self.eigvec``, modified there and projected back.
	        """
	        eig = np.dot(rgb, self.eigvec)
	        mean_rgb = rgb.mean((0, 1))
	        max_abs_eig = np.abs(eig).max((0, 1))
	        max_l = np.sqrt(np.sum(max_abs_eig * max_abs_eig))
	        mean_eig = np.dot(mean_rgb, self.eigvec)

	        # no-mean stuff
	        eig -= mean_eig[np.newaxis, np.newaxis]

	        for c in range(3):
	            if max_abs_eig[c] > 1e-2:
	                mean_eig[c] /= max_abs_eig[c]
	                eig[:, :, c] = eig[:, :, c] / max_abs_eig[c]
	                # signed power: |x| ** p with the sign of x restored
	                eig[:, :, c] = np.power(np.abs(eig[:, :, c]), self.pow_nomean[c]) * \
	                    ((eig[:, :, c] > 0) - 0.5) * 2
	                eig[:, :, c] = eig[:, :, c] + self.add_nomean[c]
	                eig[:, :, c] = eig[:, :, c] * self.mult_nomean[c]
	        eig += mean_eig[np.newaxis, np.newaxis]

	        # withmean stuff
	        if max_abs_eig[0] > 1e-2:
	            eig[:, :, 0] = np.power(np.abs(eig[:, :, 0]), self.pow_withmean[0]) * \
	                ((eig[:, :, 0] > 0) - 0.5) * 2
	            eig[:, :, 0] = eig[:, :, 0] + self.add_withmean[0]
	            eig[:, :, 0] = eig[:, :, 0] * self.mult_withmean[0]

	        # magnitude of components 1 and 2, and its adjusted version
	        s = np.sqrt(eig[:, :, 1] * eig[:, :, 1] + eig[:, :, 2] * eig[:, :, 2])
	        smask = s > 1e-2
	        s1 = np.power(s, self.pow_withmean[1])
	        s1 = np.clip(s1 + self.add_withmean[1], 0, np.inf)
	        s1 = s1 * self.mult_withmean[1]
	        s1 = s1 * smask + s * (1 - smask)

	        # color angle
	        if self.col_angle != 0:
	            temp1 = np.cos(self.col_angle) * eig[:, :, 1] - np.sin(self.col_angle) * eig[:, :, 2]
	            temp2 = np.sin(self.col_angle) * eig[:, :, 1] + np.cos(self.col_angle) * eig[:, :, 2]
	            eig[:, :, 1] = temp1
	            eig[:, :, 2] = temp2

	        # to origin magnitude
	        for c in range(3):
	            if max_abs_eig[c] > 1e-2:
	                eig[:, :, c] = eig[:, :, c] * max_abs_eig[c]

	        if max_l > 1e-2:
	            l1 = np.sqrt(eig[:, :, 0] * eig[:, :, 0] + eig[:, :, 1] * eig[:, :, 1] + eig[:, :, 2] * eig[:, :, 2])
	            l1 = l1 / max_l

	        # Entries with a (near-)zero denominator are discarded by the masks,
	        # so the divide warnings they would trigger are silenced.
	        with np.errstate(divide='ignore', invalid='ignore'):
	            eig[:, :, 1][smask] = (eig[:, :, 1] / s * s1)[smask]
	            eig[:, :, 2][smask] = (eig[:, :, 2] / s * s1)[smask]

	            if max_l > 1e-2:
	                l = np.sqrt(eig[:, :, 0] * eig[:, :, 0] + eig[:, :, 1] * eig[:, :, 1] + eig[:, :, 2] * eig[:, :, 2])
	                l1 = np.power(l1, self.lmult_pow)
	                l1 = np.clip(l1 + self.lmult_add, 0, np.inf)
	                l1 = l1 * self.lmult_mult
	                l1 = l1 * max_l
	                lmask = l > 1e-2
	                eig[lmask] = (eig / l[:, :, np.newaxis] * l1[:, :, np.newaxis])[lmask]
	                for c in range(3):
	                    eig[:, :, c][lmask] = (np.clip(eig[:, :, c], -np.inf, max_abs_eig[c]))[lmask]

	        return np.clip(np.dot(eig, self.eigvec.transpose()), 0, 1)


	class ChromaticAug(object):
	    """
	    Chromatic augmentation: https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/data_augmentation_layer.cu

	    The coefficients are drawn once, in ``__init__``. Each argument is the
	    spread of its distribution: ``noise`` is the upper bound of a uniform
	    draw; the others are standard deviations (multiplied by
	    ``schedule_coeff``) of zero-mean normal draws, exponentiated for
	    gamma, contrast and the three per-channel colour factors.
	    """
	    def __init__(self, noise=0.06,
	                 gamma=0.02,
	                 brightness=0.02,
	                 contrast=0.02,
	                 color=0.02,
	                 schedule_coeff=1):

	        self.noise = np.random.uniform(0, noise)
	        self.gamma = np.exp(np.random.normal(0, gamma * schedule_coeff))
	        self.brightness = np.random.normal(0, brightness * schedule_coeff)
	        self.contrast = np.exp(np.random.normal(0, contrast * schedule_coeff))
	        self.color = np.exp(np.random.normal(0, color * schedule_coeff, 3))

	    def __call__(self, inputs, target, intr):
	        """Colour-shift the second image and add noise to both images.

	        Only ``inputs[1]`` goes through ``chrom_aug``. ``inputs`` is modified
	        in place; ``target`` and ``intr`` are returned unchanged.
	        """
	        inputs[1] = self.chrom_aug(inputs[1])
	        # noise: each image gets noise of its own shape
	        inputs[0] += np.random.normal(0, self.noise, inputs[0].shape)
	        inputs[1] += np.random.normal(0, self.noise, inputs[1].shape)
	        return inputs, target, intr

	    def chrom_aug(self, rgb):
	        """Return ``rgb`` after colour, gamma, brightness and contrast changes.

	        The result is a new array clipped to [0, 1].
	        """
	        # color change, rescaled so the per-pixel channel sum stays roughly
	        # the same (0.01 guards against division by zero)
	        channel_sum = rgb.sum(-1)
	        rgb = rgb * self.color[np.newaxis, np.newaxis]
	        brightness_coeff = channel_sum / (rgb.sum(-1) + 0.01)
	        rgb = np.clip(rgb * brightness_coeff[:, :, np.newaxis], 0, 1)
	        # gamma
	        rgb = np.power(rgb, self.gamma)
	        # brightness
	        rgb += self.brightness
	        # contrast, around mid-grey
	        rgb = 0.5 + (rgb - 0.5) * self.contrast
	        return np.clip(rgb, 0, 1)