|
|
import random |
|
|
import numpy as np |
|
|
import torch |
|
|
import torchvision.transforms.functional as F |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
|
|
|
class Augmentation(object):
    """Random training-time augmentation for a video clip and its boxes.

    Applies, in order: a shared random jitter-crop, a square resize, a random
    horizontal flip, and a random HSV color distortion. Box targets are
    remapped into the cropped/flipped frame and returned normalized to [0, 1).

    Args:
        img_size: output side length; every frame is resized to (img_size, img_size).
        jitter: max crop offset per border, as a fraction of width/height.
        hue: max absolute hue shift, as a fraction of the 255-valued H channel.
        saturation: upper bound of the random saturation scale (see rand_scale).
        exposure: upper bound of the random value/exposure scale (see rand_scale).
    """

    def __init__(self, img_size=224, jitter=0.2, hue=0.1, saturation=1.5, exposure=1.5):
        self.img_size = img_size
        self.jitter = jitter
        self.hue = hue
        self.saturation = saturation
        self.exposure = exposure

    def rand_scale(self, s):
        """Return a random factor in [1, s], inverted to [1/s, 1] half the time.

        This gives a multiplicative jitter that is symmetric in log-space
        around 1 (the Darknet-style color scale).
        """
        scale = random.uniform(1, s)

        # Coin flip: scale up or scale down by the same factor.
        if random.randint(0, 1):
            return scale

        return 1./scale

    def random_distort_image(self, video_clip):
        """Apply one random HSV distortion consistently to every frame.

        A single (dhue, dsat, dexp) triple is drawn per clip so that all
        frames receive identical color jitter.

        Args:
            video_clip: list of PIL.Image frames (RGB).

        Returns:
            New list of distorted PIL.Image frames (RGB).
        """
        dhue = random.uniform(-self.hue, self.hue)
        dsat = self.rand_scale(self.saturation)
        dexp = self.rand_scale(self.exposure)

        video_clip_ = []
        for image in video_clip:
            # Work in HSV so hue/saturation/value can be adjusted per channel.
            image = image.convert('HSV')
            cs = list(image.split())

            # Scale saturation (cs[1]) and value/exposure (cs[2]).
            # NOTE(review): point() with a function builds a 0..255 LUT;
            # results > 255 are presumably clamped by PIL — confirm.
            cs[1] = cs[1].point(lambda i: i * dsat)
            cs[2] = cs[2].point(lambda i: i * dexp)

            def change_hue(x):
                # Shift hue and wrap around the circular 0..255 range.
                # A single wrap step suffices while self.hue <= 1
                # (|dhue * 255| <= 255).
                x += dhue * 255
                if x > 255:
                    x -= 255
                if x < 0:
                    x += 255
                return x

            cs[0] = cs[0].point(change_hue)
            image = Image.merge(image.mode, tuple(cs))

            # Back to RGB for the rest of the pipeline.
            image = image.convert('RGB')

            video_clip_.append(image)

        return video_clip_

    def random_crop(self, video_clip, width, height):
        """Randomly jitter-crop all frames with one shared crop window.

        Border offsets may be negative, i.e. the crop box can extend past the
        image border; PIL pads such out-of-bounds regions with black.

        Args:
            video_clip: list of PIL.Image frames.
            width, height: original frame size in pixels.

        Returns:
            (cropped_clip, dx, dy, sx, sy): the cropped frames, plus the
            crop-to-original size ratios (sx, sy) and the crop origin
            (dx, dy) normalized to the cropped frame — the exact parameters
            consumed by apply_bbox.
        """
        # Maximum jitter in pixels per border.
        dw =int(width * self.jitter)
        dh =int(height * self.jitter)

        # Independent random offsets for each of the four borders.
        pleft = random.randint(-dw, dw)
        pright = random.randint(-dw, dw)
        ptop = random.randint(-dh, dh)
        pbot = random.randint(-dh, dh)

        # Resulting crop-window size.
        swidth = width - pleft - pright
        sheight = height - ptop - pbot

        # Crop size relative to the original size.
        sx = float(swidth) / width
        sy = float(sheight) / height

        # Crop origin normalized to the *cropped* frame's coordinates.
        dx = (float(pleft) / width)/sx
        dy = (float(ptop) / height)/sy

        # NOTE(review): the "- 1" makes the actual crop (swidth-1, sheight-1)
        # pixels while sx/sy are computed from swidth/sheight — a one-pixel
        # inconsistency kept for parity with the original Darknet recipe.
        cropped_clip = [img.crop((pleft, ptop, pleft + swidth - 1, ptop + sheight - 1)) for img in video_clip]

        return cropped_clip, dx, dy, sx, sy

    def apply_bbox(self, target, ow, oh, dx, dy, sx, sy):
        """Remap boxes into the cropped frame and drop degenerate ones.

        Args:
            target: numpy array of shape (N, D) with D >= 4; columns 0..3 are
                [x1, y1, x2, y2] in pixels of the original frame. Mutated
                in place.
            ow, oh: original frame width/height in pixels.
            dx, dy, sx, sy: crop parameters returned by random_crop.

        Returns:
            numpy array of shape (M, D) with coordinates normalized and
            clamped to [0, 0.999]; boxes smaller than one original pixel
            after remapping are removed (M <= N).
        """
        # Invert the scales: pixel coords -> cropped-frame fractions.
        sx, sy = 1./sx, 1./sy

        # Normalize, shift into the crop window, clamp into [0, 0.999].
        target[..., 0] = np.minimum(0.999, np.maximum(0, target[..., 0] / ow * sx - dx))
        target[..., 1] = np.minimum(0.999, np.maximum(0, target[..., 1] / oh * sy - dy))
        target[..., 2] = np.minimum(0.999, np.maximum(0, target[..., 2] / ow * sx - dx))
        target[..., 3] = np.minimum(0.999, np.maximum(0, target[..., 3] / oh * sy - dy))

        # Keep only boxes that remain at least ~1 pixel wide and tall.
        refine_target = []
        for i in range(target.shape[0]):
            tgt = target[i]
            bw = (tgt[2] - tgt[0]) * ow
            bh = (tgt[3] - tgt[1]) * oh

            if bw < 1. or bh < 1.:
                continue

            refine_target.append(tgt)

        # reshape(-1, D) keeps a consistent 2-D shape even when empty.
        refine_target = np.array(refine_target).reshape(-1, target.shape[-1])

        return refine_target

    def to_tensor(self, video_clip):
        """Convert PIL frames to (C, H, W) float tensors scaled to [0, 255]."""
        # F.to_tensor maps pixels to [0, 1]; rescale to raw pixel magnitudes.
        return [F.to_tensor(image) * 255. for image in video_clip]

    def __call__(self, video_clip, target):
        """Augment one clip and its key-frame targets.

        Args:
            video_clip: list of PIL.Image frames, all the same size.
            target: numpy array (N, >=4) of pixel-space [x1, y1, x2, y2, ...]
                boxes, or None.

        Returns:
            (frames, boxes): list of (C, H, W) float tensors in [0, 255],
            and a float tensor of normalized boxes (empty when target is None
            or no box survives the crop).
        """
        # Original clip size (all frames assumed to share it).
        oh = video_clip[0].height
        ow = video_clip[0].width

        # 1) Shared random jitter-crop across the whole clip.
        video_clip, dx, dy, sx, sy = self.random_crop(video_clip, ow, oh)

        # 2) Resize every frame to the square network input size.
        video_clip = [img.resize([self.img_size, self.img_size]) for img in video_clip]

        # 3) Random horizontal flip, applied to all frames together.
        flip = random.randint(0, 1)
        if flip:
            video_clip = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in video_clip]

        # 4) Random color distortion (same jitter for the whole clip).
        video_clip = self.random_distort_image(video_clip)

        # Remap boxes to the cropped frame; mirror x-coords when flipped.
        # (Normalized coords are unaffected by the square resize.)
        if target is not None:
            target = self.apply_bbox(target, ow, oh, dx, dy, sx, sy)
            if flip:
                target[..., [0, 2]] = 1.0 - target[..., [2, 0]]
        else:
            target = np.array([])

        # Convert to tensors for the model.
        video_clip = self.to_tensor(video_clip)
        target = torch.as_tensor(target).float()

        return video_clip, target
|
|
|
|
|
|
|
|
|
|
|
class BaseTransform(object):
    """Deterministic (test-time) transform for a video clip.

    Resizes every frame to a square (img_size x img_size) and converts the
    frames to float tensors in [0, 255]. Unlike `Augmentation`, no random
    crop, flip, or color distortion is applied, and box targets are not
    transformed.
    """

    def __init__(self, img_size=224):
        # Target square side length for every output frame.
        self.img_size = img_size

    def to_tensor(self, video_clip):
        """Convert PIL frames to (C, H, W) float tensors scaled to [0, 255]."""
        # F.to_tensor maps pixels to [0, 1]; rescale to raw pixel magnitudes
        # for consistency with `Augmentation.to_tensor`.
        return [F.to_tensor(image) * 255. for image in video_clip]

    def __call__(self, video_clip, target=None, normalize=True):
        """Resize and tensorize one clip.

        Args:
            video_clip: list of PIL.Image frames.
            target: accepted for interface compatibility with `Augmentation`;
                currently ignored (boxes are left to the caller at inference).
            normalize: accepted for interface compatibility; currently unused.

        Returns:
            list of (C, H, W) float tensors in [0, 255].
        """
        # Resize every frame to a square; aspect ratio is not preserved.
        video_clip = [img.resize([self.img_size, self.img_size]) for img in video_clip]

        return self.to_tensor(video_clip)
|
|
|
|
|
|