Thesis / dataset /transforms.py
Ryan-Pham's picture
Upload 94 files
6445525 verified
import random
import numpy as np
import torch
import torchvision.transforms.functional as F
from PIL import Image
# Augmentation for Training
class Augmentation(object):
    """Training-time augmentation for a clip of PIL frames and its boxes.

    One set of random parameters (crop window, horizontal flip, HSV
    distortion) is drawn per call and applied identically to every frame of
    the clip, so the frames stay spatially and photometrically consistent.

    Args:
        img_size: Side length of the square output frames.
        jitter: Maximum crop offset on each side, as a fraction of the
            frame width/height.
        hue: Maximum absolute hue shift, as a fraction of the 0-255 range.
        saturation: Upper bound of the random saturation factor.
        exposure: Upper bound of the random exposure (value) factor.
    """

    def __init__(self, img_size=224, jitter=0.2, hue=0.1, saturation=1.5, exposure=1.5):
        self.img_size = img_size
        self.jitter = jitter
        self.hue = hue
        self.saturation = saturation
        self.exposure = exposure

    def rand_scale(self, s):
        """Return a factor drawn from [1, s] or, half of the time, its reciprocal."""
        scale = random.uniform(1, s)
        if random.randint(0, 1):
            return scale
        return 1. / scale

    def random_distort_image(self, video_clip):
        """Apply one random hue/saturation/exposure distortion to every frame.

        Args:
            video_clip: Iterable of PIL images.

        Returns:
            A new list of RGB PIL images.
        """
        dhue = random.uniform(-self.hue, self.hue)
        dsat = self.rand_scale(self.saturation)
        dexp = self.rand_scale(self.exposure)

        def change_hue(x):
            # Shift the hue and wrap values that leave the 0-255 range.
            x += dhue * 255
            if x > 255:
                x -= 255
            if x < 0:
                x += 255
            return x

        # The distortion is identical for all frames, so the three 256-entry
        # lookup tables are built once instead of once per frame.
        levels = range(256)
        hue_lut = [change_hue(level) for level in levels]
        sat_lut = [level * dsat for level in levels]
        exp_lut = [level * dexp for level in levels]

        distorted = []
        for image in video_clip:
            hsv = image.convert('HSV')
            hue_band, sat_band, val_band = hsv.split()
            bands = (
                hue_band.point(hue_lut),
                sat_band.point(sat_lut),
                val_band.point(exp_lut),
            )
            distorted.append(Image.merge(hsv.mode, bands).convert('RGB'))
        return distorted

    def random_crop(self, video_clip, width, height):
        """Crop every frame with one randomly jittered window.

        The window may extend beyond the frame (negative offsets), in which
        case PIL pads the outside area.

        Args:
            video_clip: Iterable of PIL images.
            width: Width of the original frames in pixels.
            height: Height of the original frames in pixels.

        Returns:
            Tuple ``(cropped_clip, dx, dy, sx, sy)`` where ``sx``/``sy`` are
            the crop size relative to the original size and ``dx``/``dy``
            are the left/top offsets relative to the crop size.
        """
        dw = int(width * self.jitter)
        dh = int(height * self.jitter)

        pleft = random.randint(-dw, dw)
        pright = random.randint(-dw, dw)
        ptop = random.randint(-dh, dh)
        pbot = random.randint(-dh, dh)

        swidth = width - pleft - pright
        sheight = height - ptop - pbot

        sx = float(swidth) / width
        sy = float(sheight) / height

        dx = (float(pleft) / width) / sx
        dy = (float(ptop) / height) / sy

        # PIL's crop box is (left, upper, right, lower) with right/lower
        # exclusive, so this window is exactly swidth x sheight pixels and
        # matches the sx, sy, dx, dy values used to remap the boxes.
        box = (pleft, ptop, pleft + swidth, ptop + sheight)
        cropped_clip = [img.crop(box) for img in video_clip]

        return cropped_clip, dx, dy, sx, sy

    def apply_bbox(self, target, ow, oh, dx, dy, sx, sy):
        """Map pixel boxes into normalised coordinates of the cropped frame.

        Args:
            target: 2-D array whose columns 0-3 are treated as
                ``x1, y1, x2, y2`` in pixels of the original frame; any
                further columns are passed through unchanged.
            ow: Original frame width in pixels.
            oh: Original frame height in pixels.
            dx, dy, sx, sy: Crop parameters as returned by ``random_crop``.

        Returns:
            A new floating-point array with the coordinates clipped to
            ``[0, 0.999]`` and with boxes narrower or shorter than one pixel
            (measured against ``ow``/``oh``) removed. ``target`` itself is
            not modified.
        """
        boxes = np.asarray(target)
        # Work on a floating-point copy: writing the normalised values back
        # into the caller's array would corrupt the stored annotations and,
        # for integer arrays, truncate every coordinate to 0.
        if np.issubdtype(boxes.dtype, np.floating):
            float_dtype = boxes.dtype
        else:
            float_dtype = np.float64
        boxes = boxes.astype(float_dtype)

        inv_sx, inv_sy = 1. / sx, 1. / sy

        # apply deltas on bbox
        boxes[..., [0, 2]] = np.clip(boxes[..., [0, 2]] / ow * inv_sx - dx, 0, 0.999)
        boxes[..., [1, 3]] = np.clip(boxes[..., [1, 3]] / oh * inv_sy - dy, 0, 0.999)

        # refine target: drop boxes that collapsed to less than one pixel
        box_w = (boxes[:, 2] - boxes[:, 0]) * ow
        box_h = (boxes[:, 3] - boxes[:, 1]) * oh
        too_small = (box_w < 1.) | (box_h < 1.)

        return boxes[~too_small]

    def to_tensor(self, video_clip):
        """Convert each PIL frame with ``F.to_tensor`` and multiply by 255."""
        return [F.to_tensor(image) * 255. for image in video_clip]

    def __call__(self, video_clip, target):
        """Augment a clip and its boxes.

        Args:
            video_clip: Non-empty list of PIL images of identical size.
            target: Array of pixel boxes (see ``apply_bbox``) or ``None``.

        Returns:
            Tuple ``(frames, target)``: a list of per-frame tensors and a
            float tensor of normalised boxes (empty when ``target`` is
            ``None``).
        """
        # Initialize Random Variables
        first_frame = video_clip[0]
        oh = first_frame.height
        ow = first_frame.width

        # random crop
        video_clip, dx, dy, sx, sy = self.random_crop(video_clip, ow, oh)

        # resize
        out_size = (self.img_size, self.img_size)
        video_clip = [img.resize(out_size) for img in video_clip]

        # random flip
        flip = random.randint(0, 1)
        if flip:
            video_clip = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in video_clip]

        # distort
        video_clip = self.random_distort_image(video_clip)

        # process target
        if target is not None:
            target = self.apply_bbox(target, ow, oh, dx, dy, sx, sy)
            if flip:
                # Mirroring swaps the roles of the left and right edges.
                target[..., [0, 2]] = 1.0 - target[..., [2, 0]]
        else:
            target = np.array([])

        # to tensor
        video_clip = self.to_tensor(video_clip)
        target = torch.as_tensor(target).float()

        return video_clip, target
# Transform for Testing
class BaseTransform(object):
    """Test-time transform: resize every frame and convert it to a tensor.

    Args:
        img_size: Side length of the square output frames.
    """

    def __init__(self, img_size=224):
        self.img_size = img_size

    def to_tensor(self, video_clip):
        """Convert each PIL frame with ``F.to_tensor`` and multiply by 255."""
        return [F.to_tensor(image) * 255. for image in video_clip]

    def __call__(self, video_clip, target=None, normalize=True):
        """Resize the clip and return it as a list of tensors.

        Args:
            video_clip: Iterable of PIL images; an empty clip yields ``[]``.
            target: Accepted for call compatibility; currently ignored.
            normalize: Accepted for call compatibility; currently ignored.

        Returns:
            List with one tensor per frame. Only the frames are returned;
            no target is produced.
        """
        # resize
        out_size = (self.img_size, self.img_size)
        resized_clip = [img.resize(out_size) for img in video_clip]

        # to tensor
        return self.to_tensor(resized_clip)