| |
| |
| |
|
|
| import numbers |
| import random |
| import torch |
|
|
# accimage is an optional, accelerated image backend; fall back silently
# when it is not installed.
try:
    import accimage
except ImportError:  # narrowed from a bare `except:`, which also hid real errors
    pass
|
|
| from torchvision.transforms import ( |
| RandomResizedCrop, |
| ) |
|
|
| from . import functional_video as F |
|
|
| def _get_image_size(img): |
| if isinstance(img, torch.Tensor) and img.dim() > 2: |
| return img.shape[-2:][::-1] |
| else: |
| raise TypeError("Unexpected type {}".format(type(img))) |
|
|
class RandomCrop(object):
    """Crop the given image at a random location.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
        padding (int or sequence, optional): Optional padding on each border
            of the image. Default is None, i.e no padding. If a sequence of
            length 4 is provided, it is used to pad left, top, right, bottom
            borders respectively. If a sequence of length 2 is provided, it is
            used to pad left/right, top/bottom borders, respectively.
        pad_if_needed (boolean): It will pad the image if smaller than the
            desired size to avoid raising an exception. Since cropping is done
            after padding, the padding seems to be done at a random offset.
        fill: Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant.
        padding_mode: Type of padding. Should be: constant, edge, reflect or
            symmetric. Default is constant.
            - constant: pads with a constant value, this value is specified with fill
            - edge: pads with the last value on the edge of the image
            - reflect: pads with reflection of image (without repeating the last value on the edge)
              padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
              will result in [3, 2, 1, 2, 3, 4, 3, 2]
            - symmetric: pads with reflection of image (repeating the last value on the edge)
              padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
              will result in [2, 1, 1, 2, 3, 4, 4, 3]
    """

    def __init__(self, size, padding=None, pad_if_needed=False, fill=0,
                 padding_mode='constant'):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    @staticmethod
    def get_params(img, output_size):
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (torch.Tensor): Image to be cropped (last two dims are H, W).
            output_size (tuple): Expected output size of the crop as (th, tw).

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
        """
        w, h = _get_image_size(img)
        th, tw = output_size
        if w == tw and h == th:
            return 0, 0, h, w

        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def __call__(self, img):
        """
        Args:
            img (torch.Tensor): Image to be cropped.

        Returns:
            torch.Tensor: Cropped image.
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        # BUG FIX: the original read ``img.size[0]`` / ``img.size[1]``, but
        # ``size`` is a *method* on torch.Tensor (and _get_image_size only
        # accepts tensors), so the pad_if_needed path always raised TypeError.
        # Query the dimensions through the shared helper instead.
        if self.pad_if_needed:
            width, height = _get_image_size(img)
            # pad the width if needed
            if width < self.size[1]:
                img = F.pad(img, (self.size[1] - width, 0), self.fill, self.padding_mode)
                width, height = _get_image_size(img)
            # pad the height if needed
            if height < self.size[0]:
                img = F.pad(img, (0, self.size[0] - height), self.fill, self.padding_mode)

        i, j, h, w = self.get_params(img, self.size)

        return F.crop(img, i, j, h, w)

    def __repr__(self):
        return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
|
|
|
|
|
|
| |
|
|
class RandomCropVideo(RandomCrop):
    """Crop a video clip at a random spatial location."""

    def __init__(self, size):
        # A bare number means a square crop; otherwise use (h, w) as given.
        if isinstance(size, numbers.Number):
            size = (int(size), int(size))
        self.size = size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped/resized video clip.
            size is (C, T, OH, OW)
        """
        top, left, height, width = self.get_params(clip, self.size)
        return F.crop(clip, top, left, height, width)

    def __repr__(self):
        return '{}(size={})'.format(self.__class__.__name__, self.size)
|
|
|
|
class RandomResizedCropVideo(RandomResizedCrop):
    """Randomly crop a region of a video clip and resize it to a fixed size."""

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0),
                 interpolation_mode="bilinear"):
        if isinstance(size, tuple):
            assert len(size) == 2, "size should be tuple (height, width)"
            self.size = size
        else:
            self.size = (size, size)
        self.interpolation_mode = interpolation_mode
        self.scale = scale
        self.ratio = ratio

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped/resized video clip.
            size is (C, T, H, W)
        """
        top, left, height, width = self.get_params(clip, self.scale, self.ratio)
        return F.resized_crop(clip, top, left, height, width, self.size,
                              self.interpolation_mode)

    def __repr__(self):
        details = '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
            self.size, self.interpolation_mode, self.scale, self.ratio
        )
        return self.__class__.__name__ + details
|
|
|
|
class CenterCropVideo(object):
    """Crop the spatial center of a video clip."""

    def __init__(self, crop_size):
        # A bare number means a square crop; otherwise use the pair as given.
        if isinstance(crop_size, numbers.Number):
            crop_size = (int(crop_size), int(crop_size))
        self.crop_size = crop_size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: central cropping of video clip. Size is
            (C, T, crop_size, crop_size)
        """
        return F.center_crop(clip, self.crop_size)

    def __repr__(self):
        return '{}(crop_size={})'.format(self.__class__.__name__, self.crop_size)
| |
class CornerCropVideo(object):
    """Crop a fixed corner (or the center) of a video clip.

    Args:
        crop_size (int or sequence): output crop size; an int gives a square
            crop (crop_size, crop_size).
        loc (str): default crop location used when ``__call__`` is not given
            one explicitly: "tr", "center", or anything else for the opposite
            corner.
    """

    def __init__(self, crop_size, loc="tr"):
        if isinstance(crop_size, numbers.Number):
            self.crop_size = (int(crop_size), int(crop_size))
        else:
            self.crop_size = crop_size
        # BUG FIX: `loc` was accepted here but silently discarded; keep it so
        # the constructor argument actually takes effect as the default.
        self.loc = loc

    def __call__(self, clip, loc=None):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
            loc (str, optional): crop location override; defaults to the
                location given at construction time.
        Returns:
            torch.tensor: corner cropping of video clip. Size is
            (C, T, crop_size, crop_size)
        """
        if loc is None:
            loc = self.loc
        if loc == "tr":
            i = 0
            j = 0
        elif loc == "center":
            return F.corner_crop(clip, self.crop_size)
        else:
            # BUG FIX: the original subtracted the crop_size *tuple* from an
            # int (`clip.size(-2) - self.crop_size`), which raised TypeError.
            # Index the (height, width) pair instead.
            i = clip.size(-2) - self.crop_size[0]
            j = clip.size(-1) - self.crop_size[1]
        return F.corner_crop(clip, self.crop_size, i, j)

    def __repr__(self):
        return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
|
|
|
|
class NormalizeVideo(object):
    """
    Normalize the video clip by mean subtraction and division by standard deviation

    Args:
        mean (3-tuple): pixel RGB mean
        std (3-tuple): pixel RGB standard deviation
        inplace (boolean): whether do in-place normalization
    """

    def __init__(self, mean, std, inplace=False):
        self.mean, self.std, self.inplace = mean, std, inplace

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
        """
        return F.normalize(clip, self.mean, self.std, self.inplace)

    def __repr__(self):
        fmt = '(mean={0}, std={1}, inplace={2})'
        return self.__class__.__name__ + fmt.format(self.mean, self.std, self.inplace)
|
|
|
|
class ToTensorVideo(object):
    """
    Convert tensor data type from uint8 to float, divide value by 255.0 and
    permute the dimensions of clip tensor
    """

    def __init__(self):
        # Stateless transform: nothing to configure.
        pass

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
        Return:
            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
        """
        return F.to_tensor(clip)

    def __repr__(self):
        return self.__class__.__name__
|
|
|
|
class RandomHorizontalFlipVideo(object):
    """
    Flip the video clip along the horizonal direction with a given probability

    Args:
        p (float): probability of the clip being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Size is (C, T, H, W)
        Return:
            clip (torch.tensor): Size is (C, T, H, W)
        """
        should_flip = random.random() < self.p
        return F.hflip(clip) if should_flip else clip

    def __repr__(self):
        return "{}(p={})".format(self.__class__.__name__, self.p)
|
|
| |
| |
class ResizeVideo(object):
    """
    Resize the video clip spatially with bilinear interpolation.

    Args:
        w (int): target width
        h (int): target height
    """

    def __init__(self, w, h):
        self.w = w
        self.h = h

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Size is (C, T, H, W)
        Return:
            clip (torch.tensor): Size is (C, T, h, w)
        """
        # interpolate operates on (N, C, H, W); treat frames as the batch dim.
        frames_first = clip.permute(1, 0, 2, 3)
        resized = torch.nn.functional.interpolate(
            frames_first, (self.h, self.w), mode="bilinear", align_corners=False
        )
        return resized.permute(1, 0, 2, 3)

    def __repr__(self):
        return self.__class__.__name__ + "(w=%d,h=%d)" % (self.w, self.h)