|
|
|
|
|
|
|
|
import numpy as np |
|
|
import numbers |
|
|
import random |
|
|
|
|
|
import torch |
|
|
from torchvision.transforms import ( |
|
|
RandomCrop, |
|
|
RandomResizedCrop, |
|
|
) |
|
|
|
|
|
import functional_video as F |
|
|
|
|
|
|
|
|
# Public API of this module. Every transform class defined in this file is
# listed, in definition order, so that ``from <module> import *`` exposes
# ResizeVideo as well.
__all__ = [
    "RandomCropVideo",
    "ResizeVideo",
    "RandomResizedCropVideo",
    "CenterCropVideo",
    "NormalizeVideo",
    "ToTensorVideo",
    "RandomHorizontalFlipVideo",
]
|
|
|
|
|
|
|
|
class RandomCropVideo(RandomCrop):
    """Take a random spatial crop from a video clip.

    Crop coordinates come from the inherited ``get_params`` and the crop
    itself is performed by ``F.crop``.

    Args:
        size (number or sequence): requested crop size. A single number is
            converted to the square ``(int(size), int(size))``; any other
            value is stored unchanged.
    """

    def __init__(self, size):
        # A bare number means a square crop; anything else is kept as given.
        is_scalar = isinstance(size, numbers.Number)
        self.size = (int(size), int(size)) if is_scalar else size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped video clip.
                size is (C, T, OH, OW)
        """
        offset_i, offset_j, out_h, out_w = self.get_params(clip, self.size)
        return F.crop(clip, offset_i, offset_j, out_h, out_w)

    def __repr__(self):
        details = '(size={0})'.format(self.size)
        return type(self).__name__ + details
|
|
|
|
|
|
|
|
class ResizeVideo:
    """Resize a video clip by delegating to ``F.resize``.

    Args:
        size: target size, forwarded unchanged to ``F.resize``.
        interpolation_mode (str): interpolation mode forwarded to
            ``F.resize``. Default value is "bilinear"
    """

    def __init__(
        self,
        size,
        interpolation_mode="bilinear"
    ):
        self.size = size
        self.interpolation_mode = interpolation_mode

    def __call__(self, clip):
        """
        Args:
            clip: video clip to be resized; passed as-is to ``F.resize``.
        Returns:
            The value returned by
            ``F.resize(clip, self.size, self.interpolation_mode)``.
        """
        return F.resize(clip, self.size, self.interpolation_mode)

    def __repr__(self):
        # Same "<ClassName>(field=value, ...)" layout as the other transforms
        # in this file, so composed pipelines print readably.
        return self.__class__.__name__ + \
            '(size={0}, interpolation_mode={1})'.format(
                self.size, self.interpolation_mode
            )
|
|
|
|
|
|
|
|
class RandomResizedCropVideo(RandomResizedCrop):
    """Resize a video clip, then take a random square crop from the result.

    The parent ``__init__`` is not called, so this object only carries the
    three attributes set below (``size``, ``interpolation_mode``, ``crop``).

    Args:
        size (int or tuple): size handed to ``F.resize``. A tuple must have
            exactly two entries ``(height, width)``; any other value is
            expanded to ``(size, size)``.
        crop (int): side length of the square window cut out after resizing.
        interpolation_mode (str): interpolation mode handed to ``F.resize``.
            Default value is "bilinear"
    """

    def __init__(
        self,
        size,
        crop,
        interpolation_mode="bilinear",
    ):
        if isinstance(size, tuple):
            assert len(size) == 2, "size should be tuple (height, width)"
            self.size = size
        else:
            self.size = (size, size)

        self.interpolation_mode = interpolation_mode
        self.crop = crop

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: resized, then randomly cropped video clip.
                size is (C, T, OH, OW), where OH and OW are at most
                ``self.crop`` (smaller when the resized clip is smaller
                than the crop window)
        """
        clip = F.resize(clip, self.size, self.interpolation_mode)

        # Row offset is drawn before the column offset, as before.
        i = self._random_offset(clip.shape[2], self.crop)
        j = self._random_offset(clip.shape[3], self.crop)
        return clip[..., i:i + self.crop, j:j + self.crop]

    @staticmethod
    def _random_offset(extent, crop):
        """Pick a start index for a window of length `crop` inside `extent`."""
        slack = extent - crop
        if slack <= 0:
            # Window is as large as (or larger than) the axis: start at 0
            # without consuming a random number.
            return 0
        # np.random.randint excludes its upper bound, so `slack + 1` is needed
        # for the window to be able to sit flush against the far edge.
        return np.random.randint(slack + 1)

    def __repr__(self):
        # Only report attributes this class actually sets; `scale` and
        # `ratio` belong to the parent's constructor, which is never run.
        return self.__class__.__name__ + \
            '(size={0}, interpolation_mode={1}, crop={2})'.format(
                self.size, self.interpolation_mode, self.crop
            )
|
|
|
|
|
|
|
|
class CenterCropVideo(object):
    """Crop the central region of a video clip via ``F.center_crop``.

    Args:
        crop_size (number or sequence): a single number is converted to the
            square ``(int(crop_size), int(crop_size))``; any other value is
            stored unchanged.
    """

    def __init__(self, crop_size):
        if not isinstance(crop_size, numbers.Number):
            # Already a (height, width)-style value: keep it as provided.
            self.crop_size = crop_size
            return
        side = int(crop_size)
        self.crop_size = (side, side)

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: central cropping of video clip. Size is
            (C, T, crop_size, crop_size)
        """
        return F.center_crop(clip, self.crop_size)

    def __repr__(self):
        details = '(crop_size={0})'.format(self.crop_size)
        return type(self).__name__ + details
|
|
|
|
|
|
|
|
class NormalizeVideo(object):
    """
    Normalize the video clip by mean subtraction and division by standard
    deviation; the computation itself is delegated to ``F.normalize``.
    Args:
        mean (3-tuple): pixel RGB mean
        std (3-tuple): pixel RGB standard deviation
        inplace (boolean): whether to normalize in place. Default is False
    """

    def __init__(self, mean, std, inplace=False):
        self.mean, self.std, self.inplace = mean, std, inplace

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
        Returns:
            The value returned by
            ``F.normalize(clip, self.mean, self.std, self.inplace)``.
        """
        return F.normalize(clip, self.mean, self.std, self.inplace)

    def __repr__(self):
        details = '(mean={0}, std={1}, inplace={2})'.format(
            self.mean, self.std, self.inplace)
        return type(self).__name__ + details
|
|
|
|
|
|
|
|
class ToTensorVideo(object):
    """
    Convert tensor data type from uint8 to float, divide value by 255.0 and
    permute the dimensions of clip tensor. All of the work is delegated to
    ``F.to_tensor``.
    """

    def __init__(self):
        """Nothing to configure: this transform has no parameters."""

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
        Return:
            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
        """
        return F.to_tensor(clip)

    def __repr__(self):
        return type(self).__name__
|
|
|
|
|
|
|
|
class RandomHorizontalFlipVideo(object):
    """
    Flip the video clip along the horizontal direction with a given
    probability; the flip itself is performed by ``F.hflip``.
    Args:
        p (float): probability of the clip being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Size is (C, T, H, W)
        Return:
            clip (torch.tensor): Size is (C, T, H, W)
        """
        # One draw from Python's `random` module decides whether to flip.
        should_flip = random.random() < self.p
        return F.hflip(clip) if should_flip else clip

    def __repr__(self):
        details = "(p={0})".format(self.p)
        return type(self).__name__ + details