MTFL_UCF_Crime / utils /transforms_video.py
erkutt's picture
Upload open source code of MTFL model
28e129b verified
#!/usr/bin/env python3
import numpy as np
import numbers
import random
import torch
from torchvision.transforms import (
RandomCrop,
RandomResizedCrop,
)
import functional_video as F
__all__ = [
"RandomCropVideo",
"RandomResizedCropVideo",
"CenterCropVideo",
"NormalizeVideo",
"ToTensorVideo",
"RandomHorizontalFlipVideo",
]
class RandomCropVideo(RandomCrop):
def __init__(self, size):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, clip):
"""
Args:
clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
Returns:
torch.tensor: randomly cropped/resized video clip.
size is (C, T, OH, OW)
"""
i, j, h, w = self.get_params(clip, self.size)
return F.crop(clip, i, j, h, w)
def __repr__(self):
return self.__class__.__name__ + '(size={0})'.format(self.size)
class ResizeVideo:
def __init__(
self,
size,
interpolation_mode="bilinear"
):
self.size = size
self.interpolation_mode = interpolation_mode
def __call__(self, clip):
return F.resize(clip, self.size, self.interpolation_mode)
class RandomResizedCropVideo(RandomResizedCrop):
def __init__(
self,
size,
crop,
interpolation_mode="bilinear",
):
if isinstance(size, tuple):
assert len(size) == 2, "size should be tuple (height, width)"
self.size = size
else:
self.size = (size, size)
self.interpolation_mode = interpolation_mode
self.crop = crop
def __call__(self, clip):
"""
Args:
clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
Returns:
torch.tensor: randomly cropped/resized video clip.
size is (C, T, H, W)
"""
clip = F.resize(clip, self.size, self.interpolation_mode)
# print(clip.shape)
if clip.shape[2] - self.crop > 0:
i = np.random.randint(clip.shape[2] - self.crop)
else:
i = 0
if clip.shape[3] - self.crop > 0:
j = np.random.randint(clip.shape[3] - self.crop)
else:
j = 0
clip = clip[..., i:i+self.crop, j:j+self.crop]
return clip
def __repr__(self):
return self.__class__.__name__ + \
'(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
self.size, self.interpolation_mode, self.scale, self.ratio
)
class CenterCropVideo(object):
def __init__(self, crop_size):
if isinstance(crop_size, numbers.Number):
self.crop_size = (int(crop_size), int(crop_size))
else:
self.crop_size = crop_size
def __call__(self, clip):
"""
Args:
clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
Returns:
torch.tensor: central cropping of video clip. Size is
(C, T, crop_size, crop_size)
"""
return F.center_crop(clip, self.crop_size)
def __repr__(self):
return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
class NormalizeVideo(object):
"""
Normalize the video clip by mean subtraction and division by standard deviation
Args:
mean (3-tuple): pixel RGB mean
std (3-tuple): pixel RGB standard deviation
inplace (boolean): whether do in-place normalization
"""
def __init__(self, mean, std, inplace=False):
self.mean = mean
self.std = std
self.inplace = inplace
def __call__(self, clip):
"""
Args:
clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
"""
return F.normalize(clip, self.mean, self.std, self.inplace)
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format(
self.mean, self.std, self.inplace)
class ToTensorVideo(object):
"""
Convert tensor data type from uint8 to float, divide value by 255.0 and
permute the dimenions of clip tensor
"""
def __init__(self):
pass
def __call__(self, clip):
"""
Args:
clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
Return:
clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
"""
return F.to_tensor(clip)
def __repr__(self):
return self.__class__.__name__
class RandomHorizontalFlipVideo(object):
"""
Flip the video clip along the horizonal direction with a given probability
Args:
p (float): probability of the clip being flipped. Default value is 0.5
"""
def __init__(self, p=0.5):
self.p = p
def __call__(self, clip):
"""
Args:
clip (torch.tensor): Size is (C, T, H, W)
Return:
clip (torch.tensor): Size is (C, T, H, W)
"""
if random.random() < self.p:
clip = F.hflip(clip)
return clip
def __repr__(self):
return self.__class__.__name__ + "(p={0})".format(self.p)