| | |
| | |
| | import torch |
| |
|
| |
|
| | def _is_tensor_video_clip(clip): |
| | if not torch.is_tensor(clip): |
| | raise TypeError("clip should be Tesnor. Got %s" % type(clip)) |
| |
|
| | if not clip.ndimension() == 4: |
| | raise ValueError("clip should be 4D. Got %dD" % clip.dim()) |
| |
|
| | return True |
| |
|
| |
|
def crop(clip, i, j, h, w):
    """Extract a spatial region from a video clip.

    Args:
        clip (torch.Tensor): video clip to be cropped. Size is (C, T, H, W).
        i (int): row of the upper-left corner of the crop.
        j (int): column of the upper-left corner of the crop.
        h (int): height of the cropped region.
        w (int): width of the cropped region.

    Returns:
        torch.Tensor: the cropped clip, size (C, T, h, w).
    """
    assert len(clip.size()) == 4, "clip should be a 4D tensor"
    rows = slice(i, i + h)
    cols = slice(j, j + w)
    return clip[..., rows, cols]
| |
|
| |
|
def resize(clip, target_size, interpolation_mode):
    """Resize the spatial (last two) dimensions of a clip.

    Args:
        clip (torch.Tensor): input clip, shaped so that
            ``torch.nn.functional.interpolate`` treats the trailing two
            dimensions as spatial.
        target_size (tuple): desired (height, width).
        interpolation_mode (str): interpolation mode forwarded to
            ``interpolate`` (e.g. "bilinear", "nearest").

    Returns:
        torch.Tensor: the resized clip.
    """
    assert len(target_size) == 2, "target size should be tuple (height, width)"
    interpolate = torch.nn.functional.interpolate
    resized = interpolate(clip, size=target_size, mode=interpolation_mode)
    return resized
| |
|
| |
|
def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
    """
    Do spatial cropping and resizing to the video clip
    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        i (int): i in (i,j) i.e coordinates of the upper left corner.
        j (int): j in (i,j) i.e coordinates of the upper left corner.
        h (int): Height of the cropped region.
        w (int): Width of the cropped region.
        size (tuple(int, int)): height and width of resized clip
    Returns:
        clip (torch.tensor): Resized and cropped clip. Size is (C, T, H, W)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    # Crop first, then interpolate the cropped region up/down to `size`.
    cropped = crop(clip, i, j, h, w)
    return resize(cropped, size, interpolation_mode)
| |
|
| |
|
def center_crop(clip, crop_size):
    """Crop a (th, tw) region centered in the clip's spatial plane.

    Args:
        clip (torch.Tensor): video clip, size (C, T, H, W).
        crop_size (tuple): (crop_height, crop_width).

    Returns:
        torch.Tensor: centered crop of size (C, T, crop_height, crop_width).
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    th, tw = crop_size
    h, w = clip.size(-2), clip.size(-1)
    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"

    # Round-half-even centering, matching int(round(...)) semantics.
    top = int(round((h - th) / 2.0))
    left = int(round((w - tw) / 2.0))
    return crop(clip, top, left, th, tw)
| |
|
def corner_crop(clip, crop_size, i, j):
    """Crop a (th, tw) region whose upper-left corner is at (i, j).

    Args:
        clip (torch.Tensor): video clip, size (C, T, H, W).
        crop_size (tuple): (crop_height, crop_width).
        i (int): row of the crop's upper-left corner.
        j (int): column of the crop's upper-left corner.

    Returns:
        torch.Tensor: cropped clip of size (C, T, crop_height, crop_width).
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4d torch tensor"
    th, tw = crop_size
    h, w = clip.size(-2), clip.size(-1)
    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"
    return crop(clip, i, j, th, tw)
| |
|
| |
|
def to_tensor(clip):
    """Convert a uint8 video clip to float in [0, 1] and reorder dimensions.

    Casts the tensor to float, permutes (T, H, W, C) -> (C, T, H, W), and
    scales values by 1/255.

    Args:
        clip (torch.Tensor, dtype=torch.uint8): size (T, H, W, C).

    Returns:
        torch.Tensor: float clip of size (C, T, H, W) with values in [0, 1].

    Raises:
        TypeError: if ``clip`` is not a uint8 tensor.
    """
    _is_tensor_video_clip(clip)
    if clip.dtype != torch.uint8:
        raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
    # Permute before dividing so the result is a fresh contiguous tensor,
    # exactly as the original expression produced.
    reordered = clip.float().permute(3, 0, 1, 2)
    return reordered / 255.0
| |
|
| |
|
def normalize(clip, mean, std, inplace=False):
    """Normalize a video clip with per-channel mean and standard deviation.

    Args:
        clip (torch.Tensor): video clip to be normalized, size (C, T, H, W).
        mean (tuple): per-channel pixel mean, length 3.
        std (tuple): per-channel pixel standard deviation, length 3.
        inplace (bool): when True, modify ``clip`` in place instead of
            working on a clone.

    Returns:
        torch.Tensor: normalized clip, size (C, T, H, W).

    Raises:
        NotImplementedError: if the clip has neither 3 nor 1 channels.
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    target = clip if inplace else clip.clone()
    mean_t = torch.as_tensor(mean, dtype=target.dtype, device=target.device)
    std_t = torch.as_tensor(std, dtype=target.dtype, device=target.device)
    channels = target.size(0)
    if channels == 3:
        # Broadcast the length-3 stats over the T, H, W axes.
        target.sub_(mean_t[:, None, None, None]).div_(std_t[:, None, None, None])
    elif channels == 1:
        # Single-channel clip: collapse the RGB stats to scalar values.
        target.sub_(mean_t.mean()).div_(std_t.mean())
    else:
        raise NotImplementedError()
    return target
| |
|
| |
|
def hflip(clip):
    """Flip a video clip horizontally (along its last, width, dimension).

    Args:
        clip (torch.Tensor): video clip, size (C, T, H, W).

    Returns:
        torch.Tensor: horizontally flipped clip, size (C, T, H, W).
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    return torch.flip(clip, [-1])
| |
|