Spaces:
Running
Running
| import tensorflow as tf | |
| import numpy as np | |
| from einops import rearrange | |
| from decord import VideoReader | |
# Clip / model geometry.
# NOTE(review): the consumers of num_frames and patch_size are outside this
# view — presumably the number of frames per clip and the model's spatial
# patch size; confirm against the model code.
num_frames = 16
input_size = 224  # frames are resized to input_size x input_size
patch_size = (16, 16)

# Per-channel statistics used to normalize frames after scaling to [0, 1].
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])
def format_frames(frame, output_size):
    """Resize frames and normalize them with the ImageNet channel statistics.

    Args:
        frame: Image data accepted by ``tf.image.convert_image_dtype``. The
            last axis must broadcast against the 3-element per-channel
            statistics (i.e. channels-last RGB — TODO confirm channel order
            with the caller).
        output_size: ``[height, width]`` passed to ``tf.image.resize``.

    Returns:
        The resized frames, scaled by ``1 / 255`` and then standardized as
        ``(x - IMAGENET_MEAN) / IMAGENET_STD``.
    """
    # Bring the input to uint8 first so the later division by 255 maps it
    # onto [0, 1].
    as_uint8 = tf.image.convert_image_dtype(frame, tf.uint8)
    resized = tf.image.resize(as_uint8, size=output_size)
    unit_range = resized / 255.
    centered = unit_range - IMAGENET_MEAN
    return centered / IMAGENET_STD
def read_video(file_path):
    """Open the video at ``file_path`` and return its ``VideoReader``."""
    return VideoReader(file_path)
def frame_sampling(container, num_frames):
    """Sample ``num_frames`` frames spread across a video and preprocess them.

    The video is split into ``num_frames`` equal-length segments and one
    frame index is drawn uniformly at random from each segment. When the
    video has fewer frames than ``num_frames`` the segments would be empty,
    so evenly spaced indices (with repeats) are used instead.

    Args:
        container: Video reader supporting ``len()`` and
            ``get_batch(indices).asnumpy()`` (e.g. the object returned by
            ``read_video``).
        num_frames: Number of frames to sample.

    Returns:
        The sampled frames after ``format_frames`` resized them to
        ``input_size`` x ``input_size`` and normalized them.

    Raises:
        ValueError: If the video contains no frames.
    """
    total_frames = len(container)
    if total_frames == 0:
        raise ValueError("cannot sample frames from an empty video")

    interval = total_frames // num_frames
    if interval > 0:
        # One random frame from each of the num_frames segments.
        segment_starts = np.arange(num_frames) * interval
        offset = np.random.randint(interval, size=segment_starts.shape)
        frame_index = segment_starts + offset
    else:
        # Video shorter than num_frames: np.random.randint(0) would raise,
        # so cover the whole video deterministically, repeating frames.
        frame_index = np.arange(num_frames) * total_frames // num_frames

    # asnumpy() already yields one stacked array, so no extra np.stack.
    frames = container.get_batch(frame_index).asnumpy()
    return format_frames(frames, [input_size] * 2)
def denormalize(z):
    """Undo the ImageNet normalization and map values back to ``[0, 255]``.

    Inverse of the standardization applied in ``format_frames``:
    ``x = (z * std + mean) * 255``, clipped to the valid pixel range.

    Args:
        z: Normalized image data whose last axis has 3 channels (it is
            broadcast against the per-channel statistics).

    Returns:
        Array of pixel values clipped to ``[0, 255]``.
    """
    # These must match the statistics used for normalization; the previous
    # std of 0.225 for every channel did not invert it for red and green.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    pixels = ((z * std) + mean) * 255
    return np.clip(pixels, 0, 255)