Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
import numpy as np
|
| 3 |
+
from einops import rearrange
|
| 4 |
+
from decord import VideoReader
|
| 5 |
+
|
| 6 |
+
# Clip length in frames. NOTE(review): presumably the value callers pass to
# frame_sampling(); the function itself only uses its own parameter.
num_frames = 32

# Side length, in pixels, of the square frames produced by frame_sampling().
input_size = 224

# NOTE(review): presumably the patch height/width expected by the model;
# nothing in this module reads it.
patch_size = (16, 16)

# Per-channel normalization statistics on the 0-255 pixel scale.
# NOTE(review): channel order is presumably RGB - confirm against the model.
IMAGENET_MEAN = np.array(
    [123.675, 116.28, 103.53],
)
IMAGENET_STD = np.array(
    [58.395, 57.12, 57.375],
)
|
| 11 |
+
|
| 12 |
+
def format_frames(frame, output_size):
    """Resize frames and standardize them with the ImageNet statistics.

    Args:
        frame: Image data accepted by ``tf.image.convert_image_dtype``
            (a single image or a batch of images).
        output_size: Target size handed to ``tf.image.resize``.

    Returns:
        The resized frames after subtracting ``IMAGENET_MEAN`` and dividing
        by ``IMAGENET_STD``.
    """
    # Bring the pixels onto the uint8 scale first, since the normalization
    # statistics are expressed on the 0-255 range.
    as_uint8 = tf.image.convert_image_dtype(frame, tf.uint8)
    resized = tf.image.resize(as_uint8, size=output_size)
    return (resized - IMAGENET_MEAN) / IMAGENET_STD
|
| 18 |
+
|
| 19 |
+
def read_video(file_path):
    """Open the video at ``file_path`` and return its decord ``VideoReader``.

    Args:
        file_path: Location of the video, passed straight to ``VideoReader``.

    Returns:
        The ``VideoReader`` instance created for the file.
    """
    return VideoReader(file_path)
|
| 22 |
+
|
| 23 |
+
def frame_sampling(container, num_frames):
    """Sample ``num_frames`` frames spread across a video and normalize them.

    The video is divided into ``num_frames`` equal-length segments and one
    frame index is drawn uniformly at random inside each segment. When the
    video holds fewer frames than ``num_frames`` the segments would be empty,
    so evenly spaced indices (with repeats) are used instead.

    Args:
        container: Video reader supporting ``len()`` and
            ``get_batch(indices).asnumpy()``, such as the object returned by
            ``read_video``.
        num_frames: Number of frames to sample.

    Returns:
        The sampled frames after ``format_frames`` has resized them to
        ``input_size`` x ``input_size`` and normalized them.

    Raises:
        ValueError: If the video contains no frames.
    """
    total_frames = len(container)
    if total_frames == 0:
        raise ValueError("cannot sample frames from an empty video")

    interval = total_frames // num_frames
    if interval > 0:
        # One random pick per segment: segment start plus a jitter that
        # stays inside the segment, so the largest index is at most
        # num_frames * interval - 1 <= total_frames - 1.
        segment_starts = np.arange(num_frames) * interval
        jitter = np.random.randint(interval, size=segment_starts.shape)
        frame_index = segment_starts + jitter
    else:
        # Fewer frames than requested: np.random.randint(0, ...) would raise,
        # so cover the whole video evenly and let frames repeat.
        frame_index = (
            np.linspace(0, total_frames - 1, num_frames).round().astype(np.int64)
        )

    # asnumpy() already yields one stacked array, so no extra np.stack needed.
    frames = container.get_batch(frame_index).asnumpy()
    return format_frames(frames, [input_size] * 2)
|
| 32 |
+
|
| 33 |
+
def denormalize(z):
    """Undo the mean/std normalization and clamp to the 0-255 pixel range.

    Computes ``z * std + mean`` with the per-channel statistics below and
    clips the result to ``[0, 255]``.

    Args:
        z: Normalized values, as any array-like that ``np.asarray`` accepts
            and that broadcasts against a 3-element vector (i.e. the last
            axis holds the 3 channels).

    Returns:
        A NumPy array of de-normalized values limited to ``[0, 255]``.
    """
    # Same values as the module-level IMAGENET_MEAN / IMAGENET_STD; the
    # standard deviations are used directly instead of going through
    # variance -> sqrt.
    mean = np.array([123.675, 116.28, 103.53])
    std = np.array([58.395, 57.12, 57.375])

    # np.asarray / np.clip keep this working for inputs that lack a
    # ``.clip`` method (lists, tensors exposing ``__array__``).
    pixels = np.asarray(z) * std + mean
    return np.clip(pixels, 0, 255)
|