Spaces:

innat
/

VideoSwin

Running

innat committed on Oct 14, 2023

Commit

c3c0446

1 Parent(s): 5779623

Create utils.py

Files changed (1) hide show

utils.py ADDED Viewed

+import tensorflow as tf
+import numpy as np
+from einops import rearrange
+from decord import VideoReader
+# Module-level frame count. frame_sampling takes its own ``num_frames``
+# argument, so this is presumably the value callers pass in -- confirm.
+num_frames = 32
+# Side length in pixels; frame_sampling resizes every frame to
+# input_size x input_size.
+input_size = 224
+# NOTE(review): not referenced anywhere in the visible part of this file;
+# presumably consumed by code elsewhere -- confirm.
+patch_size = (16, 16)
+# Per-channel mean / standard deviation used by format_frames
+# (subtract the mean, then divide by the std). The values match the widely
+# used ImageNet RGB statistics expressed on the 0-255 pixel scale.
+IMAGENET_MEAN = np.array([123.675, 116.28, 103.53])
+IMAGENET_STD = np.array([58.395, 57.12, 57.375])
+def format_frames(frame, output_size):
+    frame = tf.image.convert_image_dtype(frame, tf.uint8)
+    frame = tf.image.resize(frame, size=output_size)
+    frame = frame - IMAGENET_MEAN
+    frame = frame / IMAGENET_STD
+    return frame
+def read_video(file_path):
+    container = VideoReader(file_path)
+    return container
+def frame_sampling(container, num_frames):
+    interval = len(container) // num_frames
+    bids = np.arange(num_frames) * interval
+    offset = np.random.randint(interval, size=bids.shape)
+    frame_index = bids + offset
+    frames = container.get_batch(frame_index).asnumpy()
+    frames = np.stack(frames)
+    frames = format_frames(frames, [input_size] * 2)
+    return frames
+def denormalize(z):
+    mean = np.array([123.675, 116.28, 103.53])
+    variance = np.array([np.square(58.395), np.square(57.12), np.square(57.375)])
+    std = np.sqrt(variance) # no need var and std, todo: update here!
+    x = (z * std) + mean
+    x = x.clip(0, 255)
+    return x