Safetensors
English
llava
video-retrieval
text-to-video-search
multimodal-embedding
File size: 4,073 Bytes
7daf628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import sys
from glob import glob 
from collections import defaultdict

import json
from hydra import compose, initialize
from omegaconf import OmegaConf
import PIL, PIL.Image
import decord

from adapt4change.utils.speednet import *
import shared.utils as su

# Action classes to process. Each entry is used verbatim as a sub-directory
# name under `<data_dir>/train_split/` and `<data_dir>/val_split/` when the
# script globs for source videos, so spelling and spacing must match the
# on-disk folder names exactly.
classes_selected = [
    "skipping rope",
    "gymnastics tumbling",
    "somersaulting",
    "cartwheeling",
    "trampolines bouncing",
    "swinging on something",
    "vault",
    "deadlifting",
    "clean and jerk",
    "diving cliff",
]

# Output directory for the generated clips (`<video_id>-normal.mp4`,
# `<video_id>-spedup.mp4`, `<video_id>-slowdn.mp4`).
SAVE_DIR = "/scratch/shared/beegfs/piyush/datasets/SpeedyKinetics/clips"
# NOTE: this runs at import time, so merely importing the module creates the
# directory (and any missing parents) if it does not exist yet.
os.makedirs(SAVE_DIR, exist_ok=True)

def check_all_files_exist(save_paths):
    """Return True iff every path in ``save_paths`` exists on disk.

    An empty collection yields True. Checking stops at the first missing path.
    """
    return all(os.path.exists(candidate) for candidate in save_paths)

def save_clips_for_single_video(video_path, show=False):
    """
    Write normal, sped-up and slowed-down clips for one source video.

    A 3 s window is taken from the video (randomly, per the original author's
    note). The start frame and the three index sets come from
    `FrameIndexSampler`, which is defined outside this file, so the exact
    sampling pattern is not visible here. The three clips are saved under
    `SAVE_DIR` as `<video_id>-normal.mp4`, `<video_id>-spedup.mp4` and
    `<video_id>-slowdn.mp4`, all written with the source video's average fps.

    The call is a no-op when all three outputs already exist, which makes
    re-runs resumable. Failures while opening the video, sampling indices, or
    decoding frames are printed and the video is skipped.

    Args:
        video_path: Path to the source `.mp4` file.
        show: If True, display the three saved clips side by side via
            `su.visualize.show_grid_of_videos`.

    Returns:
        None.
    """
    video_id = os.path.basename(video_path).split(".mp4")[0]
    save_paths = [
        f"{SAVE_DIR}/{video_id}-normal.mp4",
        f"{SAVE_DIR}/{video_id}-spedup.mp4",
        f"{SAVE_DIR}/{video_id}-slowdn.mp4",
    ]
    # Resume support: nothing to do if a previous run produced every output.
    if check_all_files_exist(save_paths):
        return

    try:
        vr = decord.VideoReader(video_path)
    except Exception as e:
        print(f"Error opening video {video_path}: {e}")
        return

    total_frames = len(vr)
    fps = vr.get_avg_fps()
    clip_duration = 3.

    # Index sampling is guarded like the open/decode steps so that a single
    # problematic video is skipped instead of raising out of a parallel
    # worker and aborting the whole batch.
    # NOTE(review): the sampler's failure modes (e.g. a video shorter than
    # the requested window) are not visible from this file — confirm.
    try:
        sampler = FrameIndexSampler(total_frames=total_frames)
        # Clip length in frames for a `clip_duration`-second window.
        T = int(clip_duration * fps)
        start_frame = sampler.get_valid_start_frame(T)
        normal_indices, sped_up_indices, slowed_down_indices = (
            sampler.sample_all_clip_indices(start_frame, T)
        )
    except Exception as e:
        print(f"Error sampling clip indices for {video_path}: {e}")
        return

    try:
        frames_normal = [PIL.Image.fromarray(f) for f in vr.get_batch(normal_indices).asnumpy()]
        frames_spedup = [PIL.Image.fromarray(f) for f in vr.get_batch(sped_up_indices).asnumpy()]
        frames_slowdn = [PIL.Image.fromarray(f) for f in vr.get_batch(slowed_down_indices).asnumpy()]
    except Exception as e:
        print(f"Error processing video {video_path}: {e}")
        return

    # All variants are written at the source frame rate; the speed change is
    # carried by which frames were selected, not by the container fps.
    su.io.save_video(frames_normal, save_paths[0], fps=fps)
    su.io.save_video(frames_spedup, save_paths[1], fps=fps)
    su.io.save_video(frames_slowdn, save_paths[2], fps=fps)
    if show:
        su.visualize.show_grid_of_videos(
            files=save_paths,
            labels=["Normal", "Sped up", "Slowed down"],
        )


if __name__ == "__main__":
    import argparse

    # --start_index/--end_index select a slice of the combined file list so
    # the work can be split across several invocations of this script.
    parser = argparse.ArgumentParser()
    parser.add_argument("--start_index", type=int, default=0)
    parser.add_argument("--end_index", type=int, default=1000000)
    args = parser.parse_args()

    data_dir = "/datasets/KineticsClean/"
    verbose = True
    total_train = []
    total_valid = []
    for c in classes_selected:
        # glob() does not guarantee any ordering. Sort so that a given
        # [start_index, end_index) slice refers to the same files on every
        # run/machine; otherwise shards could overlap or skip videos.
        files_train = sorted(glob(f"{data_dir}/train_split/{c}/*.mp4"))
        files_valid = sorted(glob(f"{data_dir}/val_split/{c}/*.mp4"))
        if verbose:
            print(c)
            print("Train videos: ", len(files_train))
            print("Valid videos: ", len(files_valid))
            print("-" * 80)
        total_train.extend(files_train)
        total_valid.extend(files_valid)
    print("Total train files: ", len(total_train))
    print("Total valid files: ", len(total_valid))

    # Train files first, then validation files; indices refer to this order.
    files = total_train + total_valid
    print(f"Total files: {len(files)}")
    print(f"Start index: {args.start_index}")
    print(f"End index: {args.end_index}")
    files = files[args.start_index:args.end_index]
    print(f"Total files to process: {len(files)}")

    parallelize = True
    if not parallelize:
        for file in su.log.tqdm_iterator(files, desc="Processing files"):
            save_clips_for_single_video(file)
    else:
        from joblib import Parallel, delayed
        # The progress iterator wraps task submission, so the bar tracks
        # dispatched jobs rather than completed ones.
        iterator = su.log.tqdm_iterator(files, desc="Processing files")
        Parallel(n_jobs=16)(delayed(save_clips_for_single_video)(file) for file in iterator)