Safetensors
English
llava
video-retrieval
text-to-video-search
multimodal-embedding
File size: 1,580 Bytes
7daf628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Checks cut files."""
import os
import sys
from glob import glob
from tqdm import tqdm
from joblib import Parallel, delayed

import decord
import numpy as np
import pandas as pd


# Default location of the cut clips; files are matched as <dir>/*/*/*.MP4.
VIDEO_DIR = "/work/piyush/from_nfs2/datasets/EPIC-Kitchens-100/cut_clips"
# Default number of joblib workers used when checking in parallel.
N_JOBS = 24


def check_file(path):
    """Check that the clip at ``path`` can be opened and decoded.

    The clip is opened with decord and its last frame is decoded.

    Args:
        path: Path of the video file to probe.

    Returns:
        ``None`` if the clip opened and its last frame decoded, otherwise
        ``path`` itself so that callers can collect the failing files.
    """
    try:
        reader = decord.VideoReader(path, ctx=decord.cpu(), num_threads=1)
        num_frames = len(reader)
        if num_frames == 0:
            # A clip without frames has nothing to decode; report it as
            # failed explicitly instead of relying on an indexing error.
            return path
        # Decode the final frame; presumably a truncated clip fails here
        # even when its header parses -- TODO confirm.
        reader.get_batch([num_frames - 1]).asnumpy()
    except Exception:
        # Deliberately broad: this is a best-effort probe and any error
        # while opening/decoding means the clip is unusable.
        # NOTE(review): this also hides environment problems (e.g. a broken
        # decord install), which would mark every file as failed.
        return path
    return None


def find_failed(files, parallel=True, n_jobs=N_JOBS):
    """Run ``check_file`` over ``files`` and return the paths that failed.

    Args:
        files: Iterable of video file paths.
        parallel: If true, distribute the checks over joblib workers;
            otherwise check the files one by one in this process.
        n_jobs: Number of joblib workers (only used when ``parallel``).

    Returns:
        List of paths for which ``check_file`` reported a failure.
    """
    progress = tqdm(files, desc="Checking files")
    if parallel:
        status = Parallel(n_jobs=n_jobs)(
            delayed(check_file)(f) for f in progress
        )
    else:
        status = [check_file(f) for f in progress]
    return [f for f in status if f is not None]


def remove_files(paths):
    """Delete every file in ``paths`` and return how many were removed.

    Paths that no longer exist are skipped, so one missing file does not
    abort the rest of the clean-up.
    """
    removed = 0
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            continue
        removed += 1
    return removed


def confirm_deletion(num_files):
    """Ask on stdin whether ``num_files`` files should be deleted.

    Returns:
        ``True`` only for an explicit "y"/"yes" answer. Any other answer,
        or a closed stdin (non-interactive run), counts as "no".
    """
    try:
        answer = input("Delete {} file(s)? [y/N] ".format(num_files))
    except EOFError:
        return False
    return answer.strip().lower() in ("y", "yes")


def main(video_dir=VIDEO_DIR, parallel=True, n_jobs=N_JOBS):
    """Check all cut clips under ``video_dir`` and offer to delete bad ones.

    Args:
        video_dir: Root directory; clips are globbed as ``*/*/*.MP4`` below it.
        parallel: Whether to check the files with joblib workers.
        n_jobs: Number of joblib workers (only used when ``parallel``).

    Returns:
        List of paths on which loading failed.
    """
    files = glob(os.path.join(video_dir, "*/*/*.MP4"))
    print("Total files:", len(files))

    failed = find_failed(files, parallel=parallel, n_jobs=n_jobs)
    print("Number of files on which loading failed:", len(failed))

    # Deleting is destructive, so it needs an explicit confirmation
    # (this replaces the debugger breakpoint that used to pause the script
    # right before the removal loop).
    if failed and confirm_deletion(len(failed)):
        print("Number of files removed:", remove_files(failed))
    return failed


if __name__ == "__main__":
    main()