File size: 1,580 Bytes
"""Checks cut files: tries to decode a frame from every cut clip and removes clips that fail."""
import os
import sys
from glob import glob
from tqdm import tqdm
from joblib import Parallel, delayed
import decord
import numpy as np
import pandas as pd
def check_file(f):
    """Try to decode the last frame of the video at path ``f``.

    The last frame is used (rather than a random one) because a clip whose
    tail cannot be decoded is still broken even if earlier frames decode.

    Args:
        f: Path to a video file.

    Returns:
        ``None`` if the last frame was decoded, otherwise ``f`` itself so the
        caller can collect the paths that failed.
    """
    try:
        vr = decord.VideoReader(f, ctx=decord.cpu(), num_threads=1)
        num_frames = len(vr)
        if num_frames == 0:
            # A clip without frames is unusable; report it explicitly instead
            # of relying on an out-of-range index error.
            return f
        vr.get_batch([num_frames - 1]).asnumpy()
    except Exception:
        # Deliberately broad: any failure to open or decode marks the file
        # as bad; the specific error is not needed by the caller.
        return f
    return None


if __name__ == "__main__":
    video_dir = "/work/piyush/from_nfs2/datasets/EPIC-Kitchens-100/cut_clips"
    files = glob(os.path.join(video_dir, "*/*/*.MP4"))
    print("Total files:", len(files))

    # Both modes run the same check_file so they always agree on a file.
    parallel = True
    if parallel:
        status = Parallel(n_jobs=24)(
            delayed(check_file)(f) for f in tqdm(files, desc="Checking files")
        )
    else:
        status = [check_file(f) for f in tqdm(files, desc="Checking files")]

    failed = [f for f in status if f is not None]
    print("Number of files on which loading failed:", len(failed))

    if failed:
        # Show exactly what would be removed, then require explicit consent
        # before the irreversible deletion.
        for f in failed:
            print("  ", f)
        try:
            answer = input(f"Delete these {len(failed)} file(s)? [y/N] ")
        except EOFError:
            # No interactive stdin (e.g. batch job): never delete silently.
            answer = ""
        if answer.strip().lower() in ("y", "yes"):
            for f in failed:
                os.remove(f)
        else:
            print("Skipping deletion.")
|