| """Checks cut files.""" |
| import os |
| import sys |
| from glob import glob |
| from tqdm import tqdm |
| from joblib import Parallel, delayed |
|
|
| import decord |
| import numpy as np |
| import pandas as pd |
|
|
|
|
DEFAULT_VIDEO_DIR = "/work/piyush/from_nfs2/datasets/EPIC-Kitchens-100/cut_clips"


def check_file(f):
    """Return ``f`` if the video at that path cannot be decoded, else ``None``.

    The file is opened with decord on the CPU (single decoding thread) and
    its last frame is decoded. Any exception raised while opening or
    decoding, as well as a video that reports zero frames, marks the file
    as failed.

    Args:
        f: Path of the video file to check.

    Returns:
        ``None`` when the last frame decodes, otherwise ``f`` itself so the
        caller can collect the failing paths.
    """
    try:
        vr = decord.VideoReader(f, ctx=decord.cpu(), num_threads=1)
        num_frames = len(vr)
        if num_frames == 0:
            # Nothing to decode; report it explicitly instead of relying on
            # an out-of-range index error further down.
            return f
        vr.get_batch([num_frames - 1]).asnumpy()
    except Exception:
        # Deliberately broad: whatever goes wrong while opening or decoding,
        # the clip is unusable and must be reported.
        return f
    return None


def main(video_dir=DEFAULT_VIDEO_DIR, parallel=True, n_jobs=24):
    """Find undecodable clips under ``video_dir`` and delete them on request.

    Every file matching ``*/*/*.MP4`` below ``video_dir`` is passed to
    ``check_file``. The paths that fail are printed, and they are removed
    from disk only after the user confirms with ``y``.

    Args:
        video_dir: Root directory that is globbed for clips.
        parallel: If true, run the checks through joblib with ``n_jobs``
            workers; otherwise check the files one by one in this process.
        n_jobs: Number of joblib workers used when ``parallel`` is true.
    """
    files = glob(os.path.join(video_dir, "*/*/*.MP4"))
    print("Total files:", len(files))

    progress = tqdm(files, desc="Checking files")
    if parallel:
        status = Parallel(n_jobs=n_jobs)(
            delayed(check_file)(f) for f in progress
        )
    else:
        status = [check_file(f) for f in progress]

    failed = [f for f in status if f is not None]
    print("Number of files on which loading failed:", len(failed))
    if not failed:
        return

    # Deletion is irreversible, so show exactly what would be removed and
    # require an explicit confirmation (this replaces a debugger breakpoint
    # that used to serve as the manual checkpoint).
    for f in failed:
        print(f)
    answer = input("Delete the files listed above? [y/N] ")
    if answer.strip().lower() != "y":
        print("Aborted; no files were deleted.")
        return

    for f in failed:
        os.remove(f)


if __name__ == "__main__":
    main()
| |