| """ |
| Faster clip cutting script generated by Claude. |
| |
| S=/datasets/EpicKitchens-100/ |
| D=/work/piyush/from_nfs2/datasets/EPIC-Kitchens-100/cut_clips |
| csv=$D/../epic-kitchens-100-annotations/EPIC_100_train_with_id.csv |
| python shared/scripts/cut_clips_fast.py --csv $csv --video_id_key path_id --start_time_key start_sec --end_time_key stop_sec --video_dir $S/ --cut_dir $D/ --ext MP4 --max_workers 4 |
| |
| """ |
| import os |
| from os.path import join, exists |
| import time |
| from concurrent.futures import ThreadPoolExecutor, as_completed |
|
|
| import numpy as np |
| import pandas as pd |
| from tqdm import tqdm |
| from moviepy.editor import VideoFileClip |
| from moviepy.video.fx.resize import resize |
|
|
def time_float_to_str(time_in_seconds):
    """Format a duration given in seconds as a ``datetime.timedelta`` string.

    The value is rounded to the nearest millisecond. The previous
    implementation truncated instead, so floating-point error could drop a
    millisecond (for example 1.001 s was rendered as ``0:00:01``).

    Args:
        time_in_seconds: Duration in seconds (int or float).

    Returns:
        ``str(datetime.timedelta)`` of the duration, e.g. ``"1:01:01.500000"``;
        the fractional part is omitted when it is zero.
    """
    import datetime

    # Round once on the total so the result never depends on how the float
    # happens to split into hours/minutes/seconds; timedelta normalises it.
    total_milliseconds = round(time_in_seconds * 1000)
    return str(datetime.timedelta(milliseconds=total_milliseconds))
|
|
def process_video(row, args):
    """Cut one annotated segment out of its source video and save it as a clip.

    The segment is resized to a height of 480 pixels (width follows the source
    aspect ratio, rounded to an even number) and encoded with libx264 video
    and AAC audio. The clip is first written to a temporary file next to its
    destination and only renamed on success, so an interrupted or failed
    encode never leaves a truncated file that later runs would skip as done.

    Args:
        row: Mapping-like record (e.g. a pandas Series) holding "video_path"
            plus the columns named by ``args.video_id_key``,
            ``args.start_time_key`` and ``args.end_time_key``.
        args: Namespace providing ``video_id_key``, ``start_time_key``,
            ``end_time_key``, ``no_round_times``, ``ext``, ``cut_dir``,
            ``overwrite`` and ``verbose``.

    Returns:
        The path of the written clip, or ``None`` when the clip already
        existed (and ``overwrite`` is off) or when processing failed.
    """
    partial_path = None
    try:
        source_path = row["video_path"]
        video_id = row[args.video_id_key]
        start = float(row[args.start_time_key])
        end = float(row[args.end_time_key])

        if args.no_round_times:
            clip_filename = f"{video_id}_{start}_{end}.{args.ext}"
        else:
            clip_filename = f"{video_id}_{np.round(start, 1)}_{np.round(end, 1)}.{args.ext}"

        clip_filepath = join(args.cut_dir, clip_filename)
        clip_dir = os.path.dirname(clip_filepath)
        # The video id may contain sub-directories, so create them on demand.
        os.makedirs(clip_dir, exist_ok=True)

        if os.path.exists(clip_filepath) and not args.overwrite:
            return None

        # Keep the real extension at the end: the container format is chosen
        # from the file name by the writer.
        partial_path = join(clip_dir, f".partial_{os.path.basename(clip_filepath)}")

        with VideoFileClip(source_path) as video:
            target_height = 480
            # Round the width to an even number: odd frame sizes are a known
            # problem for libx264 with the widely supported yuv420p format.
            target_width = 2 * round(target_height * video.w / video.h / 2)

            clip = video.subclip(start, end)
            # Pass an explicit size so the computed width is actually used.
            clip = clip.resize(newsize=(target_width, target_height))

            clip.write_videofile(
                partial_path,
                codec='libx264',
                audio_codec='aac',
                preset='faster',
                threads=2,
                # Show the encoder progress bar only in verbose mode.
                logger='bar' if args.verbose else None,
            )

        # Atomic on the same filesystem; also overwrites an existing clip.
        os.replace(partial_path, clip_filepath)
        return clip_filepath
    except Exception as err:
        # Best effort: one broken clip must not abort the whole batch.
        if args.verbose:
            print(f"Error processing {row[args.video_id_key]}: {str(err)}")
        return None
    finally:
        # Runs on failure and on interruption; a no-op after a successful rename.
        if partial_path is not None and os.path.exists(partial_path):
            os.remove(partial_path)
|
|
def _parse_args(argv=None):
    """Parse command-line options (``sys.argv`` is used when ``argv`` is None)."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--csv", type=str, required=True,
        help="Path to CSV file containing video IDs and timestamps",
    )
    parser.add_argument(
        "--video_id_key", type=str, default="video_id",
        help="Name of the CSV column holding the video ID",
    )
    parser.add_argument(
        "--start_time_key", type=str, default="start_time",
        help="Name of the CSV column holding the clip start time",
    )
    parser.add_argument(
        "--end_time_key", type=str, default="end_time",
        help="Name of the CSV column holding the clip end time",
    )
    parser.add_argument(
        "--video_dir", type=str, required=True,
        help="Path to directory containing downloaded videos",
    )
    parser.add_argument(
        "--cut_dir", type=str, required=True,
        help="Path to directory where cut videos will be saved",
    )
    parser.add_argument(
        "--overwrite", action="store_true",
        help="Whether to overwrite existing cut videos",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="Print per-clip errors and show encoder progress",
    )
    parser.add_argument(
        "--no_round_times", action="store_true",
        help="Keep full-precision start and end times in clip filenames "
             "instead of rounding them to one decimal place",
    )
    parser.add_argument(
        "--debug", action="store_true",
        help="Process only the first row, with verbose output",
    )
    parser.add_argument(
        "--ext", type=str, default="mp4",
        help="File extension of the source videos and of the cut clips",
    )
    parser.add_argument(
        "--si", type=int, default=0,
        help="Index of the first row to process (inclusive)",
    )
    parser.add_argument(
        "--ei", type=int, default=None,
        help="Index of the last row to process (exclusive); default: all rows",
    )
    parser.add_argument(
        "--filter_csv", type=str, default=None, required=False,
        help="Optional CSV; only rows whose --filter_key value occurs in it are kept",
    )
    parser.add_argument(
        "--filter_key", type=str, default=None, required=False,
        help="Column used to match rows against --filter_csv",
    )
    parser.add_argument(
        "--max_workers", type=int, default=4,
        help="Number of parallel workers for processing videos",
    )
    args = parser.parse_args(argv)
    if args.filter_csv is not None and args.filter_key is None:
        parser.error("--filter_key is required when --filter_csv is given")
    return args


def _load_rows(args):
    """Load the annotation CSV and return the rows whose source video exists.

    Applies the optional filter CSV, then the ``[si, ei)`` row slice, and adds
    a "video_path" column built from the video directory, video ID and
    extension.

    Raises:
        FileNotFoundError: If the main or the filter CSV does not exist.
        ValueError: If a required column is missing.
    """
    # Explicit raises instead of asserts: asserts vanish under ``python -O``.
    if not os.path.exists(args.csv):
        raise FileNotFoundError(f"CSV file {args.csv} does not exist.")
    df = pd.read_csv(args.csv)
    print(">>> Loaded CSV file with shape", df.shape)
    required = {args.video_id_key, args.start_time_key, args.end_time_key}
    missing = required.difference(df.columns)
    if missing:
        raise ValueError(f"CSV file {args.csv} is missing columns: {sorted(missing)}")

    if args.filter_csv is not None:
        path = args.filter_csv
        if not os.path.exists(path):
            raise FileNotFoundError(f"CSV file {path} does not exist.")
        key = args.filter_key
        df_filter = pd.read_csv(path)
        if key not in df_filter.columns:
            raise ValueError(f"CSV file must contain column {key}.")
        if key not in df.columns:
            raise ValueError(f"CSV file {args.csv} must contain column {key}.")
        keep_values = df_filter[key].unique()
        df = df[df[key].isin(keep_values)]
        print(">>> Filtered CSV file with shape", df.shape)

    si = args.si
    ei = args.ei if args.ei is not None else len(df)
    # Copy the slice so adding a column below does not write into a view of
    # the full frame (avoids pandas' SettingWithCopy warning).
    df = df.iloc[si:ei].copy()
    print("Start index:", si, "End index:", ei)

    print(">>> Adding video paths to dataframe")
    df["video_path"] = df[args.video_id_key].map(
        lambda video_id: join(args.video_dir, f"{video_id}.{args.ext}")
    )

    # Many rows share one source video: hit the filesystem once per file.
    path_exists = {path: exists(path) for path in df["video_path"].unique()}
    df = df[df["video_path"].map(path_exists).astype(bool)]
    print(">>> Found videos for", df.shape[0], "rows.")
    return df


def main(argv=None):
    """Cut every clip listed in the CSV, in parallel, into the cut directory."""
    args = _parse_args(argv)

    os.makedirs(args.cut_dir, exist_ok=True)

    df = _load_rows(args)
    if df.empty:
        print(">>> No videos to cut.")
        return

    if args.debug:
        # Debug mode: a single clip, with errors and progress shown.
        args.verbose = True
        process_video(df.iloc[0], args)
    else:
        with ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            futures = [
                executor.submit(process_video, row, args)
                for _, row in df.iterrows()
            ]

            n_written = 0
            with tqdm(total=len(futures), desc="Cutting clips") as pbar:
                for future in as_completed(futures):
                    # None means the clip was skipped (already there) or failed.
                    if future.result() is not None:
                        n_written += 1
                    pbar.update(1)
        print(">>> Clips written in this run:", n_written, "of", len(futures))

    print(">>> Number of cut files:", len(os.listdir(args.cut_dir)))


if __name__ == "__main__":
    main()