Spaces:
Configuration error
Configuration error
| import numpy as np | |
| import pandas as pd | |
| import imageio | |
| import os | |
| import subprocess | |
| from multiprocessing import Pool | |
| from itertools import cycle | |
| import warnings | |
| import glob | |
| import time | |
| from tqdm import tqdm | |
| from argparse import ArgumentParser | |
| from skimage import img_as_ubyte | |
| from skimage.transform import resize | |
# Silence every warning category for the whole process.
warnings.filterwarnings("ignore")

# Sink used for the downloader's stdout/stderr. ``subprocess.DEVNULL`` is the
# standard-library sentinel accepted by ``stdout=``/``stderr=``; unlike
# ``open(os.devnull, 'wb')`` it does not open a file at import time that is
# then never closed.
DEVNULL = subprocess.DEVNULL
def save(path, frames, format):
    """Write the frames of one clip to disk.

    Args:
        path: Output location. For ``'.mp4'`` it is the video file to write;
            for ``'.png'`` it is a directory that is created here.
        frames: Sequence of frames, written in order.
        format: ``'.mp4'`` writes a single file with ``imageio.mimsave``;
            ``'.png'`` writes one image per frame, named by the frame index
            zero-padded to 7 digits.

    Raises:
        ValueError: If ``format`` is neither ``'.mp4'`` nor ``'.png'``.
    """
    if format == '.mp4':
        imageio.mimsave(path, frames)
    elif format == '.png':
        if os.path.exists(path):
            # An existing directory is treated as already exported and is
            # left untouched.
            print("Warning: skiping video %s" % os.path.basename(path))
            return
        os.makedirs(path)
        for j, frame in enumerate(frames):
            imageio.imsave(os.path.join(path, str(j).zfill(7) + '.png'), frame)
    else:
        # Raise a regular exception instead of calling exit(): SystemExit
        # raised inside a multiprocessing worker kills the worker without
        # reporting a result, which leaves the parent waiting forever, and in
        # a single process it would exit with a success status.
        raise ValueError("Unknown format %s" % format)
def download(video_id, args):
    """Run the external downloader for one YouTube video.

    Args:
        video_id: YouTube video id; appended to the watch URL and used as the
            local file name.
        args: Namespace providing ``video_folder`` (destination directory) and
            ``youtube`` (path to the downloader executable).

    Returns:
        The path ``<video_folder>/<video_id>.mp4`` the downloader was asked to
        write. The child's exit status is ignored, so the file is not
        guaranteed to exist; callers must check.
    """
    video_path = os.path.join(args.video_folder, video_id + ".mp4")
    # NOTE(review): the argument list is executed without a shell, so the
    # quote characters inside the format selector reach the downloader
    # literally - confirm the downloader accepts this form.
    command = [
        args.youtube, '-f', "''best/mp4''", '--write-auto-sub', '--write-sub',
        '--sub-lang', 'en', '--skip-unavailable-fragments',
        "https://www.youtube.com/watch?v=" + video_id, "--output",
        video_path,
    ]
    # Discard the child's output via the stdlib sentinel rather than a
    # module-level file handle that is opened once and never closed.
    subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return video_path
def run(data):
    """Fetch one source video if needed and export each of its annotated chunks.

    Args:
        data: ``(video_id, args)`` pair. ``video_id`` is matched against the
            ``video_id`` column of the metadata CSV; the part before the first
            ``'#'`` names the YouTube video and the local ``.mp4`` file.
            ``args`` is the parsed command-line namespace; ``video_folder``,
            ``metadata``, ``out_folder``, ``format`` and ``image_shape`` are
            read here.

    Returns:
        None. Each chunk is written through ``save`` under
        ``<out_folder>/<partition>/``. The output name is the ``'#'``-separated
        parts of ``video_id`` in reverse order, followed by the zero-padded
        start and end values, and always ends in ``'.mp4'`` (for the ``'.png'``
        format that name is used for the directory).
    """
    video_id, args = data
    source_id = video_id.split('#')[0]
    video_path = os.path.join(args.video_folder, source_id + '.mp4')

    if not os.path.exists(video_path):
        download(source_id, args)
    # The downloader's status is not checked, so test for the file again.
    if not os.path.exists(video_path):
        print('Can not load video %s, broken link' % source_id)
        return

    reader = imageio.get_reader(video_path)
    try:
        fps = reader.get_meta_data()['fps']

        df = pd.read_csv(args.metadata)
        df = df[df['video_id'] == video_id]

        all_chunks_dict = [{'start': df['start'].iloc[j], 'end': df['end'].iloc[j],
                            'bbox': list(map(int, df['bbox'].iloc[j].split('-'))), 'frames': []}
                           for j in range(df.shape[0])]
        ref_fps = df['fps'].iloc[0]
        ref_height = df['height'].iloc[0]
        ref_width = df['width'].iloc[0]
        partition = df['partition'].iloc[0]

        try:
            for i, frame in enumerate(reader):
                # Ratio between the frame size recorded in the metadata and
                # the size of the video actually on disk; the same for every
                # chunk of this frame.
                scale_x = ref_width / frame.shape[1]
                scale_y = ref_height / frame.shape[0]
                for entry in all_chunks_dict:
                    # Equivalent to start/ref_fps <= i/fps < end/ref_fps, i.e.
                    # the chunk bounds are compared on the metadata's frame
                    # rate while ``i`` counts frames of the local file.
                    if (i * ref_fps >= entry['start'] * fps) and (i * ref_fps < entry['end'] * fps):
                        left, top, right, bot = entry['bbox']
                        left = int(left / scale_x)
                        top = int(top / scale_y)
                        right = int(right / scale_x)
                        bot = int(bot / scale_y)
                        crop = frame[top:bot, left:right]
                        if args.image_shape is not None:
                            crop = img_as_ubyte(resize(crop, args.image_shape, anti_aliasing=True))
                        entry['frames'].append(crop)
        except imageio.core.format.CannotReadFrameError:
            # An unreadable frame ends decoding; the frames collected so far
            # are still exported below.
            pass
    finally:
        # Release the reader even when decoding or metadata lookup fails.
        reader.close()

    first_part = '#'.join(video_id.split('#')[::-1])
    for entry in all_chunks_dict:
        path = first_part + '#' + str(entry['start']).zfill(6) + '#' + str(entry['end']).zfill(6) + '.mp4'
        save(os.path.join(args.out_folder, partition, path), entry['frames'], args.format)
def parse_image_shape(text):
    """Parse the ``--image_shape`` value: ``"H,W"`` -> ``(H, W)``, ``"None"`` -> ``None``."""
    if text.strip().lower() == 'none':
        return None
    return tuple(map(int, text.split(',')))


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--video_folder", default='youtube-taichi', help='Path to youtube videos')
    parser.add_argument("--metadata", default='taichi-metadata-new.csv', help='Path to metadata')
    parser.add_argument("--out_folder", default='taichi-png', help='Path to output')
    # Reject unsupported formats before any video is downloaded or decoded.
    parser.add_argument("--format", default='.png', choices=['.png', '.mp4'], help='Storing format')
    parser.add_argument("--workers", default=1, type=int, help='Number of workers')
    parser.add_argument("--youtube", default='./youtube-dl', help='Path to youtube-dl')
    # The help text promises that "None" disables resizing, so the parser must
    # accept that literal instead of failing on int("None").
    parser.add_argument("--image_shape", default=(256, 256), type=parse_image_shape,
                        help="Image shape, None for no resize")

    args = parser.parse_args()

    # exist_ok avoids the check-then-create race and also creates out_folder
    # as the parent of the partition directories.
    os.makedirs(args.video_folder, exist_ok=True)
    os.makedirs(args.out_folder, exist_ok=True)
    for partition in ['test', 'train']:
        os.makedirs(os.path.join(args.out_folder, partition), exist_ok=True)

    df = pd.read_csv(args.metadata)
    video_ids = set(df['video_id'])

    # Every task receives the same namespace next to its own video id.
    tasks = zip(video_ids, cycle([args]))
    # The context manager shuts the worker processes down once every result
    # has been consumed.
    with Pool(processes=args.workers) as pool:
        for _ in tqdm(pool.imap_unordered(run, tasks), total=len(video_ids)):
            pass