|
|
|
|
|
import argparse
|
|
|
import os.path as osp
|
|
|
import subprocess
|
|
|
|
|
|
import mmengine
|
|
|
from joblib import Parallel, delayed
|
|
|
|
|
|
# Base URL under which the AVA trainval videos are hosted; ``main`` joins
# each filename read from the source list onto this prefix.
URL_PREFIX = 'https://s3.amazonaws.com/ava-dataset/trainval/'
|
|
|
|
|
|
|
|
|
def download_video(video_url, output_dir, num_attempts=5):
    """Download one video with ``wget`` unless it is already on disk.

    Args:
        video_url (str): URL of the video. Its basename is used as the
            local file name.
        output_dir (str): Directory handed to ``wget -P``.
        num_attempts (int): Maximum number of ``wget`` invocations before
            giving up. Values below 1 behave like 1. Defaults to 5.

    Returns:
        tuple[bool, str]: ``(status, message)``. ``message`` is
        ``'Downloading Failed'`` (with ``status`` False) when every attempt
        failed, otherwise ``'Downloaded'`` with ``status`` telling whether
        the file exists in ``output_dir``.
    """
    video_file = osp.basename(video_url)
    output_file = osp.join(output_dir, video_file)

    if not osp.exists(output_file):
        command = ['wget', '-c', video_url, '-P', output_dir]
        print(' '.join(command))
        attempts = 0
        while True:
            try:
                # Pass the argument list directly (no shell) so URLs or
                # paths containing spaces/metacharacters are not
                # re-interpreted by a shell.
                subprocess.check_output(command, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing/non-executable ``wget``, which
                # a shell would have reported as a non-zero exit status.
                attempts += 1
                # ``>=`` keeps the loop finite even for num_attempts <= 0.
                if attempts >= num_attempts:
                    return False, 'Downloading Failed'
            else:
                break

    status = osp.exists(output_file)
    return status, 'Downloaded'
|
|
|
|
|
|
|
|
|
def main(source_file, output_dir, num_jobs=24, num_attempts=5):
    """Download every video listed in ``source_file`` and dump a report.

    Args:
        source_file (str): Text file with one video filename per line.
            Blank lines are ignored.
        output_dir (str): Directory the videos are saved to. It is passed
            to ``mmengine.mkdir_or_exist`` before downloading.
        num_jobs (int): Number of joblib workers. ``1`` downloads
            sequentially in the current process. Defaults to 24.
        num_attempts (int): Forwarded to ``download_video``. Defaults to 5.

    The list of ``(status, message)`` results, in input order, is written
    to ``download_report.json`` via ``mmengine.dump``.
    """
    mmengine.mkdir_or_exist(output_dir)

    # Close the file deterministically and drop blank lines / stray
    # whitespace (e.g. '\r') that would otherwise become bogus URLs.
    with open(source_file) as f:
        video_names = [line.strip() for line in f if line.strip()]
    video_list = [osp.join(URL_PREFIX, video) for video in video_names]

    if num_jobs == 1:
        # Collect results in ``status_list``; appending to ``video_list``
        # while iterating it would never terminate.
        status_list = [
            download_video(video, output_dir, num_attempts)
            for video in video_list
        ]
    else:
        status_list = Parallel(n_jobs=num_jobs)(
            delayed(download_video)(video, output_dir, num_attempts)
            for video in video_list)

    mmengine.dump(status_list, 'download_report.json')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: the parsed option names map one-to-one
    # onto the keyword parameters of ``main``.
    arg_parser = argparse.ArgumentParser(
        description='Helper script for downloading AVA videos')
    arg_parser.add_argument(
        'source_file',
        help='TXT file containing the video filename',
        type=str)
    arg_parser.add_argument(
        'output_dir',
        help='Output directory where videos will be saved',
        type=str)
    arg_parser.add_argument('-n', '--num-jobs', default=24, type=int)
    arg_parser.add_argument('--num-attempts', default=5, type=int)

    cli_args = arg_parser.parse_args()
    main(**vars(cli_args))
|
|
|
|