|
|
|
|
|
|
|
|
| import argparse
|
| import os
|
| import ssl
|
| import subprocess
|
|
|
| import mmengine
|
| from joblib import Parallel, delayed
|
|
|
# NOTE: this replaces the factory used for default HTTPS contexts with the
# unverified one, i.e. certificate verification is turned off process-wide.
ssl._create_default_https_context = ssl._create_unverified_context

# Dataset root (relative path) and the folder the videos are saved into.
data_file = '../../../data/ActivityNet'
output_dir = data_file + '/videos'
|
|
|
|
|
def parse_args():
    """Parse the command-line options of the downloader.

    returns:
    -------
    args: argparse.Namespace
        Parsed options; ``args.bsn`` is True only when ``--bsn`` was passed.
    """
    cli = argparse.ArgumentParser(description='ActivityNet downloader')
    bsn_option = dict(
        action='store_true',
        help='download for BSN annotation or official one')
    cli.add_argument('--bsn', **bsn_option)
    return cli.parse_args()
|
|
|
|
|
def download_clip(video_identifier,
                  output_filename,
                  num_attempts=5,
                  url_base='https://www.youtube.com/watch?v='):
    """Download a video from YouTube if it exists and is not blocked.

    The download is skipped when ``output_filename`` is already present.

    arguments:
    ---------
    video_identifier: str
        Unique YouTube video identifier (11 characters).
    output_filename: str
        File path where the video will be stored.
    num_attempts: int
        Maximum number of ``youtube-dl`` invocations before giving up.
    url_base: str
        URL prefix that ``video_identifier`` is appended to.

    returns:
    -------
    status: bool
        Whether ``output_filename`` exists when the function returns.
    log: str
        'Downloaded' when the file is present, 'Fail' otherwise.
    """
    assert isinstance(video_identifier, str), 'video_identifier must be string'
    assert isinstance(output_filename, str), 'output_filename must be string'
    assert len(video_identifier) == 11, 'video_identifier must have length 11'

    if not os.path.exists(output_filename):
        # An argument list executed without a shell: quotes, ``$`` or ``;``
        # inside the path or URL are passed through literally instead of
        # being interpreted by a shell.
        command = [
            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
            '-f', 'mp4', '-o', output_filename,
            url_base + video_identifier
        ]
        print(' '.join(command))
        attempts = 0
        while True:
            try:
                subprocess.check_output(command, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing/unrunnable ``youtube-dl`` binary,
                # which a shell would have reported as a non-zero exit code.
                attempts += 1
                # ``>=`` so that num_attempts <= 0 cannot loop forever.
                if attempts >= num_attempts:
                    return False, 'Fail'
            else:
                break

    # The command may exit cleanly without producing the expected file, so
    # the reported log must follow the actual state on disk.
    status = os.path.exists(output_filename)
    return status, 'Downloaded' if status else 'Fail'
|
|
|
|
|
def download_clip_wrapper(youtube_id, output_dir):
    """Fetch a single video into ``output_dir``; used as the parallel task.

    arguments:
    ---------
    youtube_id: str
        YouTube identifier of the video.
    output_dir: str
        Folder in which ``v_<youtube_id>.mp4`` is looked up or stored.

    returns:
    -------
    status: tuple
        ``('v_' + youtube_id, success_flag, log_message)``; the message is
        'Exists' when the file was already on disk, otherwise whatever
        ``download_clip`` reported.
    """
    clip_name = 'v_' + youtube_id
    target_path = os.path.join(output_dir, clip_name + '.mp4')

    # Only hit the network when the clip is not on disk yet.
    if not os.path.exists(target_path):
        succeeded, message = download_clip(youtube_id, target_path)
        return (clip_name, succeeded, message)

    return (clip_name, True, 'Exists')
|
|
|
|
|
def parse_activitynet_annotations(input_csv, is_bsn_case=False):
    """Return the list of YouTube IDs referenced by an annotation file.

    arguments:
    ---------
    input_csv: str
        Path to the annotation file. In the BSN case it is a CSV file
        containing the following columns:
        'video,numFrame,seconds,fps,rfps,subset,featureFrame'.
        Otherwise it is a file readable by ``mmengine.load`` whose
        'database' entry is keyed by YouTube ID.
    is_bsn_case: bool
        Selects which of the two formats above is parsed.

    returns:
    -------
    youtube_ids: list
        List of all YoutubeIDs in ActivityNet.
    """
    if is_bsn_case:
        # Close the file deterministically; the first line is the header.
        with open(input_csv) as csv_file:
            rows = csv_file.readlines()[1:]
        # The first column is the video name; its first two characters
        # (presumably the 'v_' prefix) are dropped to get the YouTube ID.
        # Blank lines are skipped so they do not yield empty IDs.
        return [row.split(',')[0][2:] for row in rows if row.strip()]

    # Read from the path that was passed in rather than from a module
    # global that only exists when the file is executed as a script.
    data = mmengine.load(input_csv)['database']
    return list(data.keys())
|
|
|
|
|
def main(input_csv, output_dir, anno_file, num_jobs=24, is_bsn_case=False):
    """Download every video listed in the annotations and write a report.

    arguments:
    ---------
    input_csv: str
        Annotation file the YouTube IDs are parsed from.
    output_dir: str
        Folder the videos are saved into; created when missing.
    anno_file: str
        Annotation file that, in the BSN case, is backed up and rewritten
        so that it only keeps the videos that are available on disk.
    num_jobs: int
        Number of parallel jobs; 1 runs the downloads sequentially in
        this process.
    is_bsn_case: bool
        Whether the BSN annotation format is being processed.
    """
    youtube_ids = parse_activitynet_annotations(input_csv, is_bsn_case)

    # Create the folder the videos will be saved into.
    os.makedirs(output_dir, exist_ok=True)

    # Download all clips.
    if num_jobs == 1:
        status_list = [
            download_clip_wrapper(youtube_id, output_dir)
            for youtube_id in youtube_ids
        ]
    else:
        status_list = Parallel(n_jobs=num_jobs)(
            delayed(download_clip_wrapper)(youtube_id, output_dir)
            for youtube_id in youtube_ids)

    # Save the download report.
    mmengine.dump(status_list, 'download_report.json')

    # The filtered annotation is only ever written in the BSN case, so it is
    # only built there. Filtering unconditionally raised KeyError for files
    # whose top-level keys are not video names.
    if is_bsn_case:
        annotation = mmengine.load(anno_file)
        downloaded = {status[0]: status[1] for status in status_list}
        # Entries without a download status are treated as unavailable.
        annotation = {
            k: v
            for k, v in annotation.items() if downloaded.get(k, False)
        }
        anno_file_bak = anno_file.replace('.json', '_bak.json')
        os.rename(anno_file, anno_file_bak)
        mmengine.dump(annotation, anno_file)
|
|
|
|
|
if __name__ == '__main__':
    # --bsn switches between the BSN annotation files and the official one.
    is_bsn_case = parse_args().bsn
    video_list = (f'{data_file}/video_info_new.csv' if is_bsn_case else
                  f'{data_file}/activity_net.v1-3.min.json')
    # In the official case the video list is itself the annotation file.
    anno_file = (f'{data_file}/anet_anno_action.json'
                 if is_bsn_case else video_list)
    main(video_list, output_dir, anno_file, 24, is_bsn_case)
|
|
|