# File size: 3,576 Bytes
# d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Copyright (c) OpenMMLab. All rights reserved.
# This script is copied from
# https://github.com/activitynet/ActivityNet/blob/master/Crawler/Kinetics/download.py  # noqa: E501
# The code is licensed under the MIT licence.
import argparse
import os
import ssl
import subprocess

import mmengine
from joblib import Parallel, delayed

# Swap the factory that ``ssl`` uses for its default HTTPS context for the
# unverified variant, so HTTPS connections built from that default context
# skip certificate verification.
# NOTE(review): this is a process-wide setting that weakens TLS checks;
# presumably it works around certificate errors while downloading -- confirm
# it is still required.
ssl._create_default_https_context = ssl._create_unverified_context


def download(video_identifier,
             output_filename,
             num_attempts=5,
             url_base='https://www.youtube.com/watch?v='):
    """Download a video from YouTube if it exists and is not blocked.

    The download is skipped when ``output_filename`` already exists.
    Otherwise ``youtube-dl`` is invoked (without a shell) up to
    ``num_attempts`` times until it exits successfully.

    Args:
        video_identifier (str): Unique YouTube video identifier
            (11 characters).
        output_filename (str): File path where the video will be stored.
        num_attempts (int): Maximum number of ``youtube-dl`` invocations
            before giving up. Values below 1 are treated as 1.
            Defaults to 5.
        url_base (str): Prefix prepended to ``video_identifier`` to build
            the video URL.

    Returns:
        tuple[bool, str]: ``(status, log)``. ``status`` is True when
        ``output_filename`` exists on return. ``log`` is ``'Downloaded'``
        when the file exists and ``'Fail'`` otherwise.

    Raises:
        AssertionError: If an argument has the wrong type or the identifier
            is not 11 characters long.
    """
    # Defensive argument checking.
    assert isinstance(video_identifier, str), 'video_identifier must be string'
    assert isinstance(output_filename, str), 'output_filename must be string'
    assert len(video_identifier) == 11, 'video_identifier must have length 11'

    if not os.path.exists(output_filename):
        # Pass the arguments as a list and avoid ``shell=True`` so that the
        # file name and URL are never interpreted by a shell.
        command = [
            'youtube-dl', '--quiet', '--no-warnings', '--no-check-certificate',
            '-f', 'mp4', '-o', output_filename, url_base + video_identifier
        ]
        print(' '.join(command))
        # Always try at least once; a bounded loop also guarantees
        # termination when ``num_attempts`` is zero or negative.
        for _ in range(max(num_attempts, 1)):
            try:
                subprocess.check_output(command, stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError:
                # youtube-dl exited with an error; try again.
                continue
            except OSError:
                # youtube-dl is missing or not executable: retrying cannot
                # help, so report the failure right away.
                return False, 'Fail'
            break
        else:
            # Every attempt failed.
            return False, 'Fail'

    # Check if the video was successfully saved.
    status = os.path.exists(output_filename)
    return status, 'Downloaded' if status else 'Fail'


def download_wrapper(youtube_id, output_dir):
    """Fetch a single video into ``output_dir`` and report the outcome.

    Used as the unit of work for parallel processing.

    Args:
        youtube_id (str): YouTube video identifier; also used as the file
            stem of the saved video.
        output_dir (str): Directory in which ``<youtube_id>.mp4`` is stored.

    Returns:
        tuple: ``(youtube_id, status, log)``. When the target file is already
        present the result is ``(youtube_id, True, 'Exists')``; otherwise
        ``status`` and ``log`` are whatever ``download`` returned.
    """
    # The file is named after the id so it lines up with the annotations.
    video_path = os.path.join(output_dir, youtube_id + '.mp4')
    if not os.path.exists(video_path):
        succeeded, message = download(youtube_id, video_path)
        return (youtube_id, succeeded, message)
    return (youtube_id, True, 'Exists')


def main(input, output_dir, num_jobs=24):
    """Download every video listed in an annotation file.

    Args:
        input (str): Annotation file read with ``mmengine.load``; the keys
            of the loaded mapping are used as YouTube ids.
        output_dir (str): Directory where videos are saved. It is created
            when it does not exist yet.
        num_jobs (int): Number of parallel jobs handed to joblib. With
            ``1`` the downloads run sequentially in this process.
            Defaults to 24.

    The per-video results are dumped to ``download_report.json`` (a path
    relative to the current working directory).
    """
    # Read the annotation file; its keys are the video ids.
    video_ids = mmengine.load(input).keys()

    # Make sure the destination folder is there before downloading.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Download all clips, either through joblib or one after another.
    if num_jobs != 1:
        tasks = (
            delayed(download_wrapper)(video_id, output_dir)
            for video_id in video_ids)
        report = Parallel(n_jobs=num_jobs)(tasks)
    else:
        report = [
            download_wrapper(video_id, output_dir) for video_id in video_ids
        ]

    # Save download report.
    mmengine.dump(report, 'download_report.json')


if __name__ == '__main__':
    # Parse the command line and forward the options to main() as keyword
    # arguments (``input``, ``output_dir`` and ``num_jobs``).
    parser = argparse.ArgumentParser(
        description='Helper script for downloading GYM videos.')
    parser.add_argument('input', type=str, help='The gym annotation file')
    parser.add_argument(
        'output_dir', type=str, help='Output directory to save videos.')
    parser.add_argument('-n', '--num-jobs', type=int, default=24)
    cli_args = parser.parse_args()
    main(**vars(cli_args))