camenduru
/

neuralangelo

Model card Files Files and versions

xet

Community

camenduru commited on Aug 21, 2023

Commit

d41ff55

1 Parent(s): 36dee30

Upload downloader.py

Browse files

Files changed (1) hide show

downloader.py +442 -0

downloader.py ADDED Viewed

	@@ -0,0 +1,442 @@

+# ----------------------------------------------------------------------------
+# -                   TanksAndTemples Website Toolbox                        -
+# -                    http://www.tanksandtemples.org                        -
+# ----------------------------------------------------------------------------
+# The MIT License (MIT)
+#
+# Copyright (c) 2017
+# Arno Knapitsch <arno.knapitsch@gmail.com >
+# Jaesik Park <syncle@gmail.com>
+# Qian-Yi Zhou <Qianyi.Zhou@gmail.com>
+# Vladlen Koltun <vkoltun@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# ----------------------------------------------------------------------------
+#
+# This python script is for downloading dataset from www.tanksandtemples.org
+# The dataset has a different license, please refer to
+# https://tanksandtemples.org/license/
+import sys
+import os
+import argparse
+import zipfile
+import hashlib
+import requests
+if (sys.version_info > (3, 0)):
+    pversion = 3
+    from urllib.request import Request, urlopen
+else:
+    pversion = 2
+    from urllib2 import Request, urlopen
+id_download_dict = {
+    'Auditorium.mp4': '0B-ePgl6HF260SmdGUzJSX0ZfZXc',
+    'Auditorium.zip': '0B-ePgl6HF260N1VHWFBTSWQ2MDg',
+    'Ballroom.mp4': '0B-ePgl6HF260MzlmY2Jwa0dqME0',
+    'Ballroom.zip': '0B-ePgl6HF260aS1hQXJHeHFxNVE',
+    'Barn.mp4': '0B-ePgl6HF260ZlBZcHFrTHFLdGM',
+    'Barn.zip': '0B-ePgl6HF260NzQySklGdXZyQzA',
+    'Church.mp4': '0B-ePgl6HF260dnlGMkFkNlpibG8',
+    'Church.zip': '0B-ePgl6HF260SmhXM0czaHJ3SU0',
+    'Caterpillar.mp4': '0B-ePgl6HF260Z00xVWgyN2c3WEU',
+    'Caterpillar.zip': '0B-ePgl6HF260b2JNbnZYYjczU2s',
+    'Courthouse.mp4': '0B-ePgl6HF260TEpnajBqRFJ1enM',
+    'Courthouse.zip': '0B-ePgl6HF260bHRNZTJnU1pWMVE',
+    'Courtroom.mp4': '0B-ePgl6HF260b0JZeUJlUThSWjQ',
+    'Courtroom.zip': '0B-ePgl6HF260UmZIQVgtLXhtZUE',
+    'Family.mp4': '0B-ePgl6HF260UmNxYmlQeDhmeFE',
+    'Family.tar.gz': '0B-ePgl6HF260SWRlRDZCRXZRZlk',
+    'Family.zip': '0B-ePgl6HF260NVRhRmxnTW4tQTQ',
+    'Francis.mp4': '0B-ePgl6HF260emtkUElRT0lXQ3M',
+    'Francis.tar.gz': '0B-ePgl6HF260MnVqcW1EWDVMcFE',
+    'Francis.zip': '0B-ePgl6HF260SHk4ejdaSEhqd28',
+    'Horse.mp4': '0B-ePgl6HF260RGFBcF9iTk5XQTA',
+    'Horse.tar.gz': '0B-ePgl6HF260eE9EVTdpS3hYamc',
+    'Horse.zip': '0B-ePgl6HF260VFdBc0RvQjJuQXc',
+    'Ignatius.mp4': '0B-ePgl6HF260T19oUTIyUTRwTE0',
+    'Ignatius.zip': '0B-ePgl6HF260d0l0ZDNSZ3ZxREk',
+    'Lighthouse.mp4': '0B-ePgl6HF260T184cUdCbFFBVEE',
+    'Lighthouse.zip': '0B-ePgl6HF260dHpldktMNV9NRTA',
+    'M60.mp4': '0B-ePgl6HF260dG9nTzZHdkRJblE',
+    'M60.zip': '0B-ePgl6HF260b2lSTWxwLU1CQ2s',
+    'Meetingroom.mp4': '0B-ePgl6HF260V3BFSFFTZFJwSWc',
+    'Meetingroom.zip': '0B-ePgl6HF260cV9lNmlZZGp6aUU',
+    'Museum.mp4': '0B-ePgl6HF260ZXRwck5rWk4tc2c',
+    'Museum.zip': '0B-ePgl6HF260RTY4Ml9Ubm9fUkk',
+    'Palace.mp4': '0B-ePgl6HF260X21ac1ZXNmx3VTA',
+    'Palace.zip': '0B-ePgl6HF260ZHlJejlXbmFKS3M',
+    'Panther.mp4': '0B-ePgl6HF260bVRndWVYRGM4c0U',
+    'Panther.zip': '0B-ePgl6HF260SUNBeVhMc1hpb28',
+    'Playground.mp4': '0B-ePgl6HF260d0JoR2pWak9RbnM',
+    'Playground.zip': '0B-ePgl6HF260TVktaTFyclFhaDg',
+    'Temple.mp4': '0B-ePgl6HF260N1VTMGNES0FsaDA',
+    'Temple.zip': '0B-ePgl6HF260V2VaSG5GTkl5dmc',
+    'Train.mp4': '0B-ePgl6HF260YUttRUI4U0xtS1E',
+    'Train.zip': '0B-ePgl6HF260UFNWeXk3MHhCT00',
+    'Truck.mp4': '0B-ePgl6HF260aVVZMzhSdVc5Njg',
+    'Truck.zip': '0B-ePgl6HF260NEw3OGN4ckF0dnM',
+    'advanced_video.chk': '0B-ePgl6HF260RWJIcjRPRnlUS28',
+    'advanced_video.zip': '0B-ePgl6HF260OXgzbEJleDVSZ0k',
+    'image_sets_md5.chk': '0B-ePgl6HF260dE5zR3FhQmxVbHc',
+    'intermediate_video.chk': '0B-ePgl6HF260SVdpbG1peXBOYnM',
+    'intermediate_video.zip': '0B-ePgl6HF260UU1zUTd6SzlmczA',
+    'advanced_image.zip': '0B-ePgl6HF260UXlhWDBiNVZvdk0',
+    'intermediate_image.zip': '0B-ePgl6HF260UU1zUTd6SzlmczA',
+    'advanced_image.chk': '0B-ePgl6HF260RWJIcjRPRnlUS28',
+    'intermediate_image.chk': '0B-ePgl6HF260SVdpbG1peXBOYnM',
+    'md5.txt': '0B-ePgl6HF260QTlJUXpqc3RQOGM',
+    'training.zip': '0B-ePgl6HF260dU1pejdkeXdMb00',
+    'video_set_md5.chk': '0B-ePgl6HF260M2h5Q3o1bGdpc1U'
+}
+sep = os.sep
+parser = argparse.ArgumentParser(description='Tanks and Temples file' +
+                                 'downloader')
+parser.add_argument(
+    '--modality',
+    type=str,
+    help='(image|video|both) ' +
+    'choose if you want to download video sequences (very big) or pre sampled' +
+    ' image sets',
+    default='image')
+parser.add_argument(
+    '--group',
+    type=str,
+    help='(intermediate|advanced|both|training|all)' +
+    ' choose if you want to download intermediate, advanced or training dataset',
+    default='both')
+parser.add_argument('--pathname',
+                    type=str,
+                    help='chose destination path name, default = local path',
+                    default='')
+parser.add_argument('-s',
+                    action='store_true',
+                    default=False,
+                    dest='status',
+                    help='show data status')
+parser.add_argument('--unpack_off',
+                    action='store_false',
+                    default=True,
+                    dest='unpack',
+                    help='do not un-zip the folders after download')
+parser.add_argument('--calc_md5_off',
+                    action='store_false',
+                    default=True,
+                    dest='calc_md5',
+                    help='do not calculate md5sum after download')
+def download_file_from_google_drive(id, destination):
+    URL = "https://docs.google.com/uc?export=download"
+    session = requests.Session()
+    response = session.get(URL, params={'id': id}, stream=True)
+    token = get_confirm_token2(response)
+    if token:
+        params = {'id': id, 'confirm': token}
+        response = session.get(URL, params=params, stream=True)
+    save_response_content(response, destination)
+def get_confirm_token(response):
+    for key, value in response.cookies.items():
+        if key.startswith('download_warning'):
+            return value
+    return None
+def get_confirm_token2(response):
+    for key, value in response.headers.items():
+        if key.startswith('Set-Cookie'):
+            return value.split('=')[1].split(';')[0]
+    return None
+def save_response_content(response, destination):
+    CHUNK_SIZE = 32768
+    if not os.path.exists(os.path.dirname(destination)):
+        os.makedirs(os.path.dirname(destination))
+    total_filesize = 0
+    with open(destination, "wb") as f:
+        for chunk in response.iter_content(CHUNK_SIZE):
+            if chunk:  # filter out keep-alive new chunks
+                f.write(chunk)
+                total_filesize += CHUNK_SIZE
+                sys.stdout.write("\r%5.0f MB downloaded" %
+                                 (float(total_filesize) / 1000000))
+                sys.stdout.flush()
+    sys.stdout.write("\rDownload Complete              \n")
+    sys.stdout.flush()
+    return chunk
+def generate_file_md5(filename, blocksize=2**20):
+    m = hashlib.md5()
+    with open(filename, "rb") as f:
+        while True:
+            buf = f.read(blocksize)
+            if not buf:
+                break
+            m.update(buf)
+    return m.hexdigest()
+def download_video(pathname, scene, image_md5_dict, calc_md5):
+    scene_out_dir = pathname + 'videos'
+    download_file = scene + '.mp4'
+    print('\ndownloading video ' + download_file.split('/')[-1])
+    idd = id_download_dict[download_file]
+    download_file_local = scene_out_dir + sep + scene + '.mp4'
+    download_file_from_google_drive(idd, download_file_local)
+    if (calc_md5):
+        h_md5 = generate_file_md5(download_file_local)
+        print('\nmd5 downloaded: ' + h_md5)
+        print('md5 original:   ' + video_md5_dict[scene])
+        md5_check = h_md5 == video_md5_dict[scene]
+        if (not md5_check):
+            print('\nWarning: MD5 does not match, delete file and restart' +
+                  ' download\n')
+    else:
+        if (unpack):
+            extr_dir = scene_out_dir
+            zip_file = scene_out_dir + sep + scene + '.zip'
+            if (zipfile.is_zipfile(zip_file)):
+                if not os.path.exists(extr_dir):
+                    os.makedirs(extr_dir)
+                zip = zipfile.ZipFile(zip_file, 'r')
+                zip.extractall(extr_dir)
+def check_video(pathname, scene, image_md5_dict):
+    scene_out_dir = pathname + 'videos'
+    ret_str = ' '
+    download_file_local = scene_out_dir + sep + scene + '.mp4'
+    if os.path.exists(download_file_local):
+        h_md5 = generate_file_md5(download_file_local)
+        md5_check = h_md5 == video_md5_dict[scene]
+        if (md5_check):
+            ret_str = 'X'
+        else:
+            ret_str = '?'
+    else:
+        ret_str = ' '
+    return ret_str
+def download_image_sets(pathname, scene, image_md5_dict, calc_md5):
+    scene_out_dir = pathname + 'image_sets'
+    download_file = scene + '.zip'
+    download_file_local = scene_out_dir + sep + scene + '.zip'
+    print('\ndownloading image set ' + download_file.split('/')[-1])
+    idd = id_download_dict[download_file]
+    download_file_from_google_drive(idd, download_file_local)
+    if (calc_md5):
+        h_md5 = generate_file_md5(download_file_local)
+        print('\nmd5 downloaded: ' + h_md5)
+        print('md5 original:   ' + image_md5_dict[scene])
+        md5_check = h_md5 == image_md5_dict[scene]
+        if (md5_check):
+            if (unpack):
+                extr_dir = scene_out_dir
+                zip_file = scene_out_dir + sep + scene + '.zip'
+                if (zipfile.is_zipfile(zip_file)):
+                    if not os.path.exists(extr_dir):
+                        os.makedirs(extr_dir)
+                    zip = zipfile.ZipFile(zip_file, 'r')
+                    zip.extractall(extr_dir)
+        else:
+            print('\nWarning: MD5 does not match, delete file and restart' +
+                  ' download\n')
+def check_image_sets(pathname, scene, image_md5_dict):
+    scene_out_dir = pathname + 'image_sets'
+    ret_str = ''
+    download_file_local = scene_out_dir + sep + scene + '.zip'
+    if os.path.exists(download_file_local):
+        h_md5 = generate_file_md5(download_file_local)
+        md5_check = h_md5 == image_md5_dict[scene]
+        if (md5_check):
+            ret_str = 'X'
+        else:
+            ret_str = '?'
+    else:
+        ret_str = ' '
+    return ret_str
+def print_status(sequences, modality, pathname, intermediate_list,
+                 advanced_list, training_list, image_md5_dict, video_md5_dict):
+    #print('intermediate Dataset \t\t\t Video \t\t\t image set')
+    print('\n\n data status: \n\n')
+    print('[X] - downloaded    [ ] - missing    [?] - being downloaded or ' +
+          'corrupted    [n] - not checked')
+    if (sequences == 'intermediate' or sequences == 'both' or
+            sequences == 'all' or sequences == ''):
+        print('\n\n---------------------------------------------------------' +
+              '--------')
+        line_new = '%12s  %12s  %12s' % (' intermediate Dataset', 'Video',
+                                         'image set')
+        print(line_new)
+        print('-----------------------------------------------------------' +
+              '------')
+        for scene in intermediate_list:
+            line_new = '%12s  %19s  %10s' % (
+                scene, check_video(pathname, scene, video_md5_dict) if
+                (modality == 'video' or modality == 'both' or modality == '')
+                else 'n', check_image_sets(pathname, scene, image_md5_dict) if
+                (modality == 'image' or modality == 'both' or
+                 modality == '') else 'n')
+            print(line_new)
+    if (sequences == 'advanced' or sequences == 'both' or sequences == 'all' or
+            sequences == ''):
+        print('\n\n------------------------------------------------------' +
+              '---------')
+        line_new = '%12s  %16s  %12s' % (' advanced Dataset', 'Video',
+                                         'image set')
+        print(line_new)
+        print('---------------------------------------------------------------')
+        for scene in advanced_list:
+            #print(scene + '\t\t\t X \t\t\t X')
+            line_new = '%12s  %19s  %10s' % (
+                scene, check_video(pathname, scene, video_md5_dict) if
+                (modality == 'video' or modality == 'both' or modality == '')
+                else 'n', check_image_sets(pathname, scene, image_md5_dict) if
+                (modality == 'image' or modality == 'both' or
+                 modality == '') else 'n')
+            print(line_new)
+    if (sequences == 'training' or sequences == 'all' or sequences == ''):
+        print('\n\n------------------------------------------------------' +
+              '---------')
+        line_new = '%12s  %16s  %12s' % (' training Dataset', 'Video',
+                                         'image set')
+        print(line_new)
+        print('---------------------------------------------------------------')
+        for scene in training_list:
+            #print(scene + '\t\t\t X \t\t\t X')
+            line_new = '%12s  %19s  %10s' % (
+                scene, check_video(pathname, scene, video_md5_dict) if
+                (modality == 'video' or modality == 'both' or modality == '')
+                else 'n', check_image_sets(pathname, scene, image_md5_dict) if
+                (modality == 'image' or modality == 'both' or
+                 modality == '') else 'n')
+            print(line_new)
+if __name__ == "__main__":
+    intermediate_list = [
+        'Family', 'Francis', 'Horse', 'Lighthouse', 'M60', 'Panther',
+        'Playground', 'Train'
+    ]
+    advanced_list = [
+        'Auditorium', 'Ballroom', 'Courtroom', 'Museum', 'Palace', 'Temple'
+    ]
+    training_list = [
+        'Barn', 'Caterpillar', 'Church', 'Courthouse', 'Ignatius',
+        'Meetingroom', 'Truck'
+    ]
+    args = parser.parse_args()
+    sequences = args.group
+    calc_md5 = args.calc_md5
+    if sequences == 'intermediate':
+        scene_list = intermediate_list
+    elif sequences == 'advanced':
+        scene_list = advanced_list
+    elif sequences == 'training':
+        scene_list = training_list
+    elif sequences == 'both':
+        scene_list = intermediate_list + advanced_list
+    elif sequences == 'all':
+        scene_list = intermediate_list + advanced_list + training_list
+    elif sequences == '':
+        scene_list = intermediate_list + advanced_list
+    else:
+        sys.exit('Error! Unknown group parameter, see help [-h]')
+    scene_list.sort()
+    modality = args.modality
+    unpack = args.unpack
+    status_print = args.status
+    pathname = args.pathname
+    if pathname:
+        pathname = pathname + sep
+    # download md5 checksum file and create md5 dict for image sets zip files:
+    image_md5_dict = {}
+    scene_out_dir = pathname + 'image_sets'
+    fname = scene_out_dir + sep + 'image_sets_md5.chk'
+    idd = id_download_dict['image_sets_md5.chk']
+    print('\ndownloading md5 sum file for image sets')
+    download_file_from_google_drive(idd, fname)
+    with open(fname) as f:
+        content = f.readlines()
+        content = [x.strip() for x in content]
+    for line in content:
+        md5 = line.split(' ')[0]
+        scene_name = line.split(' ')[-1][0:-4]
+        image_md5_dict.update({scene_name: md5})
+    # download md5 checksum file and create md5 dict for videos:
+    video_md5_dict = {}
+    scene_out_dir = pathname + 'videos'
+    fname = scene_out_dir + sep + 'video_set_md5.chk'
+    idd = id_download_dict['video_set_md5.chk']
+    print('\ndownloading md5 sum file for videos')
+    download_file_from_google_drive(idd, fname)
+    with open(fname) as f:
+        content = f.readlines()
+        content = [x.strip() for x in content]
+    for line in content:
+        md5 = line.split(' ')[0]
+        scene_name = line.split(' ')[-1][0:-4]
+        video_md5_dict.update({scene_name: md5})
+    if (len(sys.argv) == 1):
+        print_status('both', 'both', pathname, intermediate_list, advanced_list,
+                     training_list, image_md5_dict, video_md5_dict)
+    elif status_print and (len(sys.argv) == 2):
+        print_status('both', 'both', pathname, intermediate_list, advanced_list,
+                     training_list, image_md5_dict, video_md5_dict)
+    elif status_print:
+        print_status(sequences, modality, pathname, intermediate_list,
+                     advanced_list, training_list, image_md5_dict,
+                     video_md5_dict)
+    elif sequences or modality:
+        for scene in scene_list:
+            if modality == 'video':
+                download_video(pathname, scene, video_md5_dict, calc_md5)
+            elif modality == 'image':
+                download_image_sets(pathname, scene, image_md5_dict, calc_md5)
+            elif modality == 'both':
+                download_image_sets(pathname, scene, image_md5_dict, calc_md5)
+                download_video(pathname, scene, video_md5_dict, calc_md5)
+            else:
+                sys.exit('Error! Unknown modality parameter, see help [-h]')