DTECT / backend /datasets /data /download.py
AdhyaSuman's picture
Initial commit with Git LFS for large files
11c72a2
raw
history blame contribute delete
948 Bytes
import os
import zipfile
from torchvision.datasets.utils import download_url
from backend.datasets.utils.logger import Logger
# Module-level logger shared by this file. Presumably "WARNING" is the minimum
# level emitted, in which case the info-level URL message logged by
# download_dataset is suppressed by default -- TODO confirm against Logger.
logger = Logger("WARNING")
def download_dataset(dataset_name, cache_path="~/.topmost"):
    """Download a zipped dataset and unpack it into ``cache_path``.

    The archive ``<dataset_name>.zip`` is fetched from the BobXWu/TopMost
    GitHub repository, extracted into ``cache_path`` and then removed, so
    only the extracted contents remain on disk.

    Args:
        dataset_name: Dataset identifier (e.g. ``'20NG'``); it is also the
            stem of the zip file that gets downloaded.
        cache_path: Directory that receives the archive and its extracted
            contents. A leading ``~`` is expanded to the user's home.

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip
            archive. The file is still deleted in that case.
        Exception: Any error raised by ``download_url`` propagates unchanged.
    """
    cache_path = os.path.expanduser(cache_path)
    raw_filename = f'{dataset_name}.zip'

    if dataset_name in ('Wikitext-103',):
        # This archive is stored with Git LFS, so it is served from the
        # media host rather than the raw-content host.
        zipped_dataset_url = f"https://media.githubusercontent.com/media/BobXWu/TopMost/main/data/{raw_filename}"
    else:
        zipped_dataset_url = f"https://raw.githubusercontent.com/BobXWu/TopMost/master/data/{raw_filename}"

    logger.info(zipped_dataset_url)

    download_url(zipped_dataset_url, root=cache_path, filename=raw_filename, md5=None)

    path = os.path.join(cache_path, raw_filename)
    try:
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(cache_path)
    finally:
        # Remove the archive even when extraction fails: the download is
        # requested with md5=None, so a corrupt file left in the cache would
        # not be integrity-checked and could be reused on the next call.
        os.remove(path)
if __name__ == "__main__":
    # Script entry point: fetch the 20NG dataset into the default cache
    # directory when this file is executed directly.
    download_dataset(dataset_name='20NG')