Spaces:
Build error
Build error
| """ | |
| Copyright (c) 2022, salesforce.com, inc. | |
| All rights reserved. | |
| SPDX-License-Identifier: BSD-3-Clause | |
| For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause | |
| """ | |
| import os | |
| from pathlib import Path | |
| from omegaconf import OmegaConf | |
| from lavis.common.utils import ( | |
| cleanup_dir, | |
| get_abs_path, | |
| get_cache_path, | |
| ) | |
| import opendatasets as od | |
# Kaggle dataset page that this script downloads from.
DATA_URL = "https://www.kaggle.com/datasets/hsankesara/flickr-image-dataset"

# Emitted at module load so the credential requirement is visible before
# any download is attempted.
print(
    "\n"
    "To download the dataset, you need to have a Kaggle account and the associated key.\n"
    "See https://www.kaggle.com/docs/api to create account and a new API token.\n"
)
def move_directory(src_dir, dst_dir):
    """
    Rename every entry directly under ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created first (including missing parents) when it does
    not exist yet. Entries keep their names. ``src_dir`` itself is not
    removed.
    """
    print(f"Moving to {dst_dir}")
    os.makedirs(dst_dir, exist_ok=True)

    for entry in os.listdir(src_dir):
        source = os.path.join(src_dir, entry)
        target = os.path.join(dst_dir, entry)
        os.rename(source, target)
if __name__ == "__main__":
    # Script flow: read the configured image storage location, download the
    # Kaggle archive into a sibling scratch directory, move the images into
    # place, then delete the scratch directory.
    config_path = get_abs_path("configs/datasets/flickr30k/defaults.yaml")

    storage_dir = OmegaConf.load(
        config_path
    ).datasets.flickr30k.build_info.images.storage
    storage_dir = Path(get_cache_path(storage_dir))

    # Scratch area that sits next to the final storage directory.
    download_dir = storage_dir.parent / "download"

    if storage_dir.exists():
        print(f"Dataset already exists at {storage_dir}. Aborting.")
        # Raise SystemExit directly: the `exit` helper is installed by the
        # `site` module for interactive use and may be absent in a program.
        raise SystemExit(0)

    # exist_ok=True so that a scratch directory left behind by an earlier
    # run that stopped before cleanup does not crash the retry.
    os.makedirs(download_dir, exist_ok=True)

    try:
        print("Downloading {} to {}".format(DATA_URL, download_dir))
        od.download(DATA_URL, download_dir)
    except Exception as e:
        print(e)
        # remove download dir if failed
        cleanup_dir(download_dir)
        raise SystemExit(1) from e

    # The images are moved out of the nested folders inside the download
    # directory into <storage_dir>/flickr30k-images.
    move_directory(
        download_dir / "flickr-image-dataset" / "flickr30k_images" / "flickr30k_images",
        storage_dir / "flickr30k-images",
    )

    cleanup_dir(download_dir)