"""
Script to download datasets packaged with the repository. By default, all
datasets will be stored at robomimic/datasets, unless the @download_dir
argument is supplied. We recommend using the default, as most examples that
use these datasets assume that they can be found there.
The @tasks, @dataset_types, and @hdf5_types arguments can all be supplied
to choose which datasets to download.
Args:
download_dir (str): Base download directory. Created if it doesn't exist.
Defaults to datasets folder in repository - only pass in if you would
like to override the location.
tasks (list): Tasks to download datasets for. Defaults to lift task. Pass 'all' to
download all tasks (sim + real) 'sim' to download all sim tasks, 'real' to
download all real tasks, or directly specify the list of tasks.
dataset_types (list): Dataset types to download datasets for (e.g. ph, mh, mg).
Defaults to ph. Pass 'all' to download datasets for all available dataset
types per task, or directly specify the list of dataset types.
hdf5_types (list): hdf5 types to download datasets for (e.g. raw, low_dim, image).
Defaults to low_dim. Pass 'all' to download datasets for all available hdf5
types per task and dataset, or directly specify the list of hdf5 types.
Example usage:
# default behavior - just download lift proficient-human low-dim dataset
python download_datasets.py
# download low-dim proficient-human datasets for all simulation tasks
# (do a dry run first to see which datasets would be downloaded)
python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim --dry_run
python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim
# download all low-dim and image multi-human datasets for the can and square tasks
python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim image
# download the sparse reward machine-generated low-dim datasets
python download_datasets.py --tasks all --dataset_types mg --hdf5_types low_dim_sparse
# download all real robot datasets
python download_datasets.py --tasks real
"""
import os
import argparse
import robomimic
import robomimic.utils.file_utils as FileUtils
from robomimic import DATASET_REGISTRY
# all tasks with released datasets - sim tasks first, then real-robot tasks (suffixed with "_real")
ALL_TASKS = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"]
# dataset types per task: proficient-human (ph), multi-human (mh), machine-generated (mg), and paired
ALL_DATASET_TYPES = ["ph", "mh", "mg", "paired"]
# hdf5 variants that a (task, dataset type) pair may provide (e.g. sparse / dense reward variants)
ALL_HDF5_TYPES = ["raw", "low_dim", "image", "low_dim_sparse", "low_dim_dense", "image_sparse", "image_dense"]
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # directory to download datasets to
    parser.add_argument(
        "--download_dir",
        type=str,
        default=None,
        help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.",
    )

    # tasks to download datasets for
    parser.add_argument(
        "--tasks",
        type=str,
        nargs='+',
        default=["lift"],
        help="Tasks to download datasets for. Defaults to lift task. Pass 'all' to download all tasks (sim + real)\
            'sim' to download all sim tasks, 'real' to download all real tasks, or directly specify the list of\
            tasks.",
    )

    # dataset types to download datasets for
    parser.add_argument(
        "--dataset_types",
        type=str,
        nargs='+',
        default=["ph"],
        help="Dataset types to download datasets for (e.g. ph, mh, mg). Defaults to ph. Pass 'all' to download \
            datasets for all available dataset types per task, or directly specify the list of dataset types.",
    )

    # hdf5 types to download datasets for
    parser.add_argument(
        "--hdf5_types",
        type=str,
        nargs='+',
        default=["low_dim"],
        # FIX: help previously claimed "Defaults to raw" - the actual default is low_dim
        # (consistent with the module docstring above)
        help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to low_dim. Pass 'all' \
            to download datasets for all available hdf5 types per task and dataset, or directly specify the list\
            of hdf5 types.",
    )

    # dry run - don't actually download datasets, but print which datasets would be downloaded
    parser.add_argument(
        "--dry_run",
        action='store_true',
        help="set this flag to do a dry run to only print which datasets would be downloaded"
    )

    args = parser.parse_args()

    # set default base directory for downloads (repository datasets folder unless overridden)
    default_base_dir = args.download_dir
    if default_base_dir is None:
        default_base_dir = os.path.join(robomimic.__path__[0], "../datasets")

    # expand the 'all' / 'sim' / 'real' shorthands; each must be the sole tasks argument
    download_tasks = args.tasks
    if "all" in download_tasks:
        assert len(download_tasks) == 1, "all should be only tasks argument but got: {}".format(args.tasks)
        download_tasks = ALL_TASKS
    elif "sim" in download_tasks:
        assert len(download_tasks) == 1, "sim should be only tasks argument but got: {}".format(args.tasks)
        download_tasks = [task for task in ALL_TASKS if "real" not in task]
    elif "real" in download_tasks:
        assert len(download_tasks) == 1, "real should be only tasks argument but got: {}".format(args.tasks)
        download_tasks = [task for task in ALL_TASKS if "real" in task]

    # expand the 'all' shorthand for dataset types; must be the sole argument
    download_dataset_types = args.dataset_types
    if "all" in download_dataset_types:
        assert len(download_dataset_types) == 1, "all should be only dataset_types argument but got: {}".format(args.dataset_types)
        download_dataset_types = ALL_DATASET_TYPES

    # expand the 'all' shorthand for hdf5 types; must be the sole argument
    download_hdf5_types = args.hdf5_types
    if "all" in download_hdf5_types:
        assert len(download_hdf5_types) == 1, "all should be only hdf5_types argument but got: {}".format(args.hdf5_types)
        download_hdf5_types = ALL_HDF5_TYPES

    # download every registry entry that matches the requested (task, dataset type, hdf5 type) filters
    for task in DATASET_REGISTRY:
        if task in download_tasks:
            for dataset_type in DATASET_REGISTRY[task]:
                if dataset_type in download_dataset_types:
                    for hdf5_type in DATASET_REGISTRY[task][dataset_type]:
                        if hdf5_type in download_hdf5_types:
                            download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type))
                            print("\nDownloading dataset:\n    task: {}\n    dataset type: {}\n    hdf5 type: {}\n    download path: {}"
                                .format(task, dataset_type, hdf5_type, download_dir))
                            url = DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"]
                            if url is None:
                                # some registry entries have no hosted file and must be generated locally
                                print(
                                    "Skipping {}-{}-{}, no url for dataset exists.".format(task, dataset_type, hdf5_type)
                                    + " Create this dataset locally by running the appropriate command from robomimic/scripts/extract_obs_from_raw_datasets.sh."
                                )
                                continue
                            if args.dry_run:
                                print("\ndry run: skip download")
                            else:
                                # Make sure path exists and create if it doesn't
                                os.makedirs(download_dir, exist_ok=True)
                                # reuse the url already looked up above (was a redundant registry lookup)
                                FileUtils.download_url(
                                    url=url,
                                    download_dir=download_dir,
                                )
                            print("")