WJAD / scripts /download_data.py
fuzirui's picture
Sync WJAD codebase
0cfefd2 verified
"""下载 NVIDIA Cosmos-Drive-Dreams 数据集。
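Invokes NVIDIA's official download.py directly, and is meant to support
downloading only a few clips for sandbox validation.

Usage:
    # Full download (synthetic + lidar + hdmap), about 3 TB
    python scripts/download_data.py --odir ./data/cosmos --workers 8
    # Sandbox smoke test only (about 5 GB)
    # NOTE(review): --max_clips and --weather are not declared by the argument
    # parser in main(), so argparse rejects this command as written; confirm
    # the intended flags before relying on it.
    python scripts/download_data.py --odir ./data/cosmos --file_types synthetic --max_clips 2 --weather Sunny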
"""
from __future__ import annotations
import argparse
import subprocess
import sys
import urllib.request
from pathlib import Path
# Raw GitHub URL (main branch) of the download.py script in the
# nv-tlabs/Cosmos-Drive-Dreams repository; fetched once and cached locally.
NV_DOWNLOAD_URL = (
"https://raw.githubusercontent.com/nv-tlabs/Cosmos-Drive-Dreams/main/scripts/download.py"
)
def _ensure_official_script(local_path: Path) -> None:
    """Download NVIDIA's download.py to ``local_path`` unless it already exists.

    The payload is read completely before anything touches the disk, then
    written to a sibling ``.part`` file and renamed into place. A failed or
    interrupted transfer therefore never leaves an empty or truncated file at
    ``local_path`` that later runs would mistake for a valid cached script.

    Args:
        local_path: Where the cached script should live. Missing parent
            directories are created.

    Raises:
        OSError: If the download fails or times out (``urllib.error.URLError``
            is an ``OSError`` subclass) or the file cannot be written.
    """
    if local_path.exists():
        return
    print(f"[download_data] 下载 NVIDIA download.py -> {local_path}")
    local_path.parent.mkdir(parents=True, exist_ok=True)

    # Socket-level timeout so a stalled connection cannot block forever.
    with urllib.request.urlopen(NV_DOWNLOAD_URL, timeout=60) as resp:
        payload = resp.read()

    # Write next to the destination so the final rename stays on one filesystem.
    tmp_path = local_path.with_name(local_path.name + ".part")
    try:
        tmp_path.write_bytes(payload)
        tmp_path.replace(local_path)
    finally:
        # Only present here if the write or rename failed; clean it up.
        if tmp_path.exists():
            tmp_path.unlink()
def main() -> None:
    """Parse the CLI options, cache NVIDIA's script, and run it.

    The script is stored as ``.nvidia_download.py`` inside the output
    directory and executed with the current interpreter. This process then
    exits with the child process's return code.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--odir", required=True, help="数据输出目录")
    cli.add_argument("--file_types", default="synthetic,lidar,hdmap", help="数据类型逗号分隔列表")
    cli.add_argument("--workers", type=int, default=4)
    cli.add_argument("--clean_cache", action="store_true")
    opts = cli.parse_args()

    out_dir = Path(opts.odir)
    script_path = out_dir / ".nvidia_download.py"
    _ensure_official_script(script_path)

    # Assemble the child command line piece by piece, in the order the
    # options are forwarded.
    command = [sys.executable, str(script_path)]
    command += ["--odir", str(out_dir)]
    command += ["--file_types", opts.file_types]
    command += ["--workers", str(opts.workers)]
    if opts.clean_cache:
        command += ["--clean_cache"]

    rendered = " ".join(command)
    print(f"[download_data] $ {rendered}")
    sys.exit(subprocess.call(command))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()