"""下载 NVIDIA Cosmos-Drive-Dreams 数据集。 直接调用 NVIDIA 官方 download.py,并支持仅下载几个 clip 用于 sandbox 验证。 用法: # 完整下载(synthetic + lidar + hdmap),约 3TB python scripts/download_data.py --odir ./data/cosmos --workers 8 # 仅 sandbox 烟囱测试(5GB 左右) python scripts/download_data.py --odir ./data/cosmos --file_types synthetic --max_clips 2 --weather Sunny """ from __future__ import annotations import argparse import subprocess import sys import urllib.request from pathlib import Path NV_DOWNLOAD_URL = ( "https://raw.githubusercontent.com/nv-tlabs/Cosmos-Drive-Dreams/main/scripts/download.py" ) def _ensure_official_script(local_path: Path) -> None: if local_path.exists(): return print(f"[download_data] 下载 NVIDIA download.py -> {local_path}") local_path.parent.mkdir(parents=True, exist_ok=True) with urllib.request.urlopen(NV_DOWNLOAD_URL) as resp, open(local_path, "wb") as f: f.write(resp.read()) def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--odir", required=True, help="数据输出目录") parser.add_argument( "--file_types", default="synthetic,lidar,hdmap", help="数据类型逗号分隔列表", ) parser.add_argument("--workers", type=int, default=4) parser.add_argument("--clean_cache", action="store_true") args = parser.parse_args() odir = Path(args.odir) nv_script = odir / ".nvidia_download.py" _ensure_official_script(nv_script) cmd = [ sys.executable, str(nv_script), "--odir", str(odir), "--file_types", args.file_types, "--workers", str(args.workers), ] if args.clean_cache: cmd.append("--clean_cache") print(f"[download_data] $ {' '.join(cmd)}") rc = subprocess.call(cmd) sys.exit(rc) if __name__ == "__main__": main()