File size: 1,957 Bytes
0cfefd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""下载 NVIDIA Cosmos-Drive-Dreams 数据集。

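Invokes NVIDIA's official download.py directly. This wrapper accepts and
forwards only --odir, --file_types, --workers and --clean_cache; for a small
sandbox smoke test, restrict --file_types.

Usage:
    # Full download (synthetic + lidar + hdmap), about 3 TB
    python scripts/download_data.py --odir ./data/cosmos --workers 8

    # Sandbox smoke test: fetch a single file type only.
    # NOTE: --max_clips / --weather are not defined by this script's argument
    # parser, so passing them makes argparse exit with an error. The earlier
    # "about 5 GB" estimate assumed that clip/weather filtering, which is not
    # implemented here.
    python scripts/download_data.py --odir ./data/cosmos --file_types synthetic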
"""

from __future__ import annotations

import argparse
import subprocess
import sys
import urllib.request
from pathlib import Path


# Location of download.py in the nv-tlabs/Cosmos-Drive-Dreams repository
# (main branch), served through raw.githubusercontent.com.
NV_DOWNLOAD_URL = (
    "https://raw.githubusercontent.com/"
    "nv-tlabs/Cosmos-Drive-Dreams/"
    "main/scripts/download.py"
)


def _ensure_official_script(local_path: Path, timeout: float = 60.0) -> None:
    """Make sure NVIDIA's official download script is present at *local_path*.

    If a non-empty file already exists at *local_path* nothing is done.
    Otherwise the script is fetched from ``NV_DOWNLOAD_URL`` and stored there,
    creating parent directories as needed.

    Args:
        local_path: Where the script should live on disk.
        timeout: Seconds to wait on the network before giving up.

    Raises:
        urllib.error.URLError: The download failed (also covers timeouts).
        RuntimeError: The server returned an empty body.
        OSError: The file could not be written.
    """
    # A zero-byte file is what an interrupted earlier download leaves behind;
    # running it would "succeed" without downloading anything, so re-fetch it.
    if local_path.exists() and local_path.stat().st_size > 0:
        return

    print(f"[download_data] 下载 NVIDIA download.py -> {local_path}")
    local_path.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the URL tracks the repository's main branch and the caller
    # executes the result, so the code that runs is unpinned. Consider pinning
    # to a commit and/or verifying a checksum.
    # Read the whole body before touching the disk so a network failure can
    # never leave a partial script behind.
    with urllib.request.urlopen(NV_DOWNLOAD_URL, timeout=timeout) as resp:
        payload = resp.read()
    if not payload:
        raise RuntimeError(
            f"[download_data] empty response from {NV_DOWNLOAD_URL}"
        )

    # Write to a sibling temp file and rename it into place, so local_path is
    # either absent or complete even if the write is interrupted.
    tmp_path = local_path.with_name(local_path.name + ".part")
    try:
        tmp_path.write_bytes(payload)
        tmp_path.replace(local_path)
    finally:
        if tmp_path.exists():
            tmp_path.unlink()


def main() -> None:
    """Parse CLI options and run NVIDIA's official download script.

    Accepted options are ``--odir`` (required), ``--file_types``,
    ``--workers`` and ``--clean_cache``. The official script is kept at
    ``<odir>/.nvidia_download.py`` (fetched on first use) and is started with
    the current Python interpreter, receiving the same options.

    This function never returns normally: it ends with ``sys.exit`` using the
    child process's return code.
    """
    # Local import: only needed to render the echoed command line.
    import shlex

    parser = argparse.ArgumentParser()
    parser.add_argument("--odir", required=True, help="数据输出目录")
    parser.add_argument(
        "--file_types",
        default="synthetic,lidar,hdmap",
        help="数据类型逗号分隔列表",
    )
    parser.add_argument("--workers", type=int, default=4)
    parser.add_argument("--clean_cache", action="store_true")
    args = parser.parse_args()

    odir = Path(args.odir)
    # The helper script lives inside the output directory as a hidden file.
    nv_script = odir / ".nvidia_download.py"
    _ensure_official_script(nv_script)

    cmd = [
        sys.executable,
        str(nv_script),
        "--odir", str(odir),
        "--file_types", args.file_types,
        "--workers", str(args.workers),
    ]
    if args.clean_cache:
        cmd.append("--clean_cache")

    # Quote every argument so the echoed line stays unambiguous (and can be
    # pasted into a POSIX shell) when a path contains spaces or
    # metacharacters. The child itself is started from the list, with no shell.
    shown = " ".join(shlex.quote(part) for part in cmd)
    print(f"[download_data] $ {shown}")
    rc = subprocess.call(cmd)
    sys.exit(rc)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()