File size: 3,254 Bytes
a8eb6e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
# Preflight check: validates the training environment without GPUs or data
# Run inside the Docker container:
#   docker run --rm pi05-training ./preflight.sh

# Running tallies of check outcomes; check() increments exactly one of these
# per call and the summary at the end of the script reports both.
FAIL=0
PASS=0

# Run one preflight check and record its outcome.
#
# Arguments:
#   $1    - label printed next to the PASS/FAIL marker
#   $2... - command (and its arguments) to run; exit status 0 counts as PASS
# Globals:
#   PASS, FAIL - counters; exactly one of them is incremented per call
# Outputs:
#   stdout - "  PASS  <label>" or "  FAIL  <label>"
#   stderr - on failure only: the last lines of the command's combined
#            stdout/stderr, indented, so the reason for the failure is visible
# Returns:
#   0 always; the outcome is reported through the counters, not the status.
check() {
    local name="$1"
    shift

    # A bare redirection with no command succeeds, so a call that forgot the
    # command would otherwise be counted as a pass.
    if [ "$#" -eq 0 ]; then
        echo "  FAIL  $name"
        FAIL=$((FAIL + 1))
        echo "        (no command given to check)" >&2
        return 0
    fi

    # The command runs inside a command substitution (a subshell) so that its
    # output can be captured instead of being thrown away.
    local output
    if output=$("$@" 2>&1); then
        echo "  PASS  $name"
        PASS=$((PASS + 1))
    else
        echo "  FAIL  $name"
        FAIL=$((FAIL + 1))
        # Show only the tail: for the Python snippets this is where the
        # exception type and assertion message end up.
        if [ -n "$output" ]; then
            printf '%s\n' "$output" | tail -n 5 | sed 's/^/        /' >&2
        fi
    fi
    return 0
}

printf '%s\n' "=== Preflight Checks ===" ""

# Interpreter version and the Python packages the training code imports.
printf '%s\n' "-- Python & Core Packages --"
check "python 3.10" python -c "import sys; assert sys.version_info[:2] == (3,10)"
check "torch imports" python -c "import torch"
# Only the major.minor components of the version string are compared.
check "transformers >= 4.45" python -c "import transformers; v=transformers.__version__; assert tuple(int(x) for x in v.split('.')[:2]) >= (4,45), v"
# These three are plain import smoke tests that share one label pattern.
for module in accelerate lerobot wandb; do
    check "$module imports" python -c "import $module"
done
check "huggingface_hub" python -c "import huggingface_hub"

printf '\n%s\n' "-- PaliGemma Config (the previous crash) --"
# Regression guard: builds the 'paligemma' config through AutoConfig, the call
# that the snippet's own comment identifies as the earlier crash site.
paligemma_probe="
from transformers import AutoConfig
# This is what crashed before - CONFIG_MAPPING['paligemma'] was None
c = AutoConfig.for_model('paligemma')
assert c is not None
"
check "PaliGemma registered" python -c "$paligemma_probe"

printf '\n%s\n' "-- FFmpeg --"
# First confirm the binary runs at all, then parse the major version number
# out of its banner and require it to be at least 6.
ffmpeg_major_probe="
import subprocess, re
out = subprocess.check_output(['ffmpeg', '-version']).decode()
ver = int(re.search(r'ffmpeg version (\d+)', out).group(1))
assert ver >= 6, f'ffmpeg {ver} < 6'
"
check "ffmpeg available" ffmpeg -version
check "ffmpeg version >= 6" python -c "$ffmpeg_major_probe"

printf '\n%s\n' "-- Project Files --"
# Every required file lives directly under the project directory; the check
# label is simply the file name.
project_dir=/workspace/pi05-so100-diverse
for required_file in filtered_index.json norm_stats.json train_cloud.sh so100_dataset.py; do
    check "$required_file" test -f "$project_dir/$required_file"
done

printf '\n%s\n' "-- LeRobot Patches Applied --"
# Each probe imports a lerobot module and looks for a marker string in its
# source text; a missing marker trips the assert and the check fails.
train_patch_probe="
import lerobot.scripts.lerobot_train
import inspect
src = inspect.getsource(lerobot.scripts.lerobot_train)
assert 'early_stop_steps' in src, 'train patch not applied'
"
factory_patch_probe="
import lerobot.datasets.factory
import inspect
src = inspect.getsource(lerobot.datasets.factory)
assert 'so100:' in src, 'factory patch not applied'
"
check "patched train script" python -c "$train_patch_probe"
check "patched factory" python -c "$factory_patch_probe"

printf '\n%s\n' "-- Accelerate Multi-GPU Config --"
# Only verifies that the launcher's help text can be produced; nothing is launched.
check "accelerate launch" accelerate launch --help

echo ""
echo "-- HuggingFace Auth --"
# Validate the token by asking the Hub who it belongs to. Skipped when no
# token is configured.
if [ -n "$HF_TOKEN" ]; then
    # Pass the token to Python through the environment instead of splicing it
    # into the -c source text: a token containing a quote or backslash would
    # otherwise break (or alter) the snippet, and the secret would be visible
    # in the process argument list.
    export HF_TOKEN
    check "HF_TOKEN valid" python -c "
import os
from huggingface_hub import HfApi
api = HfApi(token=os.environ['HF_TOKEN'])
api.whoami()
"
else
    echo "  SKIP  HF_TOKEN not set (set it to validate auth + Gemma license)"
fi

echo ""
echo "-- Weights Download (dry check) --"
# Fetch only the model metadata for lerobot/pi05_base using the configured
# token. Skipped when no token is configured.
if [ -n "$HF_TOKEN" ]; then
    # Pass the token to Python through the environment instead of splicing it
    # into the -c source text: a token containing a quote or backslash would
    # otherwise break (or alter) the snippet, and the secret would be visible
    # in the process argument list.
    export HF_TOKEN
    check "pi05_base accessible" python -c "
import os
from huggingface_hub import HfApi
api = HfApi(token=os.environ['HF_TOKEN'])
info = api.model_info('lerobot/pi05_base')
assert info is not None
"
else
    echo "  SKIP  Need HF_TOKEN to check model access"
fi

# Print the tally, then make the script's exit status reflect it:
# 0 when no check failed, 1 otherwise.
printf '\n%s\n' "================================"
printf '%s\n' "  Results: $PASS passed, $FAIL failed"
printf '%s\n' "================================"

if [ "$FAIL" -eq 0 ]; then
    exit 0
fi
exit 1