File size: 6,075 Bytes
08ff31f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env bash
# Run LIBERO eval on an 8-GPU server, partitioned for time balance:
#
#   GPU 0 -> libero_spatial   (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 1 -> libero_goal      (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 2 -> libero_object    (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 3 -> libero_10 t0..1  (2 tasks x 50 trials = 100 episodes, longer rollouts)
#   GPU 4 -> libero_10 t2..3
#   GPU 5 -> libero_10 t4..5
#   GPU 6 -> libero_10 t6..7
#   GPU 7 -> libero_10 t8..9
#
# Each eval client (rank N) connects to its own websocket policy server at
# port $BASE_PORT+N (ports $BASE_PORT+0..7, one server per GPU). You start the
# servers separately -- see the "Servers" section in the printout below for the
# canonical commands.
#
# Required env:
#   SPEED          target speed for this eval pass (e.g., 1.0, 1.5, 2.0)
#
# Optional env (defaults shown):
#   BASE_PORT=8000     ports BASE_PORT..BASE_PORT+7 (one per client)
#   HOST=0.0.0.0       host the eval clients connect to (passed as --host)
#   RESULTS_DIR=results/libero_eval_<SPEED_TAG>_<timestamp>
#                      (SPEED_TAG is SPEED with "." replaced by "p" plus "x", e.g. 1.5 -> 1p5x)
#   VIDEO_DIR=$RESULTS_DIR/videos
#   LOG_DIR=$RESULTS_DIR/logs
#   NUM_TRIALS=50      trials per task
#   SAVE_VIDEOS=1      any value other than 1 skips mp4 saves (faster)
#   PYTHON_CMD="uv run python"
#
# Example:
#   SPEED=1.5 BASE_PORT=8000 ./scripts/eval_libero_8gpu.sh
#
# Strict mode: abort on failing commands, unset variables, and failed pipeline stages.
set -euo pipefail

# --- Configuration (every value can be overridden from the environment) ---
# SPEED is mandatory: the ":?" expansion aborts with the message if it is unset or empty.
SPEED="${SPEED:?SPEED is required (e.g., SPEED=1.0)}"
BASE_PORT="${BASE_PORT:-8000}"
HOST="${HOST:-0.0.0.0}"
NUM_TRIALS="${NUM_TRIALS:-50}"
SAVE_VIDEOS="${SAVE_VIDEOS:-1}"
PYTHON_CMD="${PYTHON_CMD:-uv run python}"

# BASE_PORT is used inside $(( ... )) further down. Arithmetic expansion would
# evaluate a non-numeric string as an expression instead of rejecting it, so
# require a plain decimal integer here. The regex is tested first, so an
# invalid value never reaches the (( ... )) range check.
if [[ ! "$BASE_PORT" =~ ^[1-9][0-9]{0,4}$ ]] || (( BASE_PORT + 7 > 65535 )); then
  echo "BASE_PORT must be an integer in 1..65528 so that BASE_PORT+7 is a valid port (got '$BASE_PORT')" >&2
  exit 2
fi

# Timestamp that makes the default results directory unique per invocation.
TS="$(date +%Y%m%d_%H%M%S)"
# Filename-friendly speed tag: every "." becomes "p", plus a trailing "x" (1.5 -> 1p5x).
SPEED_TAG="${SPEED//./p}x"
RESULTS_DIR="${RESULTS_DIR:-results/libero_eval_${SPEED_TAG}_${TS}}"
VIDEO_DIR="${VIDEO_DIR:-$RESULTS_DIR/videos}"
LOG_DIR="${LOG_DIR:-$RESULTS_DIR/logs}"
mkdir -p -- "$RESULTS_DIR" "$VIDEO_DIR" "$LOG_DIR"

# Partition table, stored as parallel arrays that are indexed in lockstep:
#   RANKS[i]     rank id; also printed as the GPU number and added to BASE_PORT
#   SUITES[i]    task suite passed as --task-suite-name
#   TASK_IDS[i]  task selection passed as --task-ids ("all" or a comma list)
#   LABELS[i]    basename for that rank's results JSON, log file and video dir
# Ranks 0/1/2 -> spatial/goal/object in full; ranks 3..7 -> libero_10 split 5 ways.
RANKS=(0 1 2 3 4 5 6 7)
SUITES=(libero_spatial libero_goal libero_object libero_10 libero_10 libero_10 libero_10 libero_10)
TASK_IDS=(all all all "0,1" "2,3" "4,5" "6,7" "8,9")
LABELS=(spatial goal object long_t0_1 long_t2_3 long_t4_5 long_t6_7 long_t8_9)

if [[ "${#RANKS[@]}" -ne 8 ]]; then
  echo "Hardcoded for 8 ranks; edit the partition arrays to change." >&2
  exit 2
fi

# Catch a lopsided edit before anything is launched: a missing element would
# otherwise only surface in the middle of the launch loop, after some clients
# are already running in the background.
if (( ${#SUITES[@]} != ${#RANKS[@]} \
   || ${#TASK_IDS[@]} != ${#RANKS[@]} \
   || ${#LABELS[@]} != ${#RANKS[@]} )); then
  echo "Partition arrays RANKS/SUITES/TASK_IDS/LABELS must all have the same length." >&2
  exit 2
fi

# Print the resolved run configuration, followed by one line per rank showing
# its GPU number, port, suite, task ids and output label.
cat <<EOF
====================================================================
LIBERO 8-GPU eval driver
  speed         = $SPEED ($SPEED_TAG)
  results_dir   = $RESULTS_DIR
  base_port     = $BASE_PORT  (clients hit $HOST:$((BASE_PORT))..$HOST:$((BASE_PORT+7)))
  num_trials    = $NUM_TRIALS per task
  save_videos   = $SAVE_VIDEOS

Partition:
EOF
for i in "${!RANKS[@]}"; do
  printf "  rank=%d gpu=%d port=%d suite=%-15s task_ids=%-7s -> %s\n" \
    "${RANKS[$i]}" "${RANKS[$i]}" "$((BASE_PORT + RANKS[$i]))" \
    "${SUITES[$i]}" "${TASK_IDS[$i]}" "${LABELS[$i]}"
done

# Copy-pasteable hint for starting the policy servers. "\$g" is kept literal
# for the reader's shell, while $PYTHON_CMD and $BASE_PORT are expanded now so
# the printed loop uses this run's port range even when BASE_PORT is not set
# in the shell the commands get pasted into.
cat <<EOF

Servers (one per GPU, you must start these separately):
  for g in 0 1 2 3 4 5 6 7; do
    CUDA_VISIBLE_DEVICES=\$g $PYTHON_CMD scripts/serve_policy.py \\
      policy:checkpoint --policy.config=<your_config> \\
      --policy.dir=<your_ckpt_dir> --port=\$(($BASE_PORT + g)) &
  done

====================================================================
EOF

# PIDs of the launched eval clients, in the same order as RANKS.
pids=()

# Send SIGTERM to every client launched so far. Clients that have already
# exited make kill fail, which is deliberately ignored.
terminate_ranks() {
  local child
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older bash.
  for child in ${pids[@]+"${pids[@]}"}; do
    kill "$child" 2>/dev/null || true
  done
}
# Without job control (i.e. in a script) bash starts background commands with
# SIGINT ignored, so Ctrl-C or a SIGTERM sent to this driver would otherwise
# leave the clients running. Stop them explicitly and exit with 128+signal.
trap 'terminate_ranks; exit 130' INT
trap 'terminate_ranks; exit 143' TERM

# Start one eval client per rank in the background. Each client talks to the
# server at $HOST:$((BASE_PORT + rank)) and writes stdout+stderr to its own log.
for i in "${!RANKS[@]}"; do
  rank="${RANKS[$i]}"
  port=$((BASE_PORT + rank))
  suite="${SUITES[$i]}"
  ids="${TASK_IDS[$i]}"
  label="${LABELS[$i]}"
  results_json="$RESULTS_DIR/${label}_${SPEED_TAG}.json"
  log_path="$LOG_DIR/${label}_${SPEED_TAG}.log"

  # Build the argv as an array so the optional flag is appended explicitly.
  # shellcheck disable=SC2206  # PYTHON_CMD is intentionally word-split ("uv run python")
  cmd=(
    $PYTHON_CMD scripts/eval_libero_speed.py
    --task-suite-name "$suite"
    --task-ids "$ids"
    --host "$HOST" --port "$port"
    --speed "$SPEED"
    --num-trials-per-task "$NUM_TRIALS"
    --rank "$rank"
    --video-out-path "$VIDEO_DIR/${label}_${SPEED_TAG}"
    --results-json "$results_json"
  )
  # Videos are saved only when SAVE_VIDEOS is exactly "1".
  if [[ "$SAVE_VIDEOS" != "1" ]]; then
    cmd+=(--no-save-videos)
  fi

  echo "Launching rank=$rank ($label, suite=$suite, port=$port) -> $log_path"
  "${cmd[@]}" >"$log_path" 2>&1 &
  pids+=("$!")
done

printf '\n'
printf '%s\n' "All 8 ranks launched. Waiting..."

# Reap every client in launch order. A failing client marks the whole run as
# failed (status=1), but the remaining clients are still waited for so each
# one gets its own [done]/[FAIL] line.
status=0
for idx in "${!pids[@]}"; do
  name="${LABELS[$idx]}"
  if ! wait "${pids[$idx]}"; then
    echo "[FAIL] rank=${RANKS[$idx]} $name  (see $LOG_DIR/${name}_${SPEED_TAG}.log)" >&2
    status=1
    continue
  fi
  echo "[done] rank=${RANKS[$idx]} $name"
done

echo
echo "===================  Aggregated summary  ==================="
# Summarize this run's result JSONs with an inline Python script.
# The heredoc delimiter is quoted so the shell expands nothing inside the
# Python source; the values it needs are passed as arguments instead:
#   argv[1] = results directory, argv[2] = speed, argv[3] = speed tag.
$PYTHON_CMD - "$RESULTS_DIR" "$SPEED" "$SPEED_TAG" <<'PYEOF'
import json
import pathlib
import sys

results_dir = pathlib.Path(sys.argv[1])
speed = sys.argv[2]
speed_tag = sys.argv[3]

# Only pick up files written for this speed (<label>_<speed_tag>.json), so a
# results directory reused across speeds does not mix runs in the rollups.
suffix = f"_{speed_tag}.json"
files = sorted(p for p in results_dir.glob("*.json") if p.name.endswith(suffix))
if not files:
    print("No result JSONs found in", results_dir)
    raise SystemExit(0)


def _load(fp):
    """Read one result file and return the fields the report needs."""
    with fp.open() as f:
        d = json.load(f)
    summary = d["summary"]
    return {
        "summary_line": summary["summary_line"],
        "suite": summary["suite"],
        "episodes": d["episodes"],
    }


# Load every file exactly once. A truncated or malformed file (for example
# from a rank that died while writing) is reported and skipped, so the
# remaining ranks are still summarized.
per_rank = []
for fp in files:
    try:
        per_rank.append(_load(fp))
    except (OSError, ValueError, KeyError, TypeError) as exc:
        print(f"[warn] skipping unreadable result file {fp}: {exc!r}", file=sys.stderr)

# Per-rank lines
for row in per_rank:
    print(row["summary_line"])


# Cross-rank rollups
def _agg(rows, keep_suite=None):
    """Pool the episodes of `rows` (optionally one suite only) into totals.

    Returns None when no episode matches; otherwise a dict with the episode
    count, success count, success rate, and mean step counts over successful
    and over all episodes.
    """
    eps = [
        e
        for row in rows
        if keep_suite is None or row["suite"] == keep_suite
        for e in row["episodes"]
    ]
    if not eps:
        return None
    succ = [e for e in eps if e["success"]]
    return {
        "n": len(eps),
        "n_succ": len(succ),
        "sr": len(succ) / len(eps),
        "mean_steps_succ": (sum(e["steps"] for e in succ) / len(succ)) if succ else float("nan"),
        "mean_steps_all": sum(e["steps"] for e in eps) / len(eps),
    }


print()
print("--- per-suite rollup ---")
for suite in ("libero_spatial", "libero_goal", "libero_object", "libero_10"):
    r = _agg(per_rank, keep_suite=suite)
    if r:
        print(f"  {suite:16s}  success={r['n_succ']}/{r['n']} ({r['sr']*100:.1f}%)  "
              f"mean_steps_success={r['mean_steps_succ']:.1f}  mean_steps_all={r['mean_steps_all']:.1f}")

g = _agg(per_rank)
if g:
    print()
    print(f"GLOBAL (speed={speed}): success={g['n_succ']}/{g['n']} ({g['sr']*100:.1f}%)  "
          f"mean_steps_success={g['mean_steps_succ']:.1f}  mean_steps_all={g['mean_steps_all']:.1f}")
PYEOF

# Exit non-zero if any rank failed, even though the summary itself succeeded.
exit "$status"