#!/usr/bin/env bash
# Run LIBERO eval on an 8-GPU server, partitioned for time balance:
#
# GPU 0 -> libero_spatial (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 1 -> libero_goal (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 2 -> libero_object (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 3 -> libero_10 t0..1 (2 tasks x 50 trials = 100 episodes, longer rollouts)
# GPU 4 -> libero_10 t2..3
# GPU 5 -> libero_10 t4..5
# GPU 6 -> libero_10 t6..7
# GPU 7 -> libero_10 t8..9
#
# Each GPU points to its own websocket policy server (one per GPU at ports
# $BASE_PORT+0..7 by default). You start the servers separately -- see the
# "Servers" section in the printout below for the canonical commands.
#
# Required env:
# SPEED target speed for this eval pass (e.g., 1.0, 1.5, 2.0)
#
# Optional env (defaults shown):
# BASE_PORT=8000 ports BASE_PORT..BASE_PORT+7 (one per client)
# HOST=0.0.0.0 server hostname
# RESULTS_DIR=results/libero_eval_<SPEED_TAG>_<timestamp> (SPEED_TAG is SPEED with "." replaced by "p" plus a trailing "x", e.g. 1.5 -> 1p5x)
# VIDEO_DIR=$RESULTS_DIR/videos
# LOG_DIR=$RESULTS_DIR/logs
# NUM_TRIALS=50 trials per task
# SAVE_VIDEOS=1 set to 0 to skip mp4 saves (faster)
# PYTHON_CMD="uv run python"
#
# Example:
# SPEED=1.5 BASE_PORT=8000 ./scripts/eval_libero_8gpu.sh
#
# Fail fast: abort on command errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# SPEED has no default; every other knob falls back when unset or empty.
: "${SPEED:?SPEED is required (e.g., SPEED=1.0)}"
: "${BASE_PORT:=8000}"
: "${HOST:=0.0.0.0}"
: "${NUM_TRIALS:=50}"
: "${SAVE_VIDEOS:=1}"
: "${PYTHON_CMD:=uv run python}"

# Run identifiers: a launch timestamp and a path-friendly speed tag
# (dots become "p", then "x" is appended, so 1.5 -> 1p5x).
TS="$(date +%Y%m%d_%H%M%S)"
SPEED_TAG="${SPEED//./p}x"

# Output locations; videos and logs live under the results dir by default.
: "${RESULTS_DIR:=results/libero_eval_${SPEED_TAG}_${TS}}"
: "${VIDEO_DIR:=$RESULTS_DIR/videos}"
: "${LOG_DIR:=$RESULTS_DIR/logs}"
mkdir -p "$RESULTS_DIR" "$VIDEO_DIR" "$LOG_DIR"
# Partition table, stored as parallel arrays that share one subscript:
#   RANKS[i]    rank id; also the offset added to BASE_PORT for that client
#   SUITES[i]   task suite name passed to the eval client
#   TASK_IDS[i] "all" or a comma-separated list of task ids
#   LABELS[i]   basename for that rank's results JSON, log file and video dir
# GPU 0/1/2 -> spatial/goal/object full; GPU 3..7 -> libero_10 split 5 ways.
RANKS=(0 1 2 3 4 5 6 7)
SUITES=(libero_spatial libero_goal libero_object libero_10 libero_10 libero_10 libero_10 libero_10)
TASK_IDS=(all all all "0,1" "2,3" "4,5" "6,7" "8,9")
LABELS=(spatial goal object long_t0_1 long_t2_3 long_t4_5 long_t6_7 long_t8_9)

# Every array is indexed with the same subscript later on, so all four must
# have exactly 8 entries -- checking RANKS alone would let a short sibling
# array surface only as an unbound-variable abort in the middle of a launch.
for partition_len in "${#RANKS[@]}" "${#SUITES[@]}" "${#TASK_IDS[@]}" "${#LABELS[@]}"; do
  if [[ "$partition_len" -ne 8 ]]; then
    echo "Hardcoded for 8 ranks; edit the partition arrays to change." >&2
    exit 2
  fi
done
# Banner, part 1: the effective settings for this eval pass.
printf '%s\n' \
  "====================================================================" \
  "LIBERO 8-GPU eval driver" \
  "speed = $SPEED ($SPEED_TAG)" \
  "results_dir = $RESULTS_DIR" \
  "base_port = $BASE_PORT (clients hit $HOST:$((BASE_PORT))..$HOST:$((BASE_PORT + 7)))" \
  "num_trials = $NUM_TRIALS per task" \
  "save_videos = $SAVE_VIDEOS" \
  "Partition:"

# Banner, part 2: one table row per partition slot. The rank id is printed
# both as the rank and as the gpu, and is the offset added to BASE_PORT.
for ((slot = 0; slot < ${#RANKS[@]}; slot++)); do
  rank_id="${RANKS[slot]}"
  printf " rank=%d gpu=%d port=%d suite=%-15s task_ids=%-7s -> %s\n" \
    "$rank_id" "$rank_id" "$((BASE_PORT + rank_id))" \
    "${SUITES[slot]}" "${TASK_IDS[slot]}" "${LABELS[slot]}"
done

# Banner, part 3: copy-pasteable recipe for the policy servers. Only
# PYTHON_CMD is expanded here; $g and $((BASE_PORT + g)) are printed
# literally so they are evaluated by whoever runs the recipe.
printf '%s\n' \
  'Servers (one per GPU, you must start these separately):' \
  'for g in 0 1 2 3 4 5 6 7; do' \
  "CUDA_VISIBLE_DEVICES=\$g $PYTHON_CMD scripts/serve_policy.py \\" \
  'policy:checkpoint --policy.config=<your_config> \' \
  '--policy.dir=<your_ckpt_dir> --port=$((BASE_PORT + g)) &' \
  'done' \
  '===================================================================='
# Launch one eval client per partition slot in the background. Each client
# connects to $HOST on port BASE_PORT + rank, writes stdout/stderr to its own
# log file, and has its PID recorded so the driver can wait on it afterwards.
pids=()

# Stop every launched client when the driver itself is interrupted or
# terminated; the background jobs would otherwise outlive this script.
# Arguments: $1 - exit status to terminate the driver with.
_stop_ranks() {
  trap - INT TERM
  if ((${#pids[@]} > 0)); then
    # Best effort: some clients may already have exited.
    kill "${pids[@]}" 2>/dev/null || true
  fi
  exit "$1"
}
trap '_stop_ranks 130' INT
trap '_stop_ranks 143' TERM

# Split PYTHON_CMD (e.g. "uv run python") into words once, without globbing.
read -r -a py_cmd <<<"$PYTHON_CMD"

for i in "${!RANKS[@]}"; do
  rank="${RANKS[$i]}"
  port=$((BASE_PORT + rank))
  suite="${SUITES[$i]}"
  ids="${TASK_IDS[$i]}"
  label="${LABELS[$i]}"
  results_json="$RESULTS_DIR/${label}_${SPEED_TAG}.json"
  log_path="$LOG_DIR/${label}_${SPEED_TAG}.log"

  # Build the argv as an array so every value stays a single word.
  client_cmd=(
    "${py_cmd[@]}" scripts/eval_libero_speed.py
    --task-suite-name "$suite"
    --task-ids "$ids"
    --host "$HOST" --port "$port"
    --speed "$SPEED"
    --num-trials-per-task "$NUM_TRIALS"
    --rank "$rank"
    --video-out-path "$VIDEO_DIR/${label}_${SPEED_TAG}"
    --results-json "$results_json"
  )
  # Any SAVE_VIDEOS value other than "1" turns video saving off.
  if [[ "$SAVE_VIDEOS" != "1" ]]; then
    client_cmd+=(--no-save-videos)
  fi

  echo "Launching rank=$rank ($label, suite=$suite, port=$port) -> $log_path"
  "${client_cmd[@]}" >"$log_path" 2>&1 &
  pids+=("$!")
done
# Barrier: reap the clients in launch order. Any non-zero exit flips the
# overall status to 1 and points at that rank's log file.
printf '\n'
printf '%s\n' "All 8 ranks launched. Waiting..."
status=0
for ((slot = 0; slot < ${#pids[@]}; slot++)); do
  label="${LABELS[slot]}"
  if ! wait "${pids[slot]}"; then
    echo "[FAIL] rank=${RANKS[slot]} $label (see $LOG_DIR/${label}_${SPEED_TAG}.log)" >&2
    status=1
    continue
  fi
  echo "[done] rank=${RANKS[slot]} $label"
done
echo
echo "=================== Aggregated summary ==================="
# Roll up this pass's result JSONs with an inline Python script. The heredoc
# delimiter is quoted so the shell leaves the Python source untouched; the
# run parameters are handed over through the environment instead of being
# spliced into the code, so paths containing quotes or backslashes are safe.
read -r -a summary_cmd <<<"$PYTHON_CMD"
LIBERO_RESULTS_DIR="$RESULTS_DIR" LIBERO_SPEED="$SPEED" LIBERO_SPEED_TAG="$SPEED_TAG" \
  "${summary_cmd[@]}" - <<'PYEOF'
import json
import os
import pathlib

results_dir = pathlib.Path(os.environ["LIBERO_RESULTS_DIR"])
speed = os.environ["LIBERO_SPEED"]

# Result files are written as <label>_<speed tag>.json; matching on the tag
# keeps JSONs from other passes that share this directory out of the rollup.
suffix = "_" + os.environ["LIBERO_SPEED_TAG"] + ".json"
files = sorted(p for p in results_dir.glob("*.json") if p.name.endswith(suffix))
if not files:
    print("No result JSONs found in", results_dir)
    raise SystemExit(0)

# Parse every file exactly once; all rollups below reuse these records.
runs = []
for fp in files:
    with fp.open() as f:
        runs.append(json.load(f))

# Per-rank lines
for run in runs:
    print(run["summary"]["summary_line"])


# Cross-rank rollups
def _agg(rows, keep_suite=None):
    """Pool the episodes of every record in rows, optionally one suite only.

    Returns None when no episode matches, otherwise a dict with the episode
    count, success count, success rate and mean step counts.
    """
    eps = []
    for d in rows:
        if keep_suite is None or d["summary"]["suite"] == keep_suite:
            eps.extend(d["episodes"])
    if not eps:
        return None
    succ = [e for e in eps if e["success"]]
    return {
        "n": len(eps),
        "n_succ": len(succ),
        "sr": len(succ) / len(eps),
        "mean_steps_succ": (sum(e["steps"] for e in succ) / len(succ)) if succ else float("nan"),
        "mean_steps_all": sum(e["steps"] for e in eps) / len(eps),
    }


print()
print("--- per-suite rollup ---")
for suite in ("libero_spatial", "libero_goal", "libero_object", "libero_10"):
    r = _agg(runs, keep_suite=suite)
    if r:
        print(f" {suite:16s} success={r['n_succ']}/{r['n']} ({r['sr']*100:.1f}%) "
              f"mean_steps_success={r['mean_steps_succ']:.1f} mean_steps_all={r['mean_steps_all']:.1f}")

g = _agg(runs)
if g:
    print()
    print(f"GLOBAL (speed={speed}): success={g['n_succ']}/{g['n']} ({g['sr']*100:.1f}%) "
          f"mean_steps_success={g['mean_steps_succ']:.1f} mean_steps_all={g['mean_steps_all']:.1f}")
PYEOF
# Propagate whether any rank failed, even though the summary itself succeeded.
exit "$status"