VLAwithVariousSpeed / scripts /eval_libero_8gpu.sh
Alan0928's picture
Upload folder using huggingface_hub
08ff31f verified
Raw
History Blame Contribute Delete
6.08 kB
#!/usr/bin/env bash
# Run LIBERO eval on an 8-GPU server, partitioned for time balance:
#
# GPU 0 -> libero_spatial (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 1 -> libero_goal (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 2 -> libero_object (full suite, 10 tasks x 50 trials = 500 episodes)
# GPU 3 -> libero_10 t0..1 (2 tasks x 50 trials = 100 episodes, longer rollouts)
# GPU 4 -> libero_10 t2..3
# GPU 5 -> libero_10 t4..5
# GPU 6 -> libero_10 t6..7
# GPU 7 -> libero_10 t8..9
#
# Each client rank connects to its own websocket policy server (one per GPU, at
# ports BASE_PORT+0..BASE_PORT+7; BASE_PORT defaults to 8000). You start the
# servers separately -- see the "Servers" section in the printout below for the
# canonical commands.
#
# Required env:
# SPEED target speed for this eval pass (e.g., 1.0, 1.5, 2.0)
#
# Optional env (defaults shown):
# BASE_PORT=8000 ports BASE_PORT..BASE_PORT+7 (one per client)
# HOST=0.0.0.0 host the eval clients connect to (where the policy servers listen)
# RESULTS_DIR=results/libero_eval_<SPEED>x_<timestamp>
# VIDEO_DIR=$RESULTS_DIR/videos
# LOG_DIR=$RESULTS_DIR/logs
# NUM_TRIALS=50 trials per task
# SAVE_VIDEOS=1 set to 0 to skip mp4 saves (faster)
# PYTHON_CMD="uv run python"
#
# Example:
# SPEED=1.5 BASE_PORT=8000 ./scripts/eval_libero_8gpu.sh
#
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# --- Configuration: environment overrides with defaults (see header) ---------
SPEED="${SPEED:?SPEED is required (e.g., SPEED=1.0)}"
BASE_PORT="${BASE_PORT:-8000}"
HOST="${HOST:-0.0.0.0}"
NUM_TRIALS="${NUM_TRIALS:-50}"
SAVE_VIDEOS="${SAVE_VIDEOS:-1}"
PYTHON_CMD="${PYTHON_CMD:-uv run python}"

# BASE_PORT is later used inside arithmetic expansion ($((BASE_PORT + rank))).
# Bash arithmetic evaluates arbitrary expressions and treats a leading 0 as
# octal, so insist on a plain decimal number whose +7 offset is still a valid
# TCP port before going any further.
if [[ ! "$BASE_PORT" =~ ^[1-9][0-9]{0,4}$ ]] || (( BASE_PORT + 7 > 65535 )); then
  echo "BASE_PORT must be a decimal port number with BASE_PORT+7 <= 65535 (got '$BASE_PORT')" >&2
  exit 2
fi

# Timestamp and a filesystem-friendly speed tag ("1.5" -> "1p5x") used in the
# default results directory name and in per-rank file names.
TS="$(date +%Y%m%d_%H%M%S)"
SPEED_TAG="${SPEED//./p}x"

# Output locations; VIDEO_DIR and LOG_DIR default to subdirectories of
# RESULTS_DIR unless overridden individually.
RESULTS_DIR="${RESULTS_DIR:-results/libero_eval_${SPEED_TAG}_${TS}}"
VIDEO_DIR="${VIDEO_DIR:-$RESULTS_DIR/videos}"
LOG_DIR="${LOG_DIR:-$RESULTS_DIR/logs}"
mkdir -p -- "$RESULTS_DIR" "$VIDEO_DIR" "$LOG_DIR"
# Partition table, stored as four parallel arrays indexed by position:
#   RANKS    - client rank; also printed as the GPU index and used as the
#              port offset from BASE_PORT
#   SUITES   - LIBERO task suite evaluated by that rank
#   TASK_IDS - task selection within the suite ("all" or a comma-separated list)
#   LABELS   - basename used for that rank's results JSON, log and video dir
# GPU 0/1/2 -> spatial/goal/object full; GPU 3..7 -> libero_10 split 5 ways.
RANKS=(0 1 2 3 4 5 6 7)
SUITES=(libero_spatial libero_goal libero_object libero_10 libero_10 libero_10 libero_10 libero_10)
TASK_IDS=(all all all "0,1" "2,3" "4,5" "6,7" "8,9")
LABELS=(spatial goal object long_t0_1 long_t2_3 long_t4_5 long_t6_7 long_t8_9)

#######################################
# Check that every partition array has the expected number of entries.
# Globals:   RANKS, SUITES, TASK_IDS, LABELS (read)
# Arguments: $1 - expected entry count
# Returns:   0 if all four arrays have exactly $1 entries, non-zero otherwise
#######################################
partition_is_consistent() {
  local expected=$1
  (( ${#RANKS[@]} == expected )) \
    && (( ${#SUITES[@]} == expected )) \
    && (( ${#TASK_IDS[@]} == expected )) \
    && (( ${#LABELS[@]} == expected ))
}

# The arrays are indexed in lock-step further down, so a length mismatch in any
# of them (not just RANKS) must be rejected up front rather than surfacing as
# an unbound-variable error after some clients have already been launched.
if ! partition_is_consistent 8; then
  echo "Hardcoded for 8 ranks; RANKS, SUITES, TASK_IDS and LABELS must each have 8 entries -- edit the partition arrays together to change." >&2
  exit 2
fi
# Print the resolved run configuration, the rank -> suite/task partition, and
# the commands for starting the policy servers (which this script does not
# start itself).
cat <<EOF
====================================================================
LIBERO 8-GPU eval driver
speed = $SPEED ($SPEED_TAG)
results_dir = $RESULTS_DIR
base_port = $BASE_PORT (clients hit $HOST:$((BASE_PORT))..$HOST:$((BASE_PORT+7)))
num_trials = $NUM_TRIALS per task
save_videos = $SAVE_VIDEOS
Partition:
EOF

# One line per partition entry; the rank is printed as both rank and gpu, and
# the port is BASE_PORT offset by the rank.
for i in "${!RANKS[@]}"; do
  printf " rank=%d gpu=%d port=%d suite=%-15s task_ids=%-7s -> %s\n" \
    "${RANKS[$i]}" "${RANKS[$i]}" "$((BASE_PORT + RANKS[$i]))" \
    "${SUITES[$i]}" "${TASK_IDS[$i]}" "${LABELS[$i]}"
done

# \$g and \$(( stay literal so the printed loop can be pasted into a shell, but
# the base port is substituted here: BASE_PORT may only have been set for this
# script's invocation and need not exist in the shell the user pastes into.
cat <<EOF
Servers (one per GPU, you must start these separately):
for g in 0 1 2 3 4 5 6 7; do
CUDA_VISIBLE_DEVICES=\$g $PYTHON_CMD scripts/serve_policy.py \\
policy:checkpoint --policy.config=<your_config> \\
--policy.dir=<your_ckpt_dir> --port=\$(($BASE_PORT + g)) &
done
====================================================================
EOF
# Launch one eval client per partition entry in the background, each writing
# stdout+stderr to its own log file, and record the PIDs in partition order so
# they can be waited on afterwards.

# PYTHON_CMD may consist of several words (default: "uv run python"); split it
# once into an argv array so it can be expanded quoted, without globbing.
read -r -a python_cmd <<<"$PYTHON_CMD"

pids=()

#######################################
# Send SIGTERM to every client launched so far.
# Globals: pids (read)
#######################################
terminate_clients() {
  local pid
  # The ${arr[@]+...} form keeps `set -u` happy while pids is still empty.
  for pid in ${pids[@]+"${pids[@]}"}; do
    kill "$pid" 2>/dev/null || true # already-exited clients are fine
  done
}

# Without job control, bash starts background commands with SIGINT ignored, so
# interrupting this driver would otherwise leave the clients running. Forward
# termination explicitly and exit with the conventional 128+signal status.
trap 'terminate_clients; exit 130' INT
trap 'terminate_clients; exit 143' TERM

for i in "${!RANKS[@]}"; do
  rank="${RANKS[$i]}"
  port=$((BASE_PORT + rank))
  suite="${SUITES[$i]}"
  ids="${TASK_IDS[$i]}"
  label="${LABELS[$i]}"
  results_json="$RESULTS_DIR/${label}_${SPEED_TAG}.json"
  log_path="$LOG_DIR/${label}_${SPEED_TAG}.log"

  echo "Launching rank=$rank ($label, suite=$suite, port=$port) -> $log_path"

  # Build the client command as an array so every argument stays one word.
  client_cmd=(
    "${python_cmd[@]}" scripts/eval_libero_speed.py
    --task-suite-name "$suite"
    --task-ids "$ids"
    --host "$HOST" --port "$port"
    --speed "$SPEED"
    --num-trials-per-task "$NUM_TRIALS"
    --rank "$rank"
    --video-out-path "$VIDEO_DIR/${label}_${SPEED_TAG}"
    --results-json "$results_json"
  )
  # Any SAVE_VIDEOS value other than "1" disables video saving.
  if [[ "$SAVE_VIDEOS" != "1" ]]; then
    client_cmd+=(--no-save-videos)
  fi

  "${client_cmd[@]}" >"$log_path" 2>&1 &
  pids+=("$!")
done
echo
echo "All 8 ranks launched. Waiting..."

# Reap the clients in launch order. pids[k] belongs to partition entry k, so
# the same index selects the matching rank and label. Any failing client makes
# the overall status 1, but the remaining clients are still waited for.
status=0
for (( idx = 0; idx < ${#pids[@]}; idx++ )); do
  label="${LABELS[idx]}"
  if ! wait "${pids[idx]}"; then
    echo "[FAIL] rank=${RANKS[idx]} $label (see $LOG_DIR/${label}_${SPEED_TAG}.log)" >&2
    status=1
    continue
  fi
  echo "[done] rank=${RANKS[idx]} $label"
done
echo
echo "=================== Aggregated summary ==================="

# PYTHON_CMD may consist of several words (default: "uv run python"); split it
# into an argv array so it can be expanded quoted.
read -r -a summary_python <<<"$PYTHON_CMD"

# Aggregate the per-rank result JSONs of this speed pass.
# The heredoc delimiter is quoted so the shell expands nothing inside the
# Python source; values are passed as arguments instead, which keeps paths or
# speeds containing quotes, backslashes or braces from corrupting the program.
#   argv[1] = results directory, argv[2] = speed, argv[3] = speed tag
"${summary_python[@]}" - "$RESULTS_DIR" "$SPEED" "$SPEED_TAG" <<'PYEOF'
import json
import pathlib
import sys

results_dir = pathlib.Path(sys.argv[1])
speed = sys.argv[2]
# Per-rank files are written as <label>_<speed_tag>.json. Only those are
# aggregated, so a RESULTS_DIR reused across speed passes does not mix other
# speeds into a rollup labelled with this speed.
suffix = f"_{sys.argv[3]}.json"

files = sorted(fp for fp in results_dir.glob("*.json") if fp.name.endswith(suffix))
if not files:
    print("No result JSONs found in", results_dir)
    raise SystemExit(0)

# Load every result file exactly once. Each document is expected to provide
# "summary" (with "summary_line" and "suite") and "episodes" (a list of
# entries with "success" and "steps").
results = []
for fp in files:
    with fp.open() as f:
        results.append(json.load(f))

# Per-rank lines
for d in results:
    print(d["summary"]["summary_line"])


# Cross-rank rollups
def _agg(rows, keep_suite=None):
    """Pool the episodes of `rows`, optionally restricted to one suite.

    Returns None when no episode matches; otherwise a dict with the episode
    count, success count, success rate, and mean step counts over successful
    and over all episodes (NaN for the former when nothing succeeded).
    """
    eps = []
    for d in rows:
        if keep_suite is None or d["summary"]["suite"] == keep_suite:
            eps.extend(d["episodes"])
    if not eps:
        return None
    succ = [e for e in eps if e["success"]]
    return {
        "n": len(eps),
        "n_succ": len(succ),
        "sr": len(succ) / len(eps),
        "mean_steps_succ": (sum(e["steps"] for e in succ) / len(succ)) if succ else float("nan"),
        "mean_steps_all": sum(e["steps"] for e in eps) / len(eps),
    }


print()
print("--- per-suite rollup ---")
for suite in ("libero_spatial", "libero_goal", "libero_object", "libero_10"):
    r = _agg(results, keep_suite=suite)
    if r:
        print(f" {suite:16s} success={r['n_succ']}/{r['n']} ({r['sr']*100:.1f}%) "
              f"mean_steps_success={r['mean_steps_succ']:.1f} mean_steps_all={r['mean_steps_all']:.1f}")

g = _agg(results)
if g:
    print()
    print(f"GLOBAL (speed={speed}): success={g['n_succ']}/{g['n']} ({g['sr']*100:.1f}%) "
          f"mean_steps_success={g['mean_steps_succ']:.1f} mean_steps_all={g['mean_steps_all']:.1f}")
PYEOF

# Propagate whether any client failed (0 = all succeeded, 1 = at least one failed).
exit "$status"