File size: 6,075 Bytes

08ff31f

#!/usr/bin/env bash
# Run LIBERO eval on an 8-GPU server, partitioned for time balance:
#
#   GPU 0 -> libero_spatial   (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 1 -> libero_goal      (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 2 -> libero_object    (full suite, 10 tasks x 50 trials = 500 episodes)
#   GPU 3 -> libero_10 t0..1  (2 tasks x 50 trials = 100 episodes, longer rollouts)
#   GPU 4 -> libero_10 t2..3
#   GPU 5 -> libero_10 t4..5
#   GPU 6 -> libero_10 t6..7
#   GPU 7 -> libero_10 t8..9
#
# Each GPU points to its own websocket policy server (one per GPU at ports
# $BASE_PORT+0..7 by default). You start the servers separately -- see the
# "Servers" section in the printout below for the canonical commands.
#
# Required env:
#   SPEED          target speed for this eval pass (e.g., 1.0, 1.5, 2.0)
#
# Optional env (defaults shown):
#   BASE_PORT=8000     ports BASE_PORT..BASE_PORT+7 (one per client)
#   HOST=0.0.0.0       server hostname
#   RESULTS_DIR=results/libero_eval_<SPEED with '.' replaced by 'p'>x_<timestamp>
#                      (e.g. results/libero_eval_1p5x_20240131_120000)
#   VIDEO_DIR=$RESULTS_DIR/videos
#   LOG_DIR=$RESULTS_DIR/logs
#   NUM_TRIALS=50      trials per task
#   SAVE_VIDEOS=1      set to 0 to skip mp4 saves (faster); any value other than 1 skips them
#   PYTHON_CMD="uv run python"
#
# Example:
#   SPEED=1.5 BASE_PORT=8000 ./scripts/eval_libero_8gpu.sh
#
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# --- Configuration from the environment -------------------------------------
# SPEED has no default: the ':?' expansion aborts with the message below when
# it is unset or empty. Every other knob falls back to its documented default.
SPEED="${SPEED:?SPEED is required (e.g., SPEED=1.0)}"
BASE_PORT="${BASE_PORT:-8000}"
HOST="${HOST:-0.0.0.0}"
NUM_TRIALS="${NUM_TRIALS:-50}"
SAVE_VIDEOS="${SAVE_VIDEOS:-1}"
PYTHON_CMD="${PYTHON_CMD:-uv run python}"

# BASE_PORT is used inside $(( ... )) later in this script. Without a check, a
# value with a leading zero (e.g. 08000) is parsed as octal and aborts with an
# arithmetic error, and a non-numeric value is evaluated as a variable name.
# Validate up front, before any directory is created, and force base 10.
if [[ ! "$BASE_PORT" =~ ^[0-9]+$ ]]; then
  echo "BASE_PORT must be a non-negative integer (got '$BASE_PORT')" >&2
  exit 2
fi
BASE_PORT=$((10#$BASE_PORT))
# Clients use BASE_PORT..BASE_PORT+7, so the whole range has to be a valid port.
if ((BASE_PORT < 1 || BASE_PORT + 7 > 65535)); then
  echo "BASE_PORT..BASE_PORT+7 must lie within 1..65535 (got BASE_PORT=$BASE_PORT)" >&2
  exit 2
fi

# --- Output locations --------------------------------------------------------
# TS makes the default results directory unique per invocation.
TS="$(date +%Y%m%d_%H%M%S)"
# File-name friendly form of SPEED: every '.' becomes 'p' and an 'x' is
# appended, e.g. 1.5 -> 1p5x.
SPEED_TAG="${SPEED//./p}x"
RESULTS_DIR="${RESULTS_DIR:-results/libero_eval_${SPEED_TAG}_${TS}}"
VIDEO_DIR="${VIDEO_DIR:-$RESULTS_DIR/videos}"
LOG_DIR="${LOG_DIR:-$RESULTS_DIR/logs}"
mkdir -p "$RESULTS_DIR" "$VIDEO_DIR" "$LOG_DIR"

# Partition table, stored as four parallel arrays that share one index:
#   RANKS[i]     rank id; also used as the port offset from BASE_PORT and
#                shown as the GPU index in the printout
#   SUITES[i]    task suite name handed to the eval client
#   TASK_IDS[i]  task selection within that suite ("all" or a comma list)
#   LABELS[i]    basename used for that row's results JSON, log and video dir
# GPU 0/1/2 -> spatial/goal/object full; GPU 3..7 -> libero_10 split 5 ways.
RANKS=(0 1 2 3 4 5 6 7)
SUITES=(libero_spatial libero_goal libero_object libero_10 libero_10 libero_10 libero_10 libero_10)
TASK_IDS=(all all all "0,1" "2,3" "4,5" "6,7" "8,9")
LABELS=(spatial goal object long_t0_1 long_t2_3 long_t4_5 long_t6_7 long_t8_9)

#######################################
# Check that every partition array has the expected number of rows.
# Globals:   RANKS, SUITES, TASK_IDS, LABELS (read)
# Arguments: $1 - expected row count
# Returns:   0 when all four arrays have exactly $1 entries, 1 otherwise
#######################################
partition_arrays_consistent() {
  local -r expected=$1
  [[ "${#RANKS[@]}" -eq "$expected" &&
    "${#SUITES[@]}" -eq "$expected" &&
    "${#TASK_IDS[@]}" -eq "$expected" &&
    "${#LABELS[@]}" -eq "$expected" ]]
}

# The rest of the script indexes all four arrays with the indices of RANKS, so
# a row added to or removed from only one array must be caught here rather
# than halfway through launching clients.
if ! partition_arrays_consistent 8; then
  echo "Hardcoded for 8 ranks; edit the partition arrays to change." >&2
  echo "  entries: RANKS=${#RANKS[@]} SUITES=${#SUITES[@]} TASK_IDS=${#TASK_IDS[@]} LABELS=${#LABELS[@]}" >&2
  exit 2
fi

# Print the resolved configuration, the partition table and a copy-pasteable
# recipe for starting the policy servers. Nothing is launched here.
# The heredocs are unquoted on purpose so the settings are interpolated;
# \$ and \\ keep a literal '$' and a trailing '\' in the printed recipe.
cat <<EOF
====================================================================
LIBERO 8-GPU eval driver
  speed         = $SPEED ($SPEED_TAG)
  results_dir   = $RESULTS_DIR
  base_port     = $BASE_PORT  (clients hit $HOST:$((BASE_PORT))..$HOST:$((BASE_PORT+7)))
  num_trials    = $NUM_TRIALS per task
  save_videos   = $SAVE_VIDEOS

Partition:
EOF
# One line per partition row. The rank is printed for both rank= and gpu=.
for i in "${!RANKS[@]}"; do
  printf "  rank=%d gpu=%d port=%d suite=%-15s task_ids=%-7s -> %s\n" \
    "${RANKS[$i]}" "${RANKS[$i]}" "$((BASE_PORT + RANKS[$i]))" \
    "${SUITES[$i]}" "${TASK_IDS[$i]}" "${LABELS[$i]}"
done
# The port expression embeds the resolved base port (e.g. \$((8000 + g))), not
# the name BASE_PORT: that variable is normally not set in the shell the recipe
# gets pasted into, where it would silently evaluate to 0.
cat <<EOF

Servers (one per GPU, you must start these separately):
  for g in 0 1 2 3 4 5 6 7; do
    CUDA_VISIBLE_DEVICES=\$g $PYTHON_CMD scripts/serve_policy.py \\
      policy:checkpoint --policy.config=<your_config> \\
      --policy.dir=<your_ckpt_dir> --port=\$(($BASE_PORT + g)) &
  done

====================================================================
EOF

# Launch one eval client per partition row in the background and remember its
# PID in pids[], in the same order as the partition arrays.

# Optional client flags, kept in an array so nothing depends on word-splitting
# a command substitution. Video saving stays on only when SAVE_VIDEOS is
# exactly "1".
extra_args=()
if [[ "$SAVE_VIDEOS" != "1" ]]; then
  extra_args+=(--no-save-videos)
fi

#######################################
# Send SIGTERM to every background client that is still running.
# `jobs -p` lists only jobs bash still tracks, so PIDs of clients that were
# already reaped are never signalled.
# Globals:   none
# Arguments: none
# Returns:   0 always (kill failures are ignored on purpose: a client may
#            exit between the listing and the kill)
#######################################
stop_clients() {
  local live
  live="$(jobs -p)"
  if [[ -n "$live" ]]; then
    # shellcheck disable=SC2086 # one PID per word is intended
    kill $live 2>/dev/null || true
  fi
}
# Without job control (the normal case for a script) bash starts background
# commands with SIGINT ignored, so interrupting this driver would otherwise
# leave the clients running. Stop them explicitly, then exit with the
# conventional 128+signal status.
trap 'stop_clients; exit 130' INT
trap 'stop_clients; exit 143' TERM

pids=()
for i in "${!RANKS[@]}"; do
  rank="${RANKS[$i]}"
  port=$((BASE_PORT + rank))
  suite="${SUITES[$i]}"
  ids="${TASK_IDS[$i]}"
  label="${LABELS[$i]}"
  results_json="$RESULTS_DIR/${label}_${SPEED_TAG}.json"
  log_path="$LOG_DIR/${label}_${SPEED_TAG}.log"

  echo "Launching rank=$rank ($label, suite=$suite, port=$port) -> $log_path"
  # PYTHON_CMD is deliberately left unquoted: it holds a command plus its
  # arguments ("uv run python" by default) and must be split into words.
  # The ${arr[@]+...} form expands to nothing for an empty array without
  # tripping `set -u` on older bash versions.
  # stdout and stderr of each client go to its own log file.
  # shellcheck disable=SC2086
  $PYTHON_CMD scripts/eval_libero_speed.py \
    --task-suite-name "$suite" \
    --task-ids "$ids" \
    --host "$HOST" --port "$port" \
    --speed "$SPEED" \
    --num-trials-per-task "$NUM_TRIALS" \
    --rank "$rank" \
    --video-out-path "$VIDEO_DIR/${label}_${SPEED_TAG}" \
    --results-json "$results_json" \
    ${extra_args[@]+"${extra_args[@]}"} \
    >"$log_path" 2>&1 &
  pids+=("$!")
done

printf '\n'
printf '%s\n' "All 8 ranks launched. Waiting..."

# Reap the clients in launch order. `status` stays 0 only if every client
# exited with status 0; it is the exit code the script reports at the end.
# pids[] shares its indices with RANKS[] and LABELS[].
status=0
for idx in "${!pids[@]}"; do
  tag="${LABELS[$idx]}"
  if ! wait "${pids[$idx]}"; then
    # Failures go to stderr and point at that client's log file.
    echo "[FAIL] rank=${RANKS[$idx]} $tag  (see $LOG_DIR/${tag}_${SPEED_TAG}.log)" >&2
    status=1
    continue
  fi
  echo "[done] rank=${RANKS[$idx]} $tag"
done

echo
echo "===================  Aggregated summary  ==================="
# Summarize this run's result JSONs with an inline Python program, then exit
# with the status collected while waiting for the clients.
#
# Settings reach Python through environment variables and the heredoc
# delimiter is quoted, so the shell never rewrites the Python source: paths
# containing quotes or backslashes cannot break it.
# Only files named *_<SPEED_TAG>.json are read, i.e. the naming the clients
# were launched with, so JSONs left in a reused RESULTS_DIR by a run at a
# different speed are not mixed into this rollup.
# If the Python program itself fails, `set -e` (enabled at the top of the
# script) aborts here with that failure's exit code.
# PYTHON_CMD is deliberately unquoted so "uv run python" splits into words.
# shellcheck disable=SC2086
LIBERO_RESULTS_DIR="$RESULTS_DIR" LIBERO_SPEED="$SPEED" LIBERO_SPEED_TAG="$SPEED_TAG" \
  $PYTHON_CMD - <<'PYEOF'
import json
import os
import pathlib

results_dir = pathlib.Path(os.environ["LIBERO_RESULTS_DIR"])
speed = os.environ["LIBERO_SPEED"]
suffix = "_" + os.environ["LIBERO_SPEED_TAG"] + ".json"

files = sorted(p for p in results_dir.glob("*.json") if p.name.endswith(suffix))
if not files:
    print("No result JSONs found in", results_dir)
    raise SystemExit(0)

# Parse every file exactly once. Each document is read for its "summary"
# object ("summary_line", "suite") and its "episodes" list ("success", "steps").
runs = []
for fp in files:
    with fp.open() as f:
        runs.append(json.load(f))

# Per-rank lines
for run in runs:
    print(run["summary"]["summary_line"])


# Cross-rank rollups
def _agg(rows, keep_suite=None):
    """Pool the episodes of `rows`, optionally restricted to one suite.

    Returns None when no episode matches, otherwise a dict with the episode
    count, the success count, the success rate, and the mean step count over
    successful episodes (NaN if there are none) and over all episodes.
    """
    eps = [
        ep
        for row in rows
        if keep_suite is None or row["summary"]["suite"] == keep_suite
        for ep in row["episodes"]
    ]
    if not eps:
        return None
    succ = [e for e in eps if e["success"]]
    return {
        "n": len(eps),
        "n_succ": len(succ),
        "sr": len(succ) / len(eps),
        "mean_steps_succ": (sum(e["steps"] for e in succ) / len(succ)) if succ else float("nan"),
        "mean_steps_all": sum(e["steps"] for e in eps) / len(eps),
    }


print()
print("--- per-suite rollup ---")
for suite in ("libero_spatial", "libero_goal", "libero_object", "libero_10"):
    r = _agg(runs, keep_suite=suite)
    if r:
        print(f"  {suite:16s}  success={r['n_succ']}/{r['n']} ({r['sr']*100:.1f}%)  "
              f"mean_steps_success={r['mean_steps_succ']:.1f}  mean_steps_all={r['mean_steps_all']:.1f}")

g = _agg(runs)
if g:
    print()
    print(f"GLOBAL (speed={speed}): success={g['n_succ']}/{g['n']} ({g['sr']*100:.1f}%)  "
          f"mean_steps_success={g['mean_steps_succ']:.1f}  mean_steps_all={g['mean_steps_all']:.1f}")
PYEOF

# Non-zero when at least one client failed, even if aggregation succeeded.
exit "$status"