#!/usr/bin/env bash
#
# LIBERO 8-GPU evaluation driver.
#
# Launches eight `scripts/eval_libero_speed.py` clients in the background,
# one per rank, each pointed at HOST:(BASE_PORT + rank). It then waits for
# all of them and prints a summary aggregated from the result JSON files
# found in RESULTS_DIR. The policy servers are NOT started by this script;
# the banner prints a hint for starting them.
#
# Environment variables:
#   SPEED        required; forwarded to the clients as --speed. Every "." is
#                replaced by "p" and an "x" is appended to form the tag used
#                in file and directory names (1.0 -> 1p0x).
#   BASE_PORT    port for rank 0; rank r uses BASE_PORT + r   (default 8000)
#   HOST         host the clients connect to                  (default 0.0.0.0)
#   NUM_TRIALS   forwarded as --num-trials-per-task           (default 50)
#   SAVE_VIDEOS  any value other than "1" adds --no-save-videos (default 1)
#   PYTHON_CMD   interpreter command line; it is split on whitespace into a
#                command plus arguments            (default "uv run python")
#   RESULTS_DIR  output root (default results/libero_eval_<tag>_<timestamp>)
#   VIDEO_DIR    video output root                (default $RESULTS_DIR/videos)
#   LOG_DIR      per-rank log directory           (default $RESULTS_DIR/logs)
#
# Exit status: 0 when every rank exited successfully, 1 when any rank
# failed, 2 when RANKS does not hold exactly 8 entries. Other failures abort
# the script early through `set -e`.

set -euo pipefail

# --- Settings ---------------------------------------------------------------
# ':=' assigns the default when the variable is unset or empty; ':?' aborts
# with the given message in the same situation.
: "${SPEED:?SPEED is required (e.g., SPEED=1.0)}"
: "${BASE_PORT:=8000}"
: "${HOST:=0.0.0.0}"
: "${NUM_TRIALS:=50}"
: "${SAVE_VIDEOS:=1}"
: "${PYTHON_CMD:=uv run python}"

# --- Output locations -------------------------------------------------------
TS="$(date +%Y%m%d_%H%M%S)"
SPEED_TAG="${SPEED//./p}x"
: "${RESULTS_DIR:=results/libero_eval_${SPEED_TAG}_${TS}}"
: "${VIDEO_DIR:=$RESULTS_DIR/videos}"
: "${LOG_DIR:=$RESULTS_DIR/logs}"
mkdir -p "$RESULTS_DIR" "$VIDEO_DIR" "$LOG_DIR"

# Work partition. The four arrays are parallel: index i of each one describes
# a single eval client.
#   RANKS     rank id; also printed as the GPU index and added to BASE_PORT
#             to get the client's port
#   SUITES    value passed as --task-suite-name
#   TASK_IDS  value passed as --task-ids ("all" or a comma-separated list)
#   LABELS    short name used in result/log/video file names
RANKS=(0 1 2 3 4 5 6 7)
SUITES=(libero_spatial libero_goal libero_object libero_10 libero_10 libero_10 libero_10 libero_10)
TASK_IDS=(all all all "0,1" "2,3" "4,5" "6,7" "8,9")
LABELS=(spatial goal object long_t0_1 long_t2_3 long_t4_5 long_t6_7 long_t8_9)

if [[ "${#RANKS[@]}" -ne 8 ]]; then
  echo "Hardcoded for 8 ranks; edit the partition arrays to change." >&2
  exit 2
fi

# The arrays are indexed in lockstep later on. Without this check a short
# array only shows up as an "unbound variable" abort under `set -u`, and
# surplus entries are silently ignored.
if ((${#SUITES[@]} != ${#RANKS[@]} \
  || ${#TASK_IDS[@]} != ${#RANKS[@]} \
  || ${#LABELS[@]} != ${#RANKS[@]})); then
  printf 'Partition arrays differ in length: RANKS=%d SUITES=%d TASK_IDS=%d LABELS=%d\n' \
    "${#RANKS[@]}" "${#SUITES[@]}" "${#TASK_IDS[@]}" "${#LABELS[@]}" >&2
  exit 2
fi

#######################################
# Print the run banner: effective settings, the rank partition table, and a
# copy-pasteable hint for starting the policy servers (this script does not
# start them).
# Globals:   SPEED, SPEED_TAG, RESULTS_DIR, BASE_PORT, HOST, NUM_TRIALS,
#            SAVE_VIDEOS, PYTHON_CMD, RANKS, SUITES, TASK_IDS, LABELS (read)
# Arguments: none
# Outputs:   banner text on stdout
#######################################
print_banner() {
  local slot

  cat <<EOF
====================================================================
LIBERO 8-GPU eval driver
speed = $SPEED ($SPEED_TAG)
results_dir = $RESULTS_DIR
base_port = $BASE_PORT (clients hit $HOST:$((BASE_PORT))..$HOST:$((BASE_PORT+7)))
num_trials = $NUM_TRIALS per task
save_videos = $SAVE_VIDEOS

Partition:
EOF

  for slot in "${!RANKS[@]}"; do
    printf " rank=%d gpu=%d port=%d suite=%-15s task_ids=%-7s -> %s\n" \
      "${RANKS[$slot]}" "${RANKS[$slot]}" "$((BASE_PORT + RANKS[$slot]))" \
      "${SUITES[$slot]}" "${TASK_IDS[$slot]}" "${LABELS[$slot]}"
  done

  # In the hint, \$g stays literal for the reader's shell, but the base port
  # is substituted here: BASE_PORT is usually not set in the shell the hint
  # gets pasted into, where a literal $((BASE_PORT + g)) would evaluate to g.
  cat <<EOF

Servers (one per GPU, you must start these separately):
for g in 0 1 2 3 4 5 6 7; do
CUDA_VISIBLE_DEVICES=\$g $PYTHON_CMD scripts/serve_policy.py \\
policy:checkpoint --policy.config=<your_config> \\
--policy.dir=<your_ckpt_dir> --port=\$(($BASE_PORT + g)) &
done

====================================================================
EOF
}

print_banner

# PIDs of the background eval clients, in launch (partition-slot) order.
pids=()

#######################################
# Send SIGTERM to every eval client recorded in `pids`.
# Errors from PIDs that have already exited are deliberately ignored.
# Globals:   pids (read)
#######################################
terminate_ranks() {
  if ((${#pids[@]} > 0)); then
    kill "${pids[@]}" 2>/dev/null || true
  fi
}

# Without job control, bash starts background commands with SIGINT ignored,
# so interrupting this driver would otherwise leave the eval clients running.
trap 'terminate_ranks; exit 130' INT
trap 'terminate_ranks; exit 143' TERM

#######################################
# Start the eval client for one partition slot in the background and append
# its PID to `pids`. The client's stdout and stderr go to the slot's log file.
# Globals:   RANKS, SUITES, TASK_IDS, LABELS, BASE_PORT, HOST, SPEED,
#            SPEED_TAG, NUM_TRIALS, SAVE_VIDEOS, PYTHON_CMD, RESULTS_DIR,
#            VIDEO_DIR, LOG_DIR (read); pids (appended)
# Arguments: $1 - index into the partition arrays
# Outputs:   one "Launching ..." line on stdout
#######################################
launch_rank() {
  local slot=$1
  local rank="${RANKS[$slot]}"
  local suite="${SUITES[$slot]}"
  local ids="${TASK_IDS[$slot]}"
  local label="${LABELS[$slot]}"
  local port=$((BASE_PORT + rank))
  local results_json="$RESULTS_DIR/${label}_${SPEED_TAG}.json"
  local log_path="$LOG_DIR/${label}_${SPEED_TAG}.log"
  local -a cmd

  # PYTHON_CMD may hold several words (e.g. "uv run python"); split it into
  # argv once and keep every other argument as its own array element.
  read -r -a cmd <<<"$PYTHON_CMD"
  cmd+=(
    scripts/eval_libero_speed.py
    --task-suite-name "$suite"
    --task-ids "$ids"
    --host "$HOST" --port "$port"
    --speed "$SPEED"
    --num-trials-per-task "$NUM_TRIALS"
    --rank "$rank"
    --video-out-path "$VIDEO_DIR/${label}_${SPEED_TAG}"
    --results-json "$results_json"
  )
  if [[ "$SAVE_VIDEOS" != "1" ]]; then
    cmd+=(--no-save-videos)
  fi

  echo "Launching rank=$rank ($label, suite=$suite, port=$port) -> $log_path"
  "${cmd[@]}" >"$log_path" 2>&1 &
  pids+=("$!")
}

for i in "${!RANKS[@]}"; do
  launch_rank "$i"
done

echo
echo "All 8 ranks launched. Waiting..."

# Reap every eval client in launch order. `status` stays 0 only when all of
# them exit successfully; a failure is reported on stderr together with the
# path of that rank's log file.
status=0
for idx in "${!pids[@]}"; do
  name="${LABELS[$idx]}"
  if ! wait "${pids[$idx]}"; then
    echo "[FAIL] rank=${RANKS[$idx]} $name (see $LOG_DIR/${name}_${SPEED_TAG}.log)" >&2
    status=1
    continue
  fi
  echo "[done] rank=${RANKS[$idx]} $name"
done

#######################################
# Print the per-rank summary lines followed by per-suite and global rollups,
# computed from every *.json file directly inside RESULTS_DIR.
# Globals:   PYTHON_CMD, RESULTS_DIR, SPEED (read)
# Arguments: none
# Outputs:   summary text on stdout
# Returns:   exit status of the Python interpreter
#######################################
print_aggregated_summary() {
  local -a py_cmd

  # PYTHON_CMD may hold several words (e.g. "uv run python").
  read -r -a py_cmd <<<"$PYTHON_CMD"

  # The heredoc delimiter is quoted so the shell never rewrites the Python
  # source; the two values travel as argv instead, which keeps paths that
  # contain quotes, backslashes or "$" intact.
  "${py_cmd[@]}" - "$RESULTS_DIR" "$SPEED" <<'PYEOF'
import json
import pathlib
import sys

results_dir = pathlib.Path(sys.argv[1])
speed = sys.argv[2]

files = sorted(results_dir.glob("*.json"))
if not files:
    print("No result JSONs found in", results_dir)
    raise SystemExit(0)

# Parse each result file once; every rollup below reuses these dicts.
results = []
for fp in files:
    with fp.open() as f:
        results.append(json.load(f))

# Per-rank lines
per_rank = [d["summary"] for d in results]
for s in per_rank:
    print(s["summary_line"])


# Cross-rank rollups
def _agg(rows, keep_suite=None):
    """Pool the episodes of `rows` into success/step statistics.

    rows: parsed result dicts, each with "summary" and "episodes" keys.
    keep_suite: when given, only rows whose summary["suite"] equals it count.
    Returns a dict of counts and means, or None when no episode matches.
    """
    eps = [
        e
        for d in rows
        if keep_suite is None or d["summary"]["suite"] == keep_suite
        for e in d["episodes"]
    ]
    if not eps:
        return None
    succ = [e for e in eps if e["success"]]
    return {
        "n": len(eps),
        "n_succ": len(succ),
        "sr": len(succ) / len(eps),
        "mean_steps_succ": (sum(e["steps"] for e in succ) / len(succ)) if succ else float("nan"),
        "mean_steps_all": sum(e["steps"] for e in eps) / len(eps),
    }


print()
print("--- per-suite rollup ---")
for suite in ("libero_spatial", "libero_goal", "libero_object", "libero_10"):
    r = _agg(results, keep_suite=suite)
    if r:
        print(f" {suite:16s} success={r['n_succ']}/{r['n']} ({r['sr']*100:.1f}%) "
              f"mean_steps_success={r['mean_steps_succ']:.1f} mean_steps_all={r['mean_steps_all']:.1f}")

g = _agg(results)
if g:
    print()
    print(f"GLOBAL (speed={speed}): success={g['n_succ']}/{g['n']} ({g['sr']*100:.1f}%) "
          f"mean_steps_success={g['mean_steps_succ']:.1f} mean_steps_all={g['mean_steps_all']:.1f}")
PYEOF
}

echo
echo "=================== Aggregated summary ==================="
print_aggregated_summary

# Report rank failures to the caller: `status` is 0 when every rank exited
# successfully and 1 when at least one of them failed.
exit "${status}"