# Source: VLAarchtests/code/reveal_vla_bimanual/scripts/run_rlbench_handoff_eval.sh
# lsnu's picture
# 2026-03-25 runpod handoff update
# e7d8e79 verified
#!/usr/bin/env bash
#
# Configuration for the RLBench handoff evaluation matrix.
#
# Every setting below except PROJECT_DIR may be overridden from the
# environment; a value that is unset or empty falls back to the default shown.
# PROJECT_DIR is always derived from ROOT_DIR.
set -o errexit -o nounset -o pipefail

# Workspace root that all other default paths hang off.
: "${ROOT_DIR:=/workspace}"
PROJECT_DIR="${ROOT_DIR}/reveal_vla_bimanual"

# Interpreter and report location.
: "${PYTHON_BIN:=${ROOT_DIR}/envs/rlbench/bin/python}"
: "${OUTPUT_ROOT:=${ROOT_DIR}/reports/rlbench_handoff_matrix}"

# Rollout settings forwarded to every sweep.
: "${EPISODES_PER_TASK:=1}"
: "${EPISODE_LENGTH:=20}"
: "${RESOLUTION:=224}"
: "${CHUNK_COMMIT_STEPS:=4}"

# Checkpoints under evaluation. Note the two defaults live in different trees
# (VLAarchtests/artifacts/outputs vs. outputs/r3d_handoff_phase).
: "${BASELINE_CHECKPOINT:=${ROOT_DIR}/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt}"
: "${SPATIAL_CHECKPOINT:=${ROOT_DIR}/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase_seed17/checkpoint_best.pt}"
# Load runtime_env_vars.sh into the current shell. Its contents are not visible
# from this script; presumably it exports simulator/runtime variables needed
# by the sweeps below -- confirm against that file.
# shellcheck source=/dev/null
. "${ROOT_DIR}/VLAarchtests_work/environment/runtime_env_vars.sh"
#######################################
# Run one eval.run_peract2_task_sweep invocation from inside PROJECT_DIR.
# Globals:
#   PROJECT_DIR (read) - directory the module is launched from; also passed
#                        to the interpreter as PYTHONPATH
#   PYTHON_BIN  (read) - interpreter used to launch the module
# Arguments:
#   $1   - output directory; created (with parents) if it does not exist
#   $2.. - forwarded verbatim to the sweep module
# Returns:
#   2 if the output directory argument is missing, non-zero if the output
#   directory cannot be created or PROJECT_DIR cannot be entered, otherwise
#   the exit status of the Python process.
#######################################
run_sweep() {
  if (( $# < 1 )); then
    printf 'run_sweep: missing output directory argument\n' >&2
    return 2
  fi
  local output_dir="$1"
  shift

  # Guard explicitly: errexit is ignored when the function is called from a
  # condition (if / && / ||), so it cannot be relied on to stop here.
  mkdir -p -- "${output_dir}" || return

  (
    # The subshell keeps the caller's working directory untouched. Never
    # launch the module from the wrong directory if the cd fails.
    cd -- "${PROJECT_DIR}" || exit 1
    PYTHONPATH="${PROJECT_DIR}" "${PYTHON_BIN}" -m eval.run_peract2_task_sweep "$@"
  )
}
# Make sure the matrix root exists before any variant writes beneath it.
mkdir -p "${OUTPUT_ROOT}"

#######################################
# Launch one sweep variant under OUTPUT_ROOT with the flags every variant
# shares, followed by any variant-specific flags.
# Globals:
#   OUTPUT_ROOT, EPISODES_PER_TASK, EPISODE_LENGTH, RESOLUTION,
#   CHUNK_COMMIT_STEPS (all read)
# Arguments:
#   $1   - variant sub-directory name under OUTPUT_ROOT
#   $2   - checkpoint path
#   $3   - run name prefix
#   $4.. - extra flags appended after the shared ones
#######################################
launch_variant() {
  local variant_dir="${OUTPUT_ROOT}/$1"
  local checkpoint="$2"
  local run_prefix="$3"
  shift 3

  # The variant directory is passed twice on purpose: once for run_sweep to
  # create it, once as the module's --output-root.
  run_sweep \
    "${variant_dir}" \
    --checkpoint "${checkpoint}" \
    --output-root "${variant_dir}" \
    --run-name-prefix "${run_prefix}" \
    --episodes-per-task "${EPISODES_PER_TASK}" \
    --episode-length "${EPISODE_LENGTH}" \
    --resolution "${RESOLUTION}" \
    --chunk-commit-steps "${CHUNK_COMMIT_STEPS}" \
    --allow-unsupervised-planning \
    --headless \
    "$@"
}

# Baseline checkpoint.
launch_variant baseline "${BASELINE_CHECKPOINT}" baseline_rgbd_seed17 \
  --skip-noplan

# Spatial checkpoint, unmodified. This is the only variant launched without
# --skip-noplan.
launch_variant spatial_full "${SPATIAL_CHECKPOINT}" spatial_phase_seed17

# Spatial checkpoint ablations: each adds exactly one ablation flag.
launch_variant spatial_nogeom "${SPATIAL_CHECKPOINT}" spatial_phase_nogeom_seed17 \
  --no-geometry \
  --skip-noplan

launch_variant spatial_compactwm "${SPATIAL_CHECKPOINT}" spatial_phase_compactwm_seed17 \
  --compact-world-model \
  --skip-noplan

launch_variant spatial_notask "${SPATIAL_CHECKPOINT}" spatial_phase_notask_seed17 \
  --disable-task-conditioning \
  --skip-noplan
# Feed the rollout_eval.json of each run into eval.compare_rlbench_sweeps,
# with baseline_plan as the reference label. Everything happens in a subshell
# so the working directory change and the helper arrays do not leak out.
(
  cd "${PROJECT_DIR}"

  # label=path pairs, in the order they are handed to the comparison module.
  labelled_runs=(
    "baseline_plan=${OUTPUT_ROOT}/baseline/baseline_rgbd_seed17_plan_split/rollout_eval.json"
    "spatial_noplan=${OUTPUT_ROOT}/spatial_full/spatial_phase_seed17_noplan_split/rollout_eval.json"
    "spatial_plan=${OUTPUT_ROOT}/spatial_full/spatial_phase_seed17_plan_split/rollout_eval.json"
    "spatial_nogeom=${OUTPUT_ROOT}/spatial_nogeom/spatial_phase_nogeom_seed17_plan_split/rollout_eval.json"
    "spatial_compactwm=${OUTPUT_ROOT}/spatial_compactwm/spatial_phase_compactwm_seed17_plan_split/rollout_eval.json"
    "spatial_notask=${OUTPUT_ROOT}/spatial_notask/spatial_phase_notask_seed17_plan_split/rollout_eval.json"
  )

  compare_cmd=(
    "${PYTHON_BIN}" -m eval.compare_rlbench_sweeps
    --reference-label baseline_plan
    --output-dir "${OUTPUT_ROOT}/comparison"
  )
  for labelled_run in "${labelled_runs[@]}"; do
    compare_cmd+=(--run "${labelled_run}")
  done

  PYTHONPATH="${PROJECT_DIR}" "${compare_cmd[@]}"
)