diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1730887a8d900eacfc2a7644e50b1cac5a79decd --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md @@ -0,0 +1,114 @@ +# reveal_vla_bimanual + +Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion. + +This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder. + +This repo is structured around five top-level modules: + +- `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers. +- `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction. +- `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner. +- `train/`: trainers, losses, checkpointing, and Hydra/YAML configs. +- `eval/`: benchmark scripts, ablations, metrics, plots, and report generation. + +Current bootstrap priorities: + +1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface. +2. Stand up a backbone-only 3-camera policy in the same training/eval harness. +3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies. 
+ +## Public benchmark package + +The repo now includes a concrete public-benchmark package definition for the next-stage fair comparison: + +- `eval/public_benchmark_package.py` + - track registry for bag, dense occluded retrieval, cloth retrieval, and the generic anchor + - same-protocol signatures across `trunk_only`, `adapter_noop`, and `adapter_active` + - same-data / same-init fairness signatures for `trunk_only_ft` vs `adapter_active_ft` + +- `eval/run_public_benchmark_package.py` + - validates normalized result files from multiple public suites + - checks protocol identity and training fairness + - aggregates per-track gains, sign-of-life diagnostics, and anchor regressions + +Write the default manifest to `~/workspace` with: + +```bash +python -m eval.run_public_benchmark_package \ + --write-default-manifest ~/workspace/public_benchmark_package_v1.json +``` + +Summarize normalized result files with: + +```bash +python -m eval.run_public_benchmark_package \ + --result /abs/path/result_a.json \ + --result /abs/path/result_b.json \ + --output-dir ~/workspace/reports/public_benchmark_package_v1 +``` + +Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`. + +## RLBench env A + +The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`. + +Bring it up with: + +```bash +/workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh +/workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh +/workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh +``` + +Verify GPU GL on the headless display: + +```bash +DISPLAY=:99 glxinfo -B +``` + +Run the RLBench launch/reset/step smoke test: + +```bash +env \ + DISPLAY=:99 \ + XDG_RUNTIME_DIR=/tmp/runtime-root \ + COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \ + LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. 
-f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \ + QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \ + /workspace/.tools/micromamba/bin/micromamba run \ + -r /workspace/.micromamba \ + -p /workspace/envs/rlbench \ + python -m sim_rlbench.launch_smoke --headless +``` + +The working benchmark interface is fixed to three cameras only: + +- `front` +- `wrist_left` +- `wrist_right` + +The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack. + +Generate the PerAct2-compatible train command for the fixed 3-camera interface with: + +```bash +micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \ + python -m sim_rlbench.smoke_test --print-train-command +``` + +Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification: + +```bash +micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \ + python -m sim_rlbench.dataset_download --resolution 256 --splits train +``` + +If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`: + +```bash +apt-get install -y squashfs-tools +micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \ + python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract +``` diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md new file mode 100644 index 0000000000000000000000000000000000000000..3f0124bc535f8574f7c956e362a5bd934a4cbd80 --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md @@ -0,0 +1,87 @@ +# 
Adapter Stack + +This repo now contains a no-op-safe `trunk + adapter` path alongside the legacy monolithic policies. + +## Main classes + +- `models/policy.py` + - `FoundationTrunkPolicy` + - `ElasticOcclusionAdapter` + - `AdapterWrappedPolicy` + +- `models/backbones.py` + - `NoOpAdapterCompatibleTrunkOutput` + - `TrunkInterface` + +- `models/action_decoder.py` + - `TaskRoutedProposalPrior` + +- `models/planner.py` + - `ElasticFeasibilityGate` + - `ResidualActionReranker` + - `AdapterPlanner` + +- `models/world_model.py` + - `LightweightRevealStateTransitionModel` + +- `models/observation_memory.py` + - `RevealStateCache` + +## Trainer modes + +`train/trainer.py` now supports: + +- `policy_type: adapter_wrapped` +- `policy_type: foundation_trunk` + +Relevant trainer fields: + +- `training_regime` +- `eval_mode` +- `adapter_mode` +- `adapter_use_transition_model` +- `adapter_use_task_conditioning` + +## Guardrail tests + +New tests: + +- `tests/test_trunk_noop_equivalence.py` +- `tests/test_adapter_gate_blocks_unsafe_retrieve.py` +- `tests/test_task_specific_loss_masking.py` +- `tests/test_cloth_specific_metrics_affect_selection.py` +- `tests/test_general_eval_protocol_is_identical.py` + +## Config templates + +- `train/configs/proxy_adapter_wrapped_clip_base.yaml` +- `train/configs/proxy_adapter_wrapped_clip_rank_only.yaml` +- `train/configs/proxy_adapter_wrapped_clip_noop_eval.yaml` + +## Benchmark wrappers + +- `scripts/run_anchor_adapter_ablations.sh` +- `scripts/run_proxy_adapter_ablations.sh` +- `scripts/run_target_like_adapter_subset.sh` +- `eval/public_benchmark_package.py` +- `eval/run_public_benchmark_package.py` + +All new configs and scripts default to `~/workspace` outputs and reports. 
+ +## Public benchmark package + +The public benchmark package is the current fair-comparison contract for real benchmarks: + +- target tracks: + - `bag_track` -> `BEHAVIOR-1K/unpacking_childs_bag-0` + - `occlusion_track` -> `ManiSkill/PickClutterYCB-v1` + - `cloth_track` -> `GarmentLab/grasp_protocol_stacked_garment` +- anchor track: + - `anchor_track` -> `AnyBimanual/dual_push_buttons` + +The package code enforces: + +- mode-invariant eval protocols per track +- same-data / same-init fairness for `trunk_only_ft` vs `adapter_active_ft` +- sign-of-life thresholds on intervention and non-base proposal selection +- no-regression tolerance on the trusted generic anchor diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md new file mode 100644 index 0000000000000000000000000000000000000000..0177b97231a41624f30373a81a1fc150ad2599eb --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md @@ -0,0 +1,73 @@ +# Public Benchmark Package + +This repo now contains a concrete public-benchmark package for the real-sim phase. 
+ +## Tracks + +- `bag_track` + - suite: `BEHAVIOR-1K` + - task: `unpacking_childs_bag-0` +- `occlusion_track` + - suite: `ManiSkill 3` + - task: `PickClutterYCB-v1` +- `cloth_track` + - suite: `GarmentLab` + - task slice: `grasp_protocol_stacked_garment` +- `anchor_track` + - suite: `AnyBimanual` + - task: `dual_push_buttons` + +## Enforced fairness + +- `trunk_only_ft` and `adapter_active_ft` must share: + - train demos + - val demos + - init checkpoint group + - optimizer + - LR schedule + - batch size + - augmentations + - early stopping metric + - max gradient steps + - unfrozen trunk scope + - dataset split id +- all modes on a track must share the same eval protocol signature +- anchor regressions are bounded by an absolute tolerance of `0.02` + +## Normalized result schema + +Each external benchmark run should be converted to one JSON object with: + +- `track_id` +- `adapter_mode` +- `successes` or `success_rate` +- `episodes` +- `seed` +- `eval_protocol` +- for target tracks: `train_spec` +- optional diagnostics: + - `intervention_rate` + - `non_base_selection_rate` + - `steps_to_first_reveal_or_access` + - `steps_to_retrieve` + - `disturbance_proxy` + +## Commands + +Write the default manifest: + +```bash +python -m eval.run_public_benchmark_package \ + --write-default-manifest ~/workspace/public_benchmark_package_v1.json +``` + +Summarize results: + +```bash +python -m eval.run_public_benchmark_package \ + --result /abs/path/behavior_bag_adapter_active_seed17.json \ + --result /abs/path/behavior_bag_trunk_seed17.json \ + --result /abs/path/maniskill_occlusion_adapter_active_seed17.json \ + --result /abs/path/maniskill_occlusion_trunk_seed17.json \ + --output-dir ~/workspace/reports/public_benchmark_package_v1 +``` diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..ab2f73f5df8933135137c07c5b5dbf3233e29c5f Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e579f5b56dbaea4b5b32d43e57363c285b9a6c6 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3cc8ab3be87464ceb7b1619ae30e450d2d2e5bf3 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbe603f6360e30f5eb855928ef43ea83e952e028 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/protocols.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/protocols.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d17f9fe6deb61339b57c3a3a8efe75508835ecb Binary files /dev/null 
and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/protocols.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13afb66c5f0762f7d73c3e39cfba7c05b470dd9e Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c1bca21b906fca30265d3fb68ce16037628c142 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/public_benchmark_package.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f431242c62131998b9b34e08a217c32986868c05 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35252f6311e53ce61c0fb4c06de1647b24b034e0 Binary files /dev/null and 
b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56ed4e0d2393c020fa10b9cbc6af39ba83314169 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64bb523a3b7f0ccb80ccf98a46955ee056e8bf2d Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_public_benchmark_package.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83db399f519a73c58a40749a008d39288ce762b3 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..5da885139c952f3e78d49c3bc1303194840e1d62 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/public_benchmark_package.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/public_benchmark_package.py new file mode 100644 index 0000000000000000000000000000000000000000..e4589b4282662c94d70c01424da25025e419895e --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/public_benchmark_package.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any, Sequence + + +TARGET_ROLE = "target" +ANCHOR_ROLE = "anchor" + +TARGET_TRACK_EVAL_MODES: tuple[str, ...] = ( + "trunk_only_ft", + "adapter_noop", + "adapter_active_ft", +) +ANCHOR_TRACK_EVAL_MODES: tuple[str, ...] = ( + "trunk_only", + "adapter_noop", + "adapter_active", +) + +DEFAULT_TARGET_TRAIN_DEMOS = 64 +DEFAULT_TARGET_VAL_DEMOS = 16 +DEFAULT_TARGET_TEST_EPISODES = 100 +DEFAULT_ANCHOR_EPISODES = 25 +DEFAULT_RESOLUTION = 256 +DEFAULT_ANCHOR_TOLERANCE = 0.02 +DEFAULT_SIGN_OF_LIFE_INTERVENTION = 0.15 +DEFAULT_SIGN_OF_LIFE_NON_BASE = 0.15 +DEFAULT_SIGN_OF_LIFE_GAIN = 0.05 + + +@dataclass(frozen=True) +class PublicBenchmarkTrack: + track_id: str + suite: str + benchmark_task: str + role: str + task_family: str + target_behavior: str + public_source: str + notes: str = "" + success_metric: str = "success_rate" + + +PUBLIC_BENCHMARK_TRACKS: tuple[PublicBenchmarkTrack, ...] 
= ( + PublicBenchmarkTrack( + track_id="bag_track", + suite="behavior1k", + benchmark_task="unpacking_childs_bag-0", + role=TARGET_ROLE, + task_family="bag_retrieval", + target_behavior="retrieve target objects from an opened compliant bag or backpack", + public_source="https://behavior.stanford.edu/knowledgebase/tasks/", + notes=( + "Closest public bag retrieval task. Treat as the benchmark-standard bag opening / " + "retrieval slice." + ), + ), + PublicBenchmarkTrack( + track_id="occlusion_track", + suite="maniskill3", + benchmark_task="PickClutterYCB-v1", + role=TARGET_ROLE, + task_family="dense_occluded_retrieval", + target_behavior="retrieve a target object from dense occluding clutter", + public_source="https://maniskill.readthedocs.io/en/latest/tasks/table_top_gripper/index.html", + notes=( + "Closest maintained public occluded retrieval task. Treat as the canopy / dense " + "occlusion proxy." + ), + ), + PublicBenchmarkTrack( + track_id="cloth_track", + suite="garmentlab", + benchmark_task="grasp_protocol_stacked_garment", + role=TARGET_ROLE, + task_family="cloth_retrieval", + target_behavior="retrieve a hidden or partially covered object from stacked or cluttered garments", + public_source="https://garmentlab.readthedocs.io/en/latest/tutorial/realworldbenchmark/index.html", + notes=( + "Use the GarmentLab grasp protocol in stacked/clutter layouts as the closest public " + "cloth retrieval benchmark slice." + ), + ), + PublicBenchmarkTrack( + track_id="anchor_track", + suite="anybimanual", + benchmark_task="dual_push_buttons", + role=ANCHOR_ROLE, + task_family="generic_anchor", + target_behavior="generic bimanual control regression anchor", + public_source="https://arxiv.org/abs/2412.06779", + notes="Trusted public anchor on this setup. 
Keep as a no-regression track only.", + ), +) + + +def public_benchmark_tracks(role: str | None = None) -> list[PublicBenchmarkTrack]: + if role is None: + return list(PUBLIC_BENCHMARK_TRACKS) + return [track for track in PUBLIC_BENCHMARK_TRACKS if track.role == role] + + +def public_track_by_id(track_id: str) -> PublicBenchmarkTrack: + normalized = str(track_id).strip() + for track in PUBLIC_BENCHMARK_TRACKS: + if track.track_id == normalized: + return track + raise KeyError(f"Unknown public benchmark track: {track_id!r}") + + +def expected_eval_modes(track_id: str) -> tuple[str, ...]: + track = public_track_by_id(track_id) + if track.role == TARGET_ROLE: + return TARGET_TRACK_EVAL_MODES + return ANCHOR_TRACK_EVAL_MODES + + +def build_public_eval_protocol( + *, + track_id: str, + eval_mode: str, + seed: int = 17, + episodes: int | None = None, + resolution: int = DEFAULT_RESOLUTION, + cameras: Sequence[str] = ("front", "left_wrist", "right_wrist"), +) -> dict[str, Any]: + track = public_track_by_id(track_id) + expected = expected_eval_modes(track.track_id) + mode = str(eval_mode) + if mode not in expected: + raise ValueError(f"Unexpected eval mode {mode!r} for track {track.track_id!r}. 
Expected one of {expected}.") + if episodes is None: + episodes = DEFAULT_TARGET_TEST_EPISODES if track.role == TARGET_ROLE else DEFAULT_ANCHOR_EPISODES + return { + "track_id": track.track_id, + "suite": track.suite, + "benchmark_task": track.benchmark_task, + "role": track.role, + "eval_mode": mode, + "seed": int(seed), + "episodes": int(episodes), + "resolution": int(resolution), + "cameras": tuple(str(camera) for camera in cameras), + "observation_stack": "rgbd_3cam", + "action_horizon": 8, + "action_space": "bimanual_delta_pose", + "same_test_episodes": True, + } + + +def public_protocol_identity_signature(protocol: dict[str, Any]) -> tuple[object, ...]: + return ( + protocol["track_id"], + protocol["suite"], + protocol["benchmark_task"], + protocol["role"], + protocol["seed"], + protocol["episodes"], + protocol["resolution"], + tuple(protocol["cameras"]), + protocol["observation_stack"], + protocol["action_horizon"], + protocol["action_space"], + protocol["same_test_episodes"], + ) + + +def build_target_training_spec( + *, + track_id: str, + model_variant: str, + seed: int, + train_demos: int = DEFAULT_TARGET_TRAIN_DEMOS, + val_demos: int = DEFAULT_TARGET_VAL_DEMOS, + init_checkpoint_group: str = "shared_public_trunk", + optimizer: str = "adamw", + learning_rate: float = 3e-4, + lr_schedule: str = "cosine", + batch_size: int = 32, + augmentations: str = "matched_rgbd_aug_v1", + early_stopping_metric: str = "val_success", + max_gradient_steps: int = 20_000, + unfreeze_scope: str = "matched_trunk_scope", + dataset_split_id: str | None = None, +) -> dict[str, Any]: + track = public_track_by_id(track_id) + if track.role != TARGET_ROLE: + raise ValueError(f"Target training spec is only valid for target tracks, got {track_id!r}.") + return { + "track_id": track.track_id, + "suite": track.suite, + "benchmark_task": track.benchmark_task, + "model_variant": str(model_variant), + "seed": int(seed), + "train_demos": int(train_demos), + "val_demos": int(val_demos), + 
"init_checkpoint_group": str(init_checkpoint_group), + "optimizer": str(optimizer), + "learning_rate": float(learning_rate), + "lr_schedule": str(lr_schedule), + "batch_size": int(batch_size), + "augmentations": str(augmentations), + "early_stopping_metric": str(early_stopping_metric), + "max_gradient_steps": int(max_gradient_steps), + "unfreeze_scope": str(unfreeze_scope), + "dataset_split_id": dataset_split_id or f"{track.track_id}_shared_split_seed{int(seed)}", + "same_data_policy": True, + "same_init_policy": True, + } + + +def training_fairness_signature(spec: dict[str, Any]) -> tuple[object, ...]: + return ( + spec["track_id"], + spec["suite"], + spec["benchmark_task"], + spec["seed"], + spec["train_demos"], + spec["val_demos"], + spec["init_checkpoint_group"], + spec["optimizer"], + spec["learning_rate"], + spec["lr_schedule"], + spec["batch_size"], + spec["augmentations"], + spec["early_stopping_metric"], + spec["max_gradient_steps"], + spec["unfreeze_scope"], + spec["dataset_split_id"], + spec["same_data_policy"], + spec["same_init_policy"], + ) + + +def default_public_benchmark_manifest() -> dict[str, Any]: + return { + "package_name": "public_reveal_retrieve_package_v1", + "tracks": [asdict(track) for track in PUBLIC_BENCHMARK_TRACKS], + "target_track_ids": [track.track_id for track in public_benchmark_tracks(TARGET_ROLE)], + "anchor_track_ids": [track.track_id for track in public_benchmark_tracks(ANCHOR_ROLE)], + "target_eval_modes": list(TARGET_TRACK_EVAL_MODES), + "anchor_eval_modes": list(ANCHOR_TRACK_EVAL_MODES), + "defaults": { + "target_train_demos": DEFAULT_TARGET_TRAIN_DEMOS, + "target_val_demos": DEFAULT_TARGET_VAL_DEMOS, + "target_test_episodes": DEFAULT_TARGET_TEST_EPISODES, + "anchor_episodes": DEFAULT_ANCHOR_EPISODES, + "resolution": DEFAULT_RESOLUTION, + }, + "thresholds": { + "anchor_tolerance": DEFAULT_ANCHOR_TOLERANCE, + "sign_of_life_intervention_rate": DEFAULT_SIGN_OF_LIFE_INTERVENTION, + "sign_of_life_non_base_selection_rate": 
DEFAULT_SIGN_OF_LIFE_NON_BASE, + "sign_of_life_success_gain": DEFAULT_SIGN_OF_LIFE_GAIN, + }, + } + + +def write_default_public_benchmark_manifest(output_path: str | Path) -> Path: + path = Path(output_path) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(default_public_benchmark_manifest(), indent=2, sort_keys=True) + "\n", encoding="utf-8") + return path diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py new file mode 100644 index 0000000000000000000000000000000000000000..8f8ddab34b05feb9b2ce0ff216cd0c51f569d601 --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py @@ -0,0 +1,2037 @@ +from __future__ import annotations + +import argparse +import json +import math +import os +import sys +from collections import deque +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Sequence + +import numpy as np +import torch +import torch.nn.functional as F + + +CODE_ROOT = Path(__file__).resolve().parents[1] +if str(CODE_ROOT) not in sys.path: + sys.path.insert(0, str(CODE_ROOT)) + + +def _configure_runtime_env() -> None: + os.environ.setdefault("VK_ICD_FILENAMES", "/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json") + os.environ.setdefault("VK_LAYER_PATH", "/workspace/runtime/vulkan/implicit_layer.d") + os.environ.setdefault("XDG_RUNTIME_DIR", "/tmp/runtime-root") + os.environ["MS_ASSET_DIR"] = "/workspace/.maniskill" + + +_configure_runtime_env() + +from eval.run_maniskill_pickclutter_smoke import ( + DEFAULT_INIT_CHECKPOINT, + HISTORY_STEPS, + MAX_MACRO_STEPS, + NUM_APPROACH_TEMPLATES, + PROPRIO_DIM, + ROLL_OUT_HORIZON, + SMOKE_ADAPTER_CONFIDENCE_THRESHOLD, + SMOKE_RETRIEVE_ACCESS_THRESHOLD, + SMOKE_RETRIEVE_PERSISTENCE_THRESHOLD, + 
SMOKE_RETRIEVE_REOCCLUSION_THRESHOLD, + SMOKE_RETRIEVE_SUPPORT_THRESHOLD, + STATE_METRIC_MASK, + STATE_SUPERVISION_METRICS, + SUPPORT_MODE_HOLD, + SUPPORT_MODE_PASSIVE, + SUPPORT_MODE_TRANSFER, + _aggregate_epoch, + _apply_smoke_planner_overrides, + _history_stack, + _init_history_entry, + _load_checkpoint, + _load_init_bundle, + _make_loader, + _save_training_checkpoint, +) +from eval.run_public_benchmark_package import summarize_public_benchmark_package +from models.action_decoder import ChunkDecoderConfig, TASK_INDEX, semantic_macro_chunk +from train.losses import LossWeights, compute_total_loss +from train.run_experiment import _load_init_checkpoint, _move_batch_to_device +from train.trainer import BimanualTrainer, TrainerConfig, apply_trainable_parameter_prefixes, build_policy + +import gymnasium as gym # noqa: E402 +import mani_skill.envs # noqa: E402 +from mani_skill.utils.structs.pose import Pose # noqa: E402 + + +WORKSPACE_ROOT = Path("/workspace/workspace") +SMOKE_VERSION = "bridge_smoke_v1" +CAMERA_NAMES = ("front", "left", "right") +IMAGE_RESOLUTION = 224 +DEFAULT_SEED = 17 +VIEW_VISIBILITY_SCALE = 0.0125 +CLOTH_HIDDEN_SETTLE_STEPS = 25 +CLOTH_SUCCESS_MIN_Y_DELTA = 0.10 +CLOTH_SUCCESS_MIN_PLANAR_DELTA = 0.10 +CLOTH_SUCCESS_MIN_VISIBILITY = 0.45 +CLOTH_FIXED_SOURCE_X = -0.235 +CLOTH_FIXED_SOURCE_Y = -0.094 +CLOTH_FIXED_SOURCE_Z = 0.8748 +CLOTH_FIXED_COVER_X = -0.235 +CLOTH_FIXED_COVER_Y = -0.075 +CLOTH_FIXED_COVER_Z = 0.885 +EXPECTED_PROPOSAL_CANDIDATES = ChunkDecoderConfig().num_candidates + + +@dataclass(frozen=True) +class SmokePaths: + data_dir: Path + output_dir: Path + report_dir: Path + + +@dataclass(frozen=True) +class SmokeSpec: + resolution: int = IMAGE_RESOLUTION + train_episodes: int = 32 + val_episodes: int = 8 + eval_episodes: int = 50 + dataset_seed: int = DEFAULT_SEED + train_seed: int = DEFAULT_SEED + history_steps: int = HISTORY_STEPS + max_macro_steps: int = MAX_MACRO_STEPS + batch_size: int = 4 + epochs: int = 6 + num_workers: int = 
16 + learning_rate: float = 1e-4 + weight_decay: float = 1e-4 + + @property + def seed(self) -> int: + return self.train_seed + + +@dataclass(frozen=True) +class BridgeTaskSpec: + key: str + env_id: str + track_id: str + suite: str + benchmark_task: str + task_name: str + text_prompt: str + mode_order: tuple[str, ...] + reveal_modes: tuple[str, ...] + transfer_modes: tuple[str, ...] + retrieve_modes: tuple[str, ...] + notes: str + + +TASK_SPECS: dict[str, BridgeTaskSpec] = { + "bag": BridgeTaskSpec( + key="bag", + env_id="PutEggplantInBasketScene-v1", + track_id="bag_track", + suite="maniskill3", + benchmark_task="PutEggplantInBasketRetrievalProxy-v1", + task_name="bag", + text_prompt="retrieve the target object from inside the basket and stage it outside the basket", + mode_order=( + "base_action", + "pin_left_rim", + "pin_right_rim", + "widen_mouth", + "maintain_mouth", + "probe_inside", + "insert_actor", + "retrieve", + ), + reveal_modes=("pin_left_rim", "pin_right_rim", "widen_mouth", "maintain_mouth", "probe_inside"), + transfer_modes=("insert_actor",), + retrieve_modes=("retrieve",), + notes=( + "Public ManiSkill bridge scene with custom retrieval initialization. The eggplant is placed inside the " + "basket region and must be pulled back out to a staging zone." + ), + ), + "cloth": BridgeTaskSpec( + key="cloth", + env_id="PutSpoonOnTableClothInScene-v1", + track_id="cloth_track", + suite="maniskill3", + benchmark_task="PutSpoonUnderClothRetrievalProxy-v1", + task_name="cloth", + text_prompt="reveal the spoon from under the cloth and retrieve it to the open area", + mode_order=( + "base_action", + "lift_edge", + "separate_layer", + "stabilize_fold", + "maintain_lift", + "insert_actor", + "retrieve", + ), + reveal_modes=("lift_edge", "separate_layer", "stabilize_fold", "maintain_lift"), + transfer_modes=("insert_actor",), + retrieve_modes=("retrieve",), + notes=( + "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the " + "cloth region and must be revealed and extracted to the open side of the table." + ), + ), +} + + +def _task_spec(task: str) -> BridgeTaskSpec: + normalized = str(task).strip().lower() + if normalized not in TASK_SPECS: + raise KeyError(f"Unsupported task {task!r}. Expected one of {sorted(TASK_SPECS)}.") + return TASK_SPECS[normalized] + + +def _default_paths(task_spec: BridgeTaskSpec) -> SmokePaths: + return SmokePaths( + data_dir=WORKSPACE_ROOT / "data" / "maniskill_bridge_retrieval" / f"{task_spec.key}_{SMOKE_VERSION}", + output_dir=WORKSPACE_ROOT / "outputs" / f"maniskill_{task_spec.key}_{SMOKE_VERSION}", + report_dir=WORKSPACE_ROOT / "reports" / f"maniskill_{task_spec.key}_{SMOKE_VERSION}", + ) + + +def _dataset_artifact_path(data_dir: Path, basename: str, *, dataset_seed: int) -> Path: + if int(dataset_seed) == DEFAULT_SEED: + return data_dir / basename + artifact = Path(basename) + return data_dir / f"{artifact.stem}_seed{int(dataset_seed)}{artifact.suffix}" + + +def _np(value: Any, *, dtype: np.dtype | None = None) -> np.ndarray: + if isinstance(value, np.ndarray): + array = value + elif isinstance(value, torch.Tensor): + array = value.detach().cpu().numpy() + else: + array = np.asarray(value) + if dtype is not None: + array = array.astype(dtype, copy=False) + return array + + +def _vec3(value: Any) -> np.ndarray: + return _np(value, dtype=np.float32).reshape(-1)[:3] + + +def _resize_rgb(rgb: np.ndarray, size: int) -> np.ndarray: + tensor = torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0).float() + resized = F.interpolate(tensor, size=(size, size), mode="bilinear", align_corners=False) + return resized[0].permute(1, 2, 0).round().clamp(0, 255).to(dtype=torch.uint8).cpu().numpy() + + +def _resize_single_channel(image: np.ndarray, size: int, *, dtype: np.dtype) -> np.ndarray: + tensor = torch.from_numpy(image).unsqueeze(0).unsqueeze(0).float() + resized = F.interpolate(tensor, size=(size, size), mode="nearest") + return 
resized[0, 0].to(dtype=torch.float32).cpu().numpy().astype(dtype, copy=False) + + +def _camera_intrinsic_from_param(param: dict[str, Any]) -> np.ndarray: + for key in ("intrinsic_cv", "intrinsic", "cam_intrinsic"): + if key in param: + matrix = _np(param[key], dtype=np.float32) + return matrix[0] if matrix.ndim == 3 else matrix + return np.eye(3, dtype=np.float32) + + +def _camera_extrinsic_from_param(param: dict[str, Any]) -> np.ndarray: + for key in ("cam2world_gl", "cam2world", "extrinsic_cv", "extrinsic"): + if key in param: + matrix = _np(param[key], dtype=np.float32) + return matrix[0] if matrix.ndim == 3 else matrix + return np.eye(4, dtype=np.float32) + + +def _extract_sensor_bundle(obs: dict[str, Any], *, resolution: int) -> dict[str, np.ndarray]: + camera_name = next(iter(obs["sensor_data"].keys())) + view = obs["sensor_data"][camera_name] + param = obs["sensor_param"][camera_name] + rgb = _np(view["rgb"], dtype=np.uint8) + segmentation = _np(view["segmentation"], dtype=np.int16) + rgb = rgb[0] if rgb.ndim == 4 else rgb + segmentation = segmentation[0] if segmentation.ndim == 4 else segmentation + segmentation = segmentation[..., 0] if segmentation.ndim == 3 else segmentation + rgb_resized = _resize_rgb(rgb, resolution) + seg_resized = _resize_single_channel(segmentation, resolution, dtype=np.int16) + intrinsic = _camera_intrinsic_from_param(param) + extrinsic = _camera_extrinsic_from_param(param) + images = np.stack([rgb_resized.copy() for _ in CAMERA_NAMES], axis=0) + segmentations = np.stack([seg_resized.copy() for _ in CAMERA_NAMES], axis=0) + depths = np.zeros((len(CAMERA_NAMES), 1, resolution, resolution), dtype=np.float32) + depth_valid = np.zeros_like(depths, dtype=np.float32) + intrinsics = np.stack([intrinsic.copy() for _ in CAMERA_NAMES], axis=0) + extrinsics = np.stack([extrinsic.copy() for _ in CAMERA_NAMES], axis=0) + return { + "images": images, + "segmentations": segmentations, + "depths": depths, + "depth_valid": depth_valid, + 
"camera_intrinsics": intrinsics, + "camera_extrinsics": extrinsics, + } + + +def _build_proprio(env: gym.Env[Any, Any]) -> np.ndarray: + base = env.unwrapped + qpos = _np(base.agent.robot.get_qpos(), dtype=np.float32).reshape(-1) + qvel = _np(base.agent.robot.get_qvel(), dtype=np.float32).reshape(-1) + ee_pose = base.agent.robot.links_map["ee_gripper_link"].pose + tcp_pose = np.concatenate([_vec3(ee_pose.p), _np(ee_pose.q, dtype=np.float32).reshape(-1)[:4]], axis=0) + gripper_width = qpos[-2:].sum(keepdims=True).astype(np.float32) + flat = np.concatenate([qpos, qvel, tcp_pose, gripper_width], axis=0) + if flat.shape[0] >= PROPRIO_DIM: + return flat[:PROPRIO_DIM] + padded = np.zeros((PROPRIO_DIM,), dtype=np.float32) + padded[: flat.shape[0]] = flat + return padded + + +def _source_actor(env: gym.Env[Any, Any]) -> Any: + base = env.unwrapped + return base.objs[base.source_obj_name] + + +def _target_actor(env: gym.Env[Any, Any]) -> Any: + base = env.unwrapped + return base.objs[base.target_obj_name] + + +def _source_position(env: gym.Env[Any, Any]) -> np.ndarray: + return _vec3(_source_actor(env).pose.p) + + +def _target_position(env: gym.Env[Any, Any]) -> np.ndarray: + return _vec3(_target_actor(env).pose.p) + + +def _ee_position(env: gym.Env[Any, Any]) -> np.ndarray: + return _vec3(env.unwrapped.agent.robot.links_map["ee_gripper_link"].pose.p) + + +def _act_from_world_delta(delta_xyz: Sequence[float]) -> np.ndarray: + delta = np.asarray(delta_xyz, dtype=np.float32).reshape(3) + return np.asarray([-delta[0], -delta[1], delta[2]], dtype=np.float32) + + +def _step_action(env: gym.Env[Any, Any], delta_xyz: Sequence[float], *, grip: float) -> None: + action = np.zeros((1, 7), dtype=np.float32) + action[0, :3] = np.clip(_act_from_world_delta(delta_xyz), -0.02, 0.02) + action[0, 6] = float(np.clip(grip, -1.0, 1.0)) + env.step(action) + + +def _hold(env: gym.Env[Any, Any], *, steps: int, grip: float) -> None: + for _ in range(int(steps)): + action = np.zeros((1, 7), 
dtype=np.float32) + action[0, 6] = float(np.clip(grip, -1.0, 1.0)) + env.step(action) + + +def _move_ee(env: gym.Env[Any, Any], goal_xyz: Sequence[float], *, grip: float, max_steps: int = 60, tol: float = 0.003) -> dict[str, Any]: + goal = np.asarray(goal_xyz, dtype=np.float32).reshape(3) + for _ in range(int(max_steps)): + ee = _ee_position(env) + delta = goal - ee + if float(np.linalg.norm(delta)) <= float(tol): + break + _step_action(env, delta, grip=grip) + return {"ee_position": _ee_position(env)} + + +def _repeat_world_delta(env: gym.Env[Any, Any], delta_xyz: Sequence[float], *, grip: float, steps: int) -> None: + delta = np.asarray(delta_xyz, dtype=np.float32).reshape(3) + for _ in range(int(steps)): + _step_action(env, delta, grip=grip) + + +def _snapshot_env(env: gym.Env[Any, Any]) -> dict[str, Any]: + return {"state_dict": env.unwrapped.get_state_dict()} + + +def _restore_env(env: gym.Env[Any, Any], snapshot: dict[str, Any]) -> None: + env.unwrapped.set_state_dict(snapshot["state_dict"]) + + +def _sync_env_state(src_env: gym.Env[Any, Any], dst_env: gym.Env[Any, Any]) -> None: + _restore_env(dst_env, _snapshot_env(src_env)) + + +def _canonical_chunks(task_spec: BridgeTaskSpec) -> dict[str, np.ndarray]: + base = torch.zeros((1, 8, 14), dtype=torch.float32) + chunks = {"base_action": base.squeeze(0).numpy().astype(np.float32)} + for mode_name in task_spec.mode_order[1:]: + chunk = semantic_macro_chunk(base, task_name=task_spec.task_name, mode_name=mode_name).squeeze(0).cpu().numpy() + chunks[mode_name] = chunk.astype(np.float32) + return chunks + + +def _classify_mode_from_chunk(chunk: np.ndarray, canonical_chunks: dict[str, np.ndarray]) -> str: + candidate = np.asarray(chunk, dtype=np.float32) + distances = { + mode_name: float(np.mean(np.abs(candidate - prototype))) + for mode_name, prototype in canonical_chunks.items() + } + return min(distances, key=distances.get) + + +def _rng_for_seed(seed: int) -> np.random.Generator: + return 
np.random.default_rng(int(seed) + 31) + + +def _initialize_proxy_state(env: gym.Env[Any, Any], task_spec: BridgeTaskSpec, *, episode_seed: int) -> None: + base = env.unwrapped + rng = _rng_for_seed(episode_seed) + source = _source_actor(env) + source_pose = source.pose + source_q = _np(source_pose.q, dtype=np.float32).reshape(-1)[:4] + if task_spec.key == "bag": + center = _target_position(env) + start = center + np.asarray( + [ + rng.uniform(-0.006, 0.006), + rng.uniform(-0.010, 0.004), + 0.010 + rng.uniform(-0.002, 0.002), + ], + dtype=np.float32, + ) + else: + cover = _target_actor(env) + start = np.asarray([CLOTH_FIXED_SOURCE_X, CLOTH_FIXED_SOURCE_Y, CLOTH_FIXED_SOURCE_Z], dtype=np.float32) + source.set_pose(Pose.create_from_pq(p=start, q=source_q)) + if task_spec.key == "cloth": + _hold(env, steps=8, grip=1.0) + cover_pose = cover.pose + cover_q = _np(cover_pose.q, dtype=np.float32).reshape(-1)[:4] + cover_start = np.asarray([CLOTH_FIXED_COVER_X, CLOTH_FIXED_COVER_Y, CLOTH_FIXED_COVER_Z], dtype=np.float32) + cover.set_pose(Pose.create_from_pq(p=cover_start, q=cover_q)) + _hold(env, steps=CLOTH_HIDDEN_SETTLE_STEPS, grip=1.0) + return + _hold(env, steps=5, grip=1.0) + + +def _source_visibility(obs_bundle: dict[str, np.ndarray], actor_id: int) -> float: + seg = obs_bundle["segmentations"] + match = float(np.mean(seg == int(actor_id))) + return float(np.clip(match / VIEW_VISIBILITY_SCALE, 0.0, 1.0)) + + +def _all_positions(env: gym.Env[Any, Any], task_spec: BridgeTaskSpec) -> dict[str, np.ndarray]: + positions = {"source": _source_position(env), "target": _target_position(env)} + if task_spec.key == "cloth": + positions["cover"] = _target_position(env) + return positions + + +def _bag_progress(env: gym.Env[Any, Any]) -> float: + source = _source_position(env) + center = _target_position(env) + x_shift = max(source[0] - center[0], 0.0) + y_pull = max(center[1] - source[1], 0.0) + z_lift = max(source[2] - center[2], 0.0) + planar = float(np.linalg.norm((source - 
center)[:2])) + return float( + np.clip( + 0.35 * (x_shift / 0.05) + + 0.30 * (y_pull / 0.18) + + 0.20 * (z_lift / 0.12) + + 0.15 * (planar / 0.12), + 0.0, + 1.0, + ) + ) + + +def _bag_success(env: gym.Env[Any, Any]) -> bool: + source = _source_position(env) + center = _target_position(env) + planar = float(np.linalg.norm((source - center)[:2])) + return bool( + planar >= 0.035 + and ( + source[0] >= center[0] + 0.020 + or source[1] <= center[1] - 0.050 + or source[2] >= center[2] + 0.050 + ) + ) + + +def _cloth_progress( + env: gym.Env[Any, Any], + *, + start_positions: dict[str, np.ndarray], + current_visibility: float, +) -> float: + source = _source_position(env) + source_start = start_positions["source"] + y_shift = max(source[1] - source_start[1], 0.0) + planar = float(np.linalg.norm((source - source_start)[:2])) + return float(np.clip(0.45 * (y_shift / 0.16) + 0.35 * (planar / 0.16) + 0.20 * current_visibility, 0.0, 1.0)) + + +def _cloth_success( + env: gym.Env[Any, Any], + *, + start_positions: dict[str, np.ndarray], + current_visibility: float, +) -> bool: + source = _source_position(env) + source_start = start_positions["source"] + planar = float(np.linalg.norm((source - source_start)[:2])) + return bool( + source[1] >= source_start[1] + CLOTH_SUCCESS_MIN_Y_DELTA + and planar >= CLOTH_SUCCESS_MIN_PLANAR_DELTA + and current_visibility >= CLOTH_SUCCESS_MIN_VISIBILITY + ) + + +def _candidate_metrics( + env: gym.Env[Any, Any], + *, + task_spec: BridgeTaskSpec, + start_positions: dict[str, np.ndarray], + current_obs_bundle: dict[str, np.ndarray] | None = None, +) -> dict[str, float]: + source_actor = _source_actor(env) + actor_id = int(getattr(source_actor, "per_scene_id", -1)) + visibility = 0.0 + if current_obs_bundle is not None: + visibility = _source_visibility(current_obs_bundle, actor_id) + if task_spec.key == "bag": + progress = _bag_progress(env) + success = float(_bag_success(env)) + disturbance = 0.0 + access = float(np.clip(0.65 * progress + 0.35 * 
visibility, 0.0, 1.0)) + else: + progress = _cloth_progress(env, start_positions=start_positions, current_visibility=visibility) + success = float(_cloth_success(env, start_positions=start_positions, current_visibility=visibility)) + cloth_start = start_positions["cover"] + cloth_now = _target_position(env) + cloth_displacement = float(np.linalg.norm((cloth_now - cloth_start)[:2])) + disturbance = float(np.clip(max(cloth_displacement - 0.24, 0.0) / 0.14, 0.0, 1.0)) + access = float(np.clip(0.55 * progress + 0.45 * visibility, 0.0, 1.0)) + return { + "retrieval_success": success, + "disturbance": disturbance, + "visibility": visibility, + "clearance": access, + "progress": progress, + } + + +def _execute_bag_mode(env: gym.Env[Any, Any], mode_name: str) -> None: + center = _target_position(env) + source = _source_position(env) + if mode_name == "retrieve": + _move_ee(env, source + np.asarray([0.0, 0.0, 0.08], dtype=np.float32), grip=1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.010], dtype=np.float32), grip=1.0, tol=0.002) + _hold(env, steps=10, grip=-1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.12], dtype=np.float32), grip=-1.0) + _move_ee(env, source + np.asarray([0.0, -0.18, 0.12], dtype=np.float32), grip=-1.0) + return + if mode_name == "insert_actor": + _move_ee(env, source + np.asarray([0.0, 0.0, 0.06], dtype=np.float32), grip=1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.018], dtype=np.float32), grip=1.0, tol=0.002) + _hold(env, steps=4, grip=1.0) + return + if mode_name == "probe_inside": + _move_ee(env, source + np.asarray([0.0, 0.0, 0.05], dtype=np.float32), grip=1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.026], dtype=np.float32), grip=1.0, tol=0.002) + _repeat_world_delta(env, np.asarray([0.010, -0.004, 0.0], dtype=np.float32), grip=1.0, steps=8) + return + if mode_name == "widen_mouth": + _move_ee(env, center + np.asarray([-0.04, 0.01, 0.06], dtype=np.float32), grip=1.0) + _move_ee(env, center + np.asarray([-0.03, 0.01, 
0.028], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([0.012, -0.004, 0.0], dtype=np.float32), grip=1.0, steps=12) + return + if mode_name == "pin_left_rim": + _move_ee(env, center + np.asarray([-0.03, 0.01, 0.06], dtype=np.float32), grip=1.0) + _move_ee(env, center + np.asarray([-0.03, 0.01, 0.028], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([0.006, -0.003, 0.0], dtype=np.float32), grip=1.0, steps=8) + return + if mode_name == "pin_right_rim": + _move_ee(env, center + np.asarray([0.03, 0.01, 0.06], dtype=np.float32), grip=1.0) + _move_ee(env, center + np.asarray([0.03, 0.01, 0.028], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([-0.006, -0.003, 0.0], dtype=np.float32), grip=1.0, steps=8) + return + if mode_name in {"maintain_mouth", "base_action"}: + _move_ee(env, center + np.asarray([0.0, 0.0, 0.09], dtype=np.float32), grip=1.0, max_steps=30, tol=0.006) + _hold(env, steps=3, grip=1.0) + return + raise KeyError(f"Unsupported bag mode {mode_name!r}.") + + +def _execute_cloth_mode(env: gym.Env[Any, Any], mode_name: str) -> None: + cloth = _target_position(env) + source = _source_position(env) + if mode_name == "retrieve": + _move_ee(env, source + np.asarray([0.0, 0.0, 0.05], dtype=np.float32), grip=1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.010], dtype=np.float32), grip=1.0, tol=0.002) + _hold(env, steps=10, grip=-1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.10], dtype=np.float32), grip=-1.0) + _move_ee(env, source + np.asarray([0.0, 0.16, 0.10], dtype=np.float32), grip=-1.0) + return + if mode_name == "insert_actor": + _move_ee(env, source + np.asarray([0.0, 0.0, 0.05], dtype=np.float32), grip=1.0) + _move_ee(env, source + np.asarray([0.0, 0.0, 0.018], dtype=np.float32), grip=1.0, tol=0.002) + _hold(env, steps=4, grip=1.0) + return + if mode_name == "lift_edge": + _move_ee(env, cloth + np.asarray([0.0, -0.03, 0.05], dtype=np.float32), grip=1.0) + 
_move_ee(env, cloth + np.asarray([0.0, -0.03, 0.015], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([0.0, 0.006, 0.0], dtype=np.float32), grip=1.0, steps=8) + return + if mode_name == "separate_layer": + _move_ee(env, cloth + np.asarray([-0.04, 0.0, 0.05], dtype=np.float32), grip=1.0) + _move_ee(env, cloth + np.asarray([-0.04, 0.0, 0.015], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([0.008, 0.002, 0.0], dtype=np.float32), grip=1.0, steps=10) + return + if mode_name == "stabilize_fold": + _move_ee(env, cloth + np.asarray([0.0, 0.03, 0.05], dtype=np.float32), grip=1.0) + _move_ee(env, cloth + np.asarray([0.0, 0.03, 0.015], dtype=np.float32), grip=1.0, tol=0.003) + _repeat_world_delta(env, np.asarray([0.0, -0.006, 0.0], dtype=np.float32), grip=1.0, steps=8) + return + if mode_name in {"maintain_lift", "base_action"}: + _move_ee(env, cloth + np.asarray([0.0, 0.06, 0.07], dtype=np.float32), grip=1.0, max_steps=30, tol=0.006) + _hold(env, steps=3, grip=1.0) + return + raise KeyError(f"Unsupported cloth mode {mode_name!r}.") + + +def _execute_mode(env: gym.Env[Any, Any], task_spec: BridgeTaskSpec, mode_name: str) -> None: + if task_spec.key == "bag": + _execute_bag_mode(env, mode_name) + return + _execute_cloth_mode(env, mode_name) + + +def _mode_support_mode(task_spec: BridgeTaskSpec, mode_name: str, current_support_mode: int) -> int: + if mode_name in task_spec.reveal_modes: + return SUPPORT_MODE_HOLD + if mode_name in task_spec.transfer_modes: + return SUPPORT_MODE_TRANSFER + if mode_name in task_spec.retrieve_modes: + return SUPPORT_MODE_PASSIVE + return int(current_support_mode) + + +def _mode_progress_schedule(task_spec: BridgeTaskSpec, mode_name: str) -> np.ndarray: + if mode_name in task_spec.reveal_modes: + return np.asarray([0.18, 0.38, 0.62, 0.84, 1.0], dtype=np.float32) + if mode_name in task_spec.transfer_modes: + return np.asarray([0.22, 0.44, 0.66, 0.86, 1.0], dtype=np.float32) + if mode_name 
in task_spec.retrieve_modes: + return np.asarray([0.34, 0.56, 0.76, 0.92, 1.0], dtype=np.float32) + return np.asarray([0.10, 0.22, 0.34, 0.44, 0.54], dtype=np.float32) + + +def _scalar_rollout(start: float, end: float, schedule: np.ndarray) -> np.ndarray: + return np.clip((1.0 - schedule) * float(start) + schedule * float(end), 0.0, 1.0).astype(np.float32) + + +def _current_state_targets( + task_spec: BridgeTaskSpec, + *, + obs_bundle: dict[str, np.ndarray], + candidate_metrics: Sequence[dict[str, float]], + episode_start_positions: dict[str, np.ndarray], + selected_mode: str, + env: gym.Env[Any, Any], +) -> dict[str, Any]: + metrics_by_name = {mode_name: payload for mode_name, payload in zip(task_spec.mode_order, candidate_metrics)} + current_metrics = _candidate_metrics( + env, + task_spec=task_spec, + start_positions=episode_start_positions, + current_obs_bundle=obs_bundle, + ) + current_disturbance = float(np.clip(current_metrics["disturbance"], 0.0, 1.0)) + current_visibility = float(np.clip(current_metrics["visibility"], 0.0, 1.0)) + current_clearance = float(np.clip(current_metrics["clearance"], 0.0, 1.0)) + current_progress = float(np.clip(current_metrics["progress"], 0.0, 1.0)) + base_gap = float(np.clip(max(current_clearance, current_progress), 0.0, 1.0)) + support_stability = float(np.clip(1.0 - 0.5 * current_disturbance, 0.0, 1.0)) + hold_quality = float(np.clip(0.5 * (support_stability + max(current_clearance, current_progress)), 0.0, 1.0)) + opening_quality = float(np.clip(0.55 * current_progress + 0.25 * current_clearance + 0.20 * current_visibility, 0.0, 1.0)) + actor_feasibility = float(np.clip(0.6 * current_clearance + 0.4 * max(current_visibility, current_progress), 0.0, 1.0)) + reocclusion_rate = float(np.clip(1.0 - max(current_clearance, current_visibility), 0.0, 1.0)) + insertable_actor_corridor = float(np.clip(0.6 * actor_feasibility + 0.4 * base_gap, 0.0, 1.0)) + insertion_corridor = float(np.clip(0.5 * actor_feasibility + 0.5 * base_gap, 
0.0, 1.0)) + layer_separation = float(np.clip(0.7 * base_gap + 0.3 * actor_feasibility, 0.0, 1.0)) + fold_preservation = float(np.clip(1.0 - current_disturbance, 0.0, 1.0)) + lift_too_much_risk = float(np.clip(current_disturbance + 0.5 * max(base_gap - 0.5, 0.0), 0.0, 1.0)) + task_metrics = { + "opening_quality": opening_quality, + "actor_feasibility_score": actor_feasibility, + "gap_width": float(0.03 + 0.21 * base_gap), + "damage_proxy": current_disturbance, + "release_collapse_rate": reocclusion_rate, + "target_visibility_confidence": current_visibility, + "insertable_actor_corridor": insertable_actor_corridor, + "insertion_corridor": insertion_corridor, + "hold_quality": hold_quality, + "layer_separation_quality": layer_separation, + "fold_preservation": fold_preservation, + "top_layer_stability": support_stability, + "lift_too_much_risk": lift_too_much_risk, + } + + base_metrics = metrics_by_name["base_action"] + insert_metrics = metrics_by_name["insert_actor"] + retrieve_metrics = metrics_by_name["retrieve"] + reveal_candidates = [metrics_by_name[mode_name] for mode_name in task_spec.reveal_modes] + reveal_access = max(candidate["candidate_actor_feasibility_auc"] for candidate in reveal_candidates) + reveal_reveal = max(candidate["candidate_reveal_achieved"] for candidate in reveal_candidates) + reveal_hold = max(candidate["candidate_hold_persistence"] for candidate in reveal_candidates) + reveal_visibility = max(candidate["candidate_visibility_integral"] for candidate in reveal_candidates) + + reveal_corridor = float( + np.clip( + 0.45 * opening_quality + + 0.30 * reveal_access + + 0.15 * reveal_reveal + + 0.10 * reveal_visibility + - 0.10 * current_disturbance, + 0.0, + 1.0, + ) + ) + transfer_corridor = float( + np.clip( + 0.45 * insertable_actor_corridor + + 0.30 * insert_metrics["candidate_actor_feasibility_auc"] + + 0.15 * insert_metrics["candidate_reveal_achieved"] + + 0.10 * insert_metrics["candidate_visibility_integral"] + - 0.15 * 
current_disturbance, + 0.0, + 1.0, + ) + ) + passive_corridor = float( + np.clip( + 0.55 * retrieve_metrics["candidate_retrieval_success"] + + 0.20 * retrieve_metrics["candidate_actor_feasibility_auc"] + + 0.15 * current_progress + + 0.10 * current_clearance + - 0.10 * current_disturbance, + 0.0, + 1.0, + ) + ) + corridor_feasible = np.stack( + [ + np.full((NUM_APPROACH_TEMPLATES,), reveal_corridor, dtype=np.float32), + np.full((NUM_APPROACH_TEMPLATES,), transfer_corridor, dtype=np.float32), + np.full((NUM_APPROACH_TEMPLATES,), passive_corridor, dtype=np.float32), + ], + axis=0, + ) + persistence_horizon = np.asarray( + [ + ROLL_OUT_HORIZON * float(np.clip(0.35 * hold_quality + 0.35 * reveal_hold + 0.30 * reveal_corridor, 0.0, 1.0)), + ROLL_OUT_HORIZON + * float( + np.clip( + 0.30 * hold_quality + 0.35 * insert_metrics["candidate_hold_persistence"] + 0.35 * transfer_corridor, + 0.0, + 1.0, + ) + ), + ROLL_OUT_HORIZON + * float( + np.clip( + 0.25 * hold_quality + 0.35 * retrieve_metrics["candidate_hold_persistence"] + 0.40 * passive_corridor, + 0.0, + 1.0, + ) + ), + ], + dtype=np.float32, + ) + retrieve_margin = float(retrieve_metrics["candidate_utility"] - base_metrics["candidate_utility"]) + insert_margin = float(insert_metrics["candidate_utility"] - base_metrics["candidate_utility"]) + if selected_mode == "retrieve" or (retrieve_metrics["candidate_retrieval_success"] >= 0.5 and retrieve_margin >= 0.12): + support_mode = SUPPORT_MODE_PASSIVE + elif selected_mode == "insert_actor" or (insert_margin >= 0.12 and transfer_corridor >= 0.35): + support_mode = SUPPORT_MODE_TRANSFER + elif selected_mode in task_spec.reveal_modes: + support_mode = SUPPORT_MODE_HOLD + elif selected_mode == "base_action": + support_mode = SUPPORT_MODE_PASSIVE if passive_corridor >= 0.55 and retrieve_margin >= 0.03 else SUPPORT_MODE_HOLD + else: + support_mode = SUPPORT_MODE_HOLD + best_non_base_utility = max(float(payload["candidate_utility"]) for payload in candidate_metrics[1:]) + 
intervention_warranted = selected_mode != "base_action" and best_non_base_utility >= float(base_metrics["candidate_utility"]) + 0.12 + return { + "support_mode": int(support_mode), + "corridor_feasible": corridor_feasible, + "persistence_horizon": persistence_horizon, + "disturbance_cost": np.float32(current_disturbance), + "state_confidence_target": np.float32(1.0 if intervention_warranted else 0.0), + "task_metric_mask": STATE_METRIC_MASK.copy(), + **{metric_name: np.float32(metric_value) for metric_name, metric_value in task_metrics.items()}, + } + + +def _candidate_rollout_targets( + task_spec: BridgeTaskSpec, + *, + mode_name: str, + state_targets: dict[str, Any], + candidate_payload: dict[str, float], +) -> dict[str, np.ndarray]: + schedule = _mode_progress_schedule(task_spec, mode_name) + start_visibility = float(state_targets["target_visibility_confidence"]) + start_access = float(state_targets["actor_feasibility_score"]) + start_persistence = float(np.clip(state_targets["hold_quality"], 0.0, 1.0)) + start_support = float(np.clip(state_targets["top_layer_stability"], 0.0, 1.0)) + start_reocclusion = float(np.clip(state_targets["release_collapse_rate"], 0.0, 1.0)) + start_disturbance = float(np.clip(state_targets["disturbance_cost"], 0.0, 1.0)) + start_clearance = float(np.clip(state_targets["actor_feasibility_score"], 0.0, 1.0)) + start_grasp = float(np.clip(max(start_visibility, start_access), 0.0, 1.0)) + + end_visibility = float(np.clip(candidate_payload["candidate_immediate_visibility"], 0.0, 1.0)) + end_access = float(np.clip(candidate_payload["candidate_immediate_access"], 0.0, 1.0)) + end_progress = float(np.clip(candidate_payload["candidate_immediate_progress"], 0.0, 1.0)) + end_disturbance = float(np.clip(candidate_payload["candidate_immediate_disturbance"], 0.0, 1.0)) + end_support = float(np.clip(candidate_payload["candidate_immediate_support_stability"], 0.0, 1.0)) + end_persistence = 
float(np.clip(candidate_payload["candidate_immediate_hold_persistence"], 0.0, 1.0)) + end_reocclusion = float(np.clip(candidate_payload["candidate_immediate_reocclusion"], 0.0, 1.0)) + end_clearance = float(np.clip(max(end_access, end_progress), 0.0, 1.0)) + end_grasp = float(np.clip(max(end_visibility, 0.5 * end_access + 0.5 * end_progress), 0.0, 1.0)) + + if mode_name in task_spec.transfer_modes: + start_visibility = max(start_visibility, 0.35 * end_visibility) + start_access = max(start_access, 0.40 * end_access) + start_persistence = max(start_persistence, 0.45 * end_persistence) + start_support = max(start_support, 0.50 * end_support) + elif mode_name in task_spec.retrieve_modes: + start_visibility = max(start_visibility, 0.55 * end_visibility) + start_access = max(start_access, 0.70 * end_access) + start_persistence = max(start_persistence, 0.65 * end_persistence) + start_support = max(start_support, 0.65 * end_support) + start_reocclusion = min(start_reocclusion, max(0.4 * end_reocclusion, 0.0)) + + visibility = _scalar_rollout(start_visibility, end_visibility, schedule) + access = _scalar_rollout(start_access, end_access, schedule) + persistence = _scalar_rollout(start_persistence, end_persistence, schedule) + support = _scalar_rollout(start_support, end_support, schedule) + reocclusion = _scalar_rollout(start_reocclusion, end_reocclusion, schedule) + disturbance = _scalar_rollout(start_disturbance, end_disturbance, schedule) + clearance = _scalar_rollout(start_clearance, end_clearance, schedule) + grasp = _scalar_rollout(start_grasp, end_grasp, schedule) + reveal_corridor = np.clip(0.38 * visibility + 0.34 * access + 0.22 * support - 0.12 * disturbance, 0.0, 1.0) + transfer_corridor = np.clip( + 0.30 * visibility + 0.38 * access + 0.18 * persistence + 0.14 * support - 0.12 * disturbance, + 0.0, + 1.0, + ) + passive_corridor = np.clip( + 0.22 * visibility + 0.42 * access + 0.20 * persistence + 0.16 * grasp - 0.14 * disturbance - 0.10 * reocclusion, + 0.0, + 
1.0, + ) + if mode_name in task_spec.reveal_modes: + reveal_corridor = np.clip(reveal_corridor + 0.14, 0.0, 1.0) + passive_corridor = np.clip(0.75 * passive_corridor, 0.0, 1.0) + elif mode_name in task_spec.transfer_modes: + transfer_corridor = np.clip(transfer_corridor + 0.16, 0.0, 1.0) + elif mode_name in task_spec.retrieve_modes: + passive_corridor = np.clip(passive_corridor + 0.20, 0.0, 1.0) + reveal_corridor = np.clip(0.60 * reveal_corridor, 0.0, 1.0) + else: + reveal_corridor = np.clip(0.85 * reveal_corridor, 0.0, 1.0) + transfer_corridor = np.clip(0.75 * transfer_corridor, 0.0, 1.0) + passive_corridor = np.clip(0.80 * passive_corridor, 0.0, 1.0) + corridor_feasible = np.stack( + [ + np.repeat(reveal_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1), + np.repeat(transfer_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1), + np.repeat(passive_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1), + ], + axis=1, + ).astype(np.float32) + persistence_horizon = np.stack( + [ + np.clip(ROLL_OUT_HORIZON * (0.55 * reveal_corridor + 0.45 * support), 0.0, float(ROLL_OUT_HORIZON)), + np.clip(ROLL_OUT_HORIZON * (0.50 * transfer_corridor + 0.50 * persistence), 0.0, float(ROLL_OUT_HORIZON)), + np.clip(ROLL_OUT_HORIZON * (0.55 * passive_corridor + 0.45 * persistence), 0.0, float(ROLL_OUT_HORIZON)), + ], + axis=1, + ).astype(np.float32) + support_mode = np.full((ROLL_OUT_HORIZON,), _mode_support_mode(task_spec, mode_name, int(state_targets["support_mode"])), dtype=np.int64) + if mode_name == "base_action": + support_mode[:] = int(state_targets["support_mode"]) + return { + "candidate_rollout_support_mode": support_mode, + "candidate_rollout_corridor_feasible": corridor_feasible, + "candidate_rollout_persistence_horizon": persistence_horizon, + "candidate_rollout_disturbance_cost": disturbance.astype(np.float32), + "candidate_rollout_belief_map": visibility[:, None, None].astype(np.float32), + "candidate_rollout_visibility_map": visibility[:, None, None].astype(np.float32), + 
"candidate_rollout_clearance_map": np.repeat(clearance[:, None, None, None], 2, axis=1).astype(np.float32), + "candidate_rollout_support_stability": support[:, None, None, None].astype(np.float32), + "candidate_rollout_reocclusion_target": reocclusion[:, None, None].astype(np.float32), + "candidate_rollout_occluder_contact_map": np.clip(access * support, 0.0, 1.0)[:, None, None].astype(np.float32), + "candidate_rollout_grasp_affordance_map": grasp[:, None, None].astype(np.float32), + } + + +def _select_expert_mode( + task_spec: BridgeTaskSpec, + *, + decision_step: int, + candidate_metrics: Sequence[dict[str, float]], +) -> str: + metrics_by_name = {mode_name: payload for mode_name, payload in zip(task_spec.mode_order, candidate_metrics)} + base_utility = float(metrics_by_name["base_action"]["candidate_utility"]) + reveal_best = max(task_spec.reveal_modes, key=lambda name: float(metrics_by_name[name]["candidate_utility"])) + transfer_best = max(task_spec.transfer_modes, key=lambda name: float(metrics_by_name[name]["candidate_utility"])) + retrieve_utility = float(metrics_by_name["retrieve"]["candidate_utility"]) + reveal_best_utility = float(metrics_by_name[reveal_best]["candidate_utility"]) + transfer_best_utility = float(metrics_by_name[transfer_best]["candidate_utility"]) + retrieve_success = float(metrics_by_name["retrieve"]["candidate_retrieval_success"]) + + if int(decision_step) > 0 and retrieve_success >= 0.5: + return "retrieve" + if int(decision_step) == 0 and reveal_best_utility >= base_utility - 0.02: + return reveal_best + if transfer_best_utility >= reveal_best_utility + 0.05 and transfer_best_utility >= base_utility + 0.02: + return transfer_best + if reveal_best_utility >= base_utility - 0.02: + return reveal_best + if retrieve_success >= 0.5 and retrieve_utility >= base_utility + 0.02: + return "retrieve" + if transfer_best_utility >= base_utility + 0.02: + return transfer_best + utilities = np.asarray([payload["candidate_utility"] for payload in 
candidate_metrics], dtype=np.float32) + return task_spec.mode_order[int(utilities.argmax())] + + +def _evaluate_candidate( + task_spec: BridgeTaskSpec, + sim_env: gym.Env[Any, Any], + obs_env: gym.Env[Any, Any], + snapshot: dict[str, Any], + mode_name: str, +) -> dict[str, float]: + _restore_env(sim_env, snapshot) + start_positions = _all_positions(sim_env, task_spec) + _execute_mode(sim_env, task_spec, mode_name) + _sync_env_state(sim_env, obs_env) + after_bundle = _extract_sensor_bundle(obs_env.get_obs(obs_env.get_info()), resolution=IMAGE_RESOLUTION) + immediate = _candidate_metrics(sim_env, task_spec=task_spec, start_positions=start_positions, current_obs_bundle=after_bundle) + if not immediate["retrieval_success"] and mode_name not in {"retrieve", "base_action"}: + _execute_mode(sim_env, task_spec, "retrieve") + _sync_env_state(sim_env, obs_env) + follow_bundle = _extract_sensor_bundle(obs_env.get_obs(obs_env.get_info()), resolution=IMAGE_RESOLUTION) + final_metrics = _candidate_metrics(sim_env, task_spec=task_spec, start_positions=start_positions, current_obs_bundle=follow_bundle) + else: + final_metrics = immediate + _restore_env(obs_env, snapshot) + utility = ( + 2.5 * final_metrics["retrieval_success"] + + 1.0 * final_metrics["progress"] + + 0.5 * final_metrics["clearance"] + + 0.25 * final_metrics["visibility"] + - 0.5 * final_metrics["disturbance"] + ) + return { + "candidate_retrieval_success": final_metrics["retrieval_success"], + "candidate_risk": float(np.clip(final_metrics["disturbance"], 0.0, 1.0)), + "candidate_utility": float(utility), + "candidate_final_disturbance_cost": final_metrics["disturbance"], + "candidate_reocclusion_rate": float(np.clip(1.0 - final_metrics["clearance"], 0.0, 1.0)), + "candidate_visibility_integral": final_metrics["visibility"], + "candidate_actor_feasibility_auc": final_metrics["clearance"], + "candidate_reveal_achieved": float(final_metrics["progress"] > 0.15 or final_metrics["clearance"] > 0.35), + 
"candidate_hold_persistence": float(1.0 - final_metrics["disturbance"]), + "candidate_support_stability_auc": float(1.0 - 0.5 * final_metrics["disturbance"]), + "candidate_disturbance_auc": final_metrics["disturbance"], + "candidate_immediate_retrieval_success": immediate["retrieval_success"], + "candidate_immediate_visibility": immediate["visibility"], + "candidate_immediate_access": immediate["clearance"], + "candidate_immediate_progress": immediate["progress"], + "candidate_immediate_reocclusion": float(np.clip(1.0 - immediate["clearance"], 0.0, 1.0)), + "candidate_immediate_hold_persistence": float(1.0 - immediate["disturbance"]), + "candidate_immediate_support_stability": float(1.0 - 0.5 * immediate["disturbance"]), + "candidate_immediate_disturbance": immediate["disturbance"], + } + + +def _cloth_seed_is_valid(env: gym.Env[Any, Any], task_spec: BridgeTaskSpec, *, episode_seed: int) -> bool: + env.reset(seed=int(episode_seed)) + _initialize_proxy_state(env, task_spec, episode_seed=int(episode_seed)) + start_positions = _all_positions(env, task_spec) + obs = env.get_obs(env.get_info()) + obs_bundle = _extract_sensor_bundle(obs, resolution=IMAGE_RESOLUTION) + actor_id = int(getattr(_source_actor(env), "per_scene_id", -1)) + start_visibility = _source_visibility(obs_bundle, actor_id) + if start_visibility > CLOTH_SUCCESS_MIN_VISIBILITY: + return False + snapshot = _snapshot_env(env) + for reveal_mode in ("lift_edge", "separate_layer"): + _restore_env(env, snapshot) + _execute_mode(env, task_spec, reveal_mode) + _execute_mode(env, task_spec, "retrieve") + obs = env.get_obs(env.get_info()) + obs_bundle = _extract_sensor_bundle(obs, resolution=IMAGE_RESOLUTION) + visibility = _source_visibility(obs_bundle, actor_id) + if _cloth_success(env, start_positions=start_positions, current_visibility=visibility): + return True + return False + + +def _build_episode_splits(task_spec: BridgeTaskSpec, spec: SmokeSpec) -> dict[str, list[int]]: + if task_spec.key != "cloth": + 
return { + "train": [spec.dataset_seed * 10_000 + index for index in range(spec.train_episodes)], + "val": [spec.dataset_seed * 10_000 + 1_000 + index for index in range(spec.val_episodes)], + "eval": [spec.dataset_seed * 10_000 + 2_000 + index for index in range(spec.eval_episodes)], + } + target_total = int(spec.train_episodes + spec.val_episodes + spec.eval_episodes) + valid_seeds: list[int] = [] + candidate_index = 0 + env = gym.make(task_spec.env_id, obs_mode="rgb+segmentation", render_mode="rgb_array") + try: + while len(valid_seeds) < target_total: + episode_seed = spec.dataset_seed * 10_000 + candidate_index + candidate_index += 1 + if _cloth_seed_is_valid(env, task_spec, episode_seed=episode_seed): + valid_seeds.append(int(episode_seed)) + print( + json.dumps( + { + "phase": "cloth_seed_selected", + "episode_seed": int(episode_seed), + "selected": len(valid_seeds), + "target_total": target_total, + } + ), + flush=True, + ) + if candidate_index > target_total * 30: + raise RuntimeError("Unable to find enough physics-valid cloth proxy seeds for the smoke protocol.") + finally: + env.close() + return { + "train": valid_seeds[: spec.train_episodes], + "val": valid_seeds[spec.train_episodes : spec.train_episodes + spec.val_episodes], + "eval": valid_seeds[spec.train_episodes + spec.val_episodes : target_total], + } + + +def _save_episode_splits(output_path: Path, payload: dict[str, list[int]]) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + +def _normalize_depth_array(array: np.ndarray) -> np.ndarray: + normalized = _np(array, dtype=np.float32) + if normalized.ndim >= 4 and normalized.shape[-1] == 1: + return np.moveaxis(normalized, -1, normalized.ndim - 3) + return normalized + + +def _candidate_pad_indices(source_candidates: int, expected_candidates: int) -> list[int]: + if source_candidates <= 0 or source_candidates >= expected_candidates: + 
return [] + if source_candidates == 1: + return [0] * (expected_candidates - source_candidates) + cycle = list(range(1, source_candidates)) + indices: list[int] = [] + while len(indices) < (expected_candidates - source_candidates): + indices.extend(cycle) + return indices[: expected_candidates - source_candidates] + + +def _pad_candidate_axis( + value: Any, + *, + source_candidates: int, + expected_candidates: int, + pad_indices: Sequence[int], +) -> Any: + if source_candidates <= 0 or source_candidates >= expected_candidates: + return value + if isinstance(value, np.ndarray): + if value.ndim == 0 or value.shape[0] != source_candidates: + return value + if not pad_indices: + return value + padding = np.take(value, indices=list(pad_indices), axis=0) + return np.concatenate([value, padding], axis=0) + if isinstance(value, torch.Tensor): + if value.ndim == 0 or value.shape[0] != source_candidates: + return value + if not pad_indices: + return value + pad_index = torch.as_tensor(list(pad_indices), device=value.device, dtype=torch.long) + padding = value.index_select(0, pad_index) + return torch.cat([value, padding], dim=0) + if isinstance(value, list) and len(value) == source_candidates: + padded = list(value) + padded.extend(value[index] for index in pad_indices) + return padded + if isinstance(value, tuple) and len(value) == source_candidates: + padded = list(value) + padded.extend(value[index] for index in pad_indices) + return tuple(padded) + return value + + +def _normalize_candidate_targets(sample: dict[str, Any]) -> dict[str, Any]: + candidate_chunks = sample.get("candidate_action_chunks") + if candidate_chunks is None: + return sample + candidate_array = _np(candidate_chunks) + if candidate_array.ndim == 0: + return sample + source_candidates = int(candidate_array.shape[0]) + if source_candidates >= EXPECTED_PROPOSAL_CANDIDATES: + return sample + pad_indices = _candidate_pad_indices(source_candidates, EXPECTED_PROPOSAL_CANDIDATES) + if not pad_indices: + return 
sample + padded = dict(sample) + for key, value in sample.items(): + if not (key.startswith("candidate_") or key.startswith("proposal_target_")): + continue + padded[key] = _pad_candidate_axis( + value, + source_candidates=source_candidates, + expected_candidates=EXPECTED_PROPOSAL_CANDIDATES, + pad_indices=pad_indices, + ) + return padded + + +def _normalize_cached_samples(samples: Sequence[dict[str, Any]]) -> list[dict[str, Any]]: + normalized_samples: list[dict[str, Any]] = [] + for sample in samples: + patched = dict(sample) + for key in ("depths", "depth_valid", "history_depths", "history_depth_valid"): + if key in patched: + patched[key] = _normalize_depth_array(patched[key]) + patched = _normalize_candidate_targets(patched) + normalized_samples.append(patched) + return normalized_samples + + +def _collect_split( + *, + task_spec: BridgeTaskSpec, + canonical_chunks: dict[str, np.ndarray], + split_name: str, + seeds: Sequence[int], + spec: SmokeSpec, + output_path: Path, +) -> dict[str, Any]: + obs_env = gym.make(task_spec.env_id, obs_mode="rgb+segmentation", render_mode="rgb_array") + sim_env = gym.make(task_spec.env_id, obs_mode="rgb+segmentation", render_mode="rgb_array") + samples: list[dict[str, Any]] = [] + episode_records: list[dict[str, Any]] = [] + try: + for episode_seed in seeds: + obs, _ = obs_env.reset(seed=int(episode_seed)) + sim_env.reset(seed=int(episode_seed)) + _initialize_proxy_state(obs_env, task_spec, episode_seed=int(episode_seed)) + _sync_env_state(obs_env, sim_env) + obs = obs_env.get_obs(obs_env.get_info()) + episode_start_positions = _all_positions(obs_env, task_spec) + history: deque[dict[str, Any]] = deque(maxlen=spec.history_steps) + episode_success = False + for decision_step in range(spec.max_macro_steps): + obs_bundle = _extract_sensor_bundle(obs, resolution=spec.resolution) + proprio = _build_proprio(obs_env) + snapshot = _snapshot_env(obs_env) + candidate_metrics = [ + _evaluate_candidate(task_spec, sim_env, obs_env, snapshot, 
mode_name) for mode_name in task_spec.mode_order + ] + candidate_chunks = np.stack([canonical_chunks[mode_name] for mode_name in task_spec.mode_order], axis=0).astype(np.float32) + utilities = np.asarray([payload["candidate_utility"] for payload in candidate_metrics], dtype=np.float32) + selected_mode = _select_expert_mode(task_spec, decision_step=decision_step, candidate_metrics=candidate_metrics) + state_targets = _current_state_targets( + task_spec, + env=obs_env, + obs_bundle=obs_bundle, + candidate_metrics=candidate_metrics, + episode_start_positions=episode_start_positions, + selected_mode=selected_mode, + ) + rollout_targets_by_mode = [ + _candidate_rollout_targets(task_spec, mode_name=mode_name, state_targets=state_targets, candidate_payload=payload) + for mode_name, payload in zip(task_spec.mode_order, candidate_metrics) + ] + sample = { + "images": obs_bundle["images"].copy(), + "depths": obs_bundle["depths"].copy(), + "depth_valid": obs_bundle["depth_valid"].copy(), + "camera_intrinsics": obs_bundle["camera_intrinsics"].copy(), + "camera_extrinsics": obs_bundle["camera_extrinsics"].copy(), + "history_images": _history_stack( + history, + "images", + pad_shape=obs_bundle["images"].shape, + dtype=np.uint8, + history_steps=spec.history_steps, + ), + "history_depths": _history_stack( + history, + "depths", + pad_shape=obs_bundle["depths"].shape, + dtype=np.float32, + history_steps=spec.history_steps, + ), + "history_depth_valid": _history_stack( + history, + "depth_valid", + pad_shape=obs_bundle["depth_valid"].shape, + dtype=np.float32, + history_steps=spec.history_steps, + ), + "history_camera_intrinsics": _history_stack( + history, + "camera_intrinsics", + pad_shape=obs_bundle["camera_intrinsics"].shape, + dtype=np.float32, + history_steps=spec.history_steps, + ), + "history_camera_extrinsics": _history_stack( + history, + "camera_extrinsics", + pad_shape=obs_bundle["camera_extrinsics"].shape, + dtype=np.float32, + history_steps=spec.history_steps, + ), + 
"history_proprio": _history_stack( + history, + "proprio", + pad_shape=(PROPRIO_DIM,), + dtype=np.float32, + history_steps=spec.history_steps, + ), + "history_actions": _history_stack( + history, + "action", + pad_shape=(14,), + dtype=np.float32, + history_steps=spec.history_steps, + ), + "proprio": proprio.astype(np.float32), + "language_goal": task_spec.text_prompt, + "task_name": task_spec.task_name, + "task_id": TASK_INDEX[task_spec.task_name], + "action_chunk": canonical_chunks[selected_mode].copy(), + "candidate_action_chunks": candidate_chunks, + "candidate_retrieval_success": np.asarray([payload["candidate_retrieval_success"] for payload in candidate_metrics], dtype=np.float32), + "candidate_final_disturbance_cost": np.asarray( + [payload["candidate_final_disturbance_cost"] for payload in candidate_metrics], + dtype=np.float32, + ), + "candidate_reocclusion_rate": np.asarray([payload["candidate_reocclusion_rate"] for payload in candidate_metrics], dtype=np.float32), + "candidate_visibility_integral": np.asarray( + [payload["candidate_visibility_integral"] for payload in candidate_metrics], + dtype=np.float32, + ), + "candidate_actor_feasibility_auc": np.asarray( + [payload["candidate_actor_feasibility_auc"] for payload in candidate_metrics], + dtype=np.float32, + ), + "candidate_reveal_achieved": np.asarray([payload["candidate_reveal_achieved"] for payload in candidate_metrics], dtype=np.float32), + "candidate_hold_persistence": np.asarray([payload["candidate_hold_persistence"] for payload in candidate_metrics], dtype=np.float32), + "candidate_support_stability_auc": np.asarray( + [payload["candidate_support_stability_auc"] for payload in candidate_metrics], + dtype=np.float32, + ), + "candidate_disturbance_auc": np.asarray([payload["candidate_disturbance_auc"] for payload in candidate_metrics], dtype=np.float32), + "candidate_risk": np.asarray([payload["candidate_risk"] for payload in candidate_metrics], dtype=np.float32), + "candidate_utility": utilities, 
+ "candidate_rollout_support_mode": np.stack( + [payload["candidate_rollout_support_mode"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.int64), + "candidate_rollout_corridor_feasible": np.stack( + [payload["candidate_rollout_corridor_feasible"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_persistence_horizon": np.stack( + [payload["candidate_rollout_persistence_horizon"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_disturbance_cost": np.stack( + [payload["candidate_rollout_disturbance_cost"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_belief_map": np.stack( + [payload["candidate_rollout_belief_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_visibility_map": np.stack( + [payload["candidate_rollout_visibility_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_clearance_map": np.stack( + [payload["candidate_rollout_clearance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_support_stability": np.stack( + [payload["candidate_rollout_support_stability"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_reocclusion_target": np.stack( + [payload["candidate_rollout_reocclusion_target"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_occluder_contact_map": np.stack( + [payload["candidate_rollout_occluder_contact_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_grasp_affordance_map": np.stack( + [payload["candidate_rollout_grasp_affordance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_support_mode": np.stack( + 
[payload["candidate_rollout_support_mode"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.int64), + "proposal_target_rollout_corridor_feasible": np.stack( + [payload["candidate_rollout_corridor_feasible"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_persistence_horizon": np.stack( + [payload["candidate_rollout_persistence_horizon"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_disturbance_cost": np.stack( + [payload["candidate_rollout_disturbance_cost"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_belief_map": np.stack( + [payload["candidate_rollout_belief_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_visibility_map": np.stack( + [payload["candidate_rollout_visibility_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_clearance_map": np.stack( + [payload["candidate_rollout_clearance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_support_stability": np.stack( + [payload["candidate_rollout_support_stability"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_reocclusion_target": np.stack( + [payload["candidate_rollout_reocclusion_target"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_occluder_contact_map": np.stack( + [payload["candidate_rollout_occluder_contact_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_grasp_affordance_map": np.stack( + [payload["candidate_rollout_grasp_affordance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "episode_seed": int(episode_seed), + "decision_step": int(decision_step), 
+ "selected_mode": selected_mode, + **state_targets, + } + samples.append(sample) + _execute_mode(obs_env, task_spec, selected_mode) + obs = obs_env.get_obs(obs_env.get_info()) + post_bundle = _extract_sensor_bundle(obs, resolution=spec.resolution) + history.append(_init_history_entry(obs_bundle, proprio, canonical_chunks[selected_mode])) + if ( + _bag_success(obs_env) + if task_spec.key == "bag" + else _cloth_success( + obs_env, + start_positions=episode_start_positions, + current_visibility=_source_visibility(post_bundle, int(getattr(_source_actor(obs_env), "per_scene_id", -1))), + ) + ): + episode_success = True + break + episode_records.append({"episode_seed": int(episode_seed), "success": episode_success, "steps": len(history)}) + print( + json.dumps( + { + "phase": "collect_episode_complete", + "task": task_spec.key, + "split": split_name, + "episode_seed": int(episode_seed), + "success": episode_success, + "steps": len(history), + "samples_collected": len(samples), + } + ), + flush=True, + ) + finally: + obs_env.close() + sim_env.close() + payload = { + "split_name": split_name, + "resolution": spec.resolution, + "history_steps": spec.history_steps, + "samples": samples, + "episode_records": episode_records, + } + output_path.parent.mkdir(parents=True, exist_ok=True) + torch.save(payload, output_path) + return payload + + +def _manual_train_spec(task_spec: BridgeTaskSpec, variant: str, spec: SmokeSpec) -> dict[str, Any]: + return { + "track_id": task_spec.track_id, + "suite": task_spec.suite, + "benchmark_task": task_spec.benchmark_task, + "model_variant": str(variant), + "seed": int(spec.train_seed), + "train_demos": int(spec.train_episodes), + "val_demos": int(spec.val_episodes), + "init_checkpoint_group": str(DEFAULT_INIT_CHECKPOINT), + "optimizer": "adamw", + "learning_rate": float(spec.learning_rate), + "lr_schedule": "constant", + "batch_size": int(spec.batch_size), + "augmentations": "none", + "early_stopping_metric": "val_total", + 
"max_gradient_steps": int(spec.epochs * math.ceil(max(1, spec.train_episodes) / max(1, spec.batch_size))), + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": ( + f"{task_spec.key}_{SMOKE_VERSION}_seed{spec.dataset_seed}" + if int(spec.dataset_seed) == DEFAULT_SEED + else f"{task_spec.key}_{SMOKE_VERSION}_dataset_seed{spec.dataset_seed}" + ), + "same_data_policy": True, + "same_init_policy": True, + } + + +def _trainer_config_for_variant(variant: str) -> TrainerConfig: + if variant == "trunk_only_ft": + return TrainerConfig( + policy_type="trunk_only", + trainable_parameter_prefixes=("fusion", "memory", "decoder"), + eval_mode="trunk_only", + ) + if variant == "adapter_active_ft": + return TrainerConfig( + policy_type="adapter_wrapped", + trainable_parameter_prefixes=( + "trunk.fusion", + "trunk.memory", + "trunk.decoder", + "adapter.state_head", + "adapter.transition_model", + "adapter.proposal_prior", + "adapter.planner", + ), + adapter_mode="adapter_active", + eval_mode="adapter_active", + adapter_use_transition_model=True, + adapter_use_task_conditioning=True, + adapter_action_supervision_source="trunk", + ) + raise KeyError(f"Unsupported variant {variant!r}.") + + +def _loss_weights_for_smoke(task_spec: BridgeTaskSpec) -> LossWeights: + return LossWeights( + action=1.0, + support_mode=0.15, + corridor=0.15, + persistence=0.08, + disturbance=0.08, + planner_success=0.20, + planner_risk=0.08, + planner_ranking=0.20, + proposal_reconstruction=0.10, + proposal_success=0.12, + proposal_ranking=0.15, + proposal_mode=0.10, + proposal_diversity=0.02, + task_metrics=0.15, + transition=0.25, + gate=0.25, + calibration=0.10, + proposal_mode_task_filter=[task_spec.task_name], + ) + + +def _train_variant( + *, + task_spec: BridgeTaskSpec, + variant: str, + train_samples: Sequence[dict[str, Any]], + val_samples: Sequence[dict[str, Any]], + spec: SmokeSpec, + output_dir: Path, +) -> tuple[Path, dict[str, Any]]: + policy_config, _init_trainer_cfg, 
_init_loss_weights = _load_init_bundle() + policy_config = _apply_smoke_planner_overrides(policy_config) + trainer_config = _trainer_config_for_variant(variant) + loss_weights = _loss_weights_for_smoke(task_spec) + model = build_policy(policy_config, trainer_config) + init_info = _load_init_checkpoint(model, str(DEFAULT_INIT_CHECKPOINT), False) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = model.to(device) + torch.manual_seed(spec.train_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(spec.train_seed) + torch.backends.cuda.matmul.allow_tf32 = True + matched = apply_trainable_parameter_prefixes(model, trainer_config) + optimizer = torch.optim.AdamW( + [parameter for parameter in model.parameters() if parameter.requires_grad], + lr=spec.learning_rate, + weight_decay=spec.weight_decay, + ) + trainer = BimanualTrainer(model=model, optimizer=optimizer, config=trainer_config) + train_loader = _make_loader(train_samples, batch_size=spec.batch_size, num_workers=spec.num_workers, shuffle=True) + val_loader = _make_loader(val_samples, batch_size=spec.batch_size, num_workers=spec.num_workers, shuffle=False) + best_val = math.inf + history: list[dict[str, Any]] = [] + train_spec = _manual_train_spec(task_spec, variant, spec) + train_spec["max_gradient_steps"] = len(train_loader) * spec.epochs + for epoch in range(spec.epochs): + model.train() + train_losses: list[dict[str, float]] = [] + for batch in train_loader: + moved = _move_batch_to_device(batch, device) + loss_dict = trainer.training_step(moved, loss_weights=loss_weights) + train_losses.append({key: float(value.detach().cpu()) for key, value in loss_dict.items()}) + model.eval() + val_losses: list[dict[str, float]] = [] + with torch.no_grad(): + for batch in val_loader: + moved = _move_batch_to_device(batch, device) + forward_kwargs = { + "images": moved["images"], + "proprio": moved["proprio"], + "texts": moved["texts"], + "task_names": moved.get("task_name"), + 
"task_ids": moved.get("task_id"), + "history_images": moved.get("history_images"), + "history_proprio": moved.get("history_proprio"), + "history_actions": moved.get("history_actions"), + "depths": moved.get("depths"), + "depth_valid": moved.get("depth_valid"), + "camera_intrinsics": moved.get("camera_intrinsics"), + "camera_extrinsics": moved.get("camera_extrinsics"), + "history_depths": moved.get("history_depths"), + "history_depth_valid": moved.get("history_depth_valid"), + "history_camera_intrinsics": moved.get("history_camera_intrinsics"), + "history_camera_extrinsics": moved.get("history_camera_extrinsics"), + } + if variant == "adapter_active_ft": + forward_kwargs["adapter_mode"] = "adapter_active" + forward_kwargs["use_transition_model"] = True + forward_kwargs["use_task_conditioning"] = True + outputs = model(**forward_kwargs) + losses = compute_total_loss(outputs, moved, weights=loss_weights) + val_losses.append({key: float(value.detach().cpu()) for key, value in losses.items()}) + train_summary = _aggregate_epoch(train_losses) + val_summary = _aggregate_epoch(val_losses) + history.append({"epoch": epoch, "train": train_summary, "val": val_summary}) + print( + json.dumps( + { + "phase": "epoch_complete", + "task": task_spec.key, + "variant": variant, + "epoch": epoch, + "train_total": train_summary.get("total", 0.0), + "val_total": val_summary.get("total", 0.0), + } + ), + flush=True, + ) + if val_summary.get("total", math.inf) <= best_val: + best_val = val_summary["total"] + checkpoint_path = _save_training_checkpoint( + output_dir=output_dir, + experiment_name=f"{task_spec.key}_{variant}_seed{spec.train_seed}", + model=model, + policy_config=policy_config, + trainer_config=trainer_config, + loss_weights=loss_weights, + history=history, + best_val=best_val, + train_spec=train_spec, + ) + (output_dir / "summary.json").write_text( + json.dumps( + { + "task": task_spec.key, + "variant": variant, + "checkpoint_path": str(checkpoint_path), + "init_info": 
init_info, + "trainable_parameter_names": matched, + "best_val_total": best_val, + "history": history, + "train_spec": train_spec, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) + return output_dir / "checkpoint_best.pt", train_spec + + +def _eval_mode_name(model_output: dict[str, Any], result_mode_name: str, canonical_chunks: dict[str, np.ndarray]) -> tuple[str, bool, bool]: + if result_mode_name == "adapter_active_ft" and "proposal_mode_names" in model_output and "best_candidate_indices" in model_output: + active_mask = bool(_np(model_output.get("adapter_active_mask", np.asarray([False]))).reshape(-1)[0]) + if not active_mask: + return _classify_mode_from_chunk(_np(model_output["action_mean"])[0], canonical_chunks), False, False + best_index = int(_np(model_output["best_candidate_indices"])[0]) + proposal_mode_names = model_output["proposal_mode_names"][0] + if best_index < len(proposal_mode_names): + mode_name = str(proposal_mode_names[best_index]) + else: + mode_name = _classify_mode_from_chunk(_np(model_output["action_mean"])[0], canonical_chunks) + return mode_name, active_mask, bool(best_index > 0) + return _classify_mode_from_chunk(_np(model_output["action_mean"])[0], canonical_chunks), False, False + + +def _manual_eval_protocol(task_spec: BridgeTaskSpec, *, eval_mode: str, spec: SmokeSpec, episodes: int) -> dict[str, Any]: + return { + "track_id": task_spec.track_id, + "suite": task_spec.suite, + "benchmark_task": task_spec.benchmark_task, + "role": "target", + "eval_mode": eval_mode, + "seed": int(spec.dataset_seed), + "episodes": int(episodes), + "resolution": int(spec.resolution), + "cameras": tuple(CAMERA_NAMES), + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": True, + } + + +def _batch_from_obs( + task_spec: BridgeTaskSpec, + obs_bundle: dict[str, np.ndarray], + proprio: np.ndarray, + history: Sequence[dict[str, Any]], + device: torch.device, +) -> 
dict[str, Any]: + return { + "images": torch.from_numpy(obs_bundle["images"]).permute(0, 3, 1, 2).unsqueeze(0).float().div(255.0).to(device), + "depths": torch.from_numpy(obs_bundle["depths"]).unsqueeze(0).float().to(device), + "depth_valid": torch.from_numpy(obs_bundle["depth_valid"]).unsqueeze(0).float().to(device), + "camera_intrinsics": torch.from_numpy(obs_bundle["camera_intrinsics"]).unsqueeze(0).float().to(device), + "camera_extrinsics": torch.from_numpy(obs_bundle["camera_extrinsics"]).unsqueeze(0).float().to(device), + "history_images": torch.from_numpy( + _history_stack(history, "images", pad_shape=obs_bundle["images"].shape, dtype=np.uint8, history_steps=HISTORY_STEPS) + ).permute(0, 1, 4, 2, 3).unsqueeze(0).float().div(255.0).to(device), + "history_depths": torch.from_numpy( + _history_stack(history, "depths", pad_shape=obs_bundle["depths"].shape, dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_depth_valid": torch.from_numpy( + _history_stack( + history, + "depth_valid", + pad_shape=obs_bundle["depth_valid"].shape, + dtype=np.float32, + history_steps=HISTORY_STEPS, + ) + ).unsqueeze(0).float().to(device), + "history_camera_intrinsics": torch.from_numpy( + _history_stack( + history, + "camera_intrinsics", + pad_shape=obs_bundle["camera_intrinsics"].shape, + dtype=np.float32, + history_steps=HISTORY_STEPS, + ) + ).unsqueeze(0).float().to(device), + "history_camera_extrinsics": torch.from_numpy( + _history_stack( + history, + "camera_extrinsics", + pad_shape=obs_bundle["camera_extrinsics"].shape, + dtype=np.float32, + history_steps=HISTORY_STEPS, + ) + ).unsqueeze(0).float().to(device), + "history_proprio": torch.from_numpy( + _history_stack(history, "proprio", pad_shape=(PROPRIO_DIM,), dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_actions": torch.from_numpy( + _history_stack(history, "action", pad_shape=(14,), dtype=np.float32, history_steps=HISTORY_STEPS) + 
).unsqueeze(0).float().to(device), + "proprio": torch.from_numpy(proprio).unsqueeze(0).float().to(device), + "texts": [task_spec.text_prompt], + "task_names": [task_spec.task_name], + "task_ids": torch.as_tensor([TASK_INDEX[task_spec.task_name]], dtype=torch.long, device=device), + } + + +def _evaluate_checkpoint( + *, + task_spec: BridgeTaskSpec, + canonical_chunks: dict[str, np.ndarray], + checkpoint_path: Path, + adapter_mode: str, + result_mode_name: str, + seeds: Sequence[int], + report_path: Path, + train_spec: dict[str, Any] | None, + planner_overrides: dict[str, float] | None, +) -> dict[str, Any]: + model, checkpoint = _load_checkpoint( + checkpoint_path, + adapter_mode=adapter_mode if adapter_mode != "trunk_only" else None, + planner_overrides=planner_overrides, + ) + device = next(model.parameters()).device + obs_env = gym.make(task_spec.env_id, obs_mode="rgb+segmentation", render_mode="rgb_array") + sim_env = gym.make(task_spec.env_id, obs_mode="rgb+segmentation", render_mode="rgb_array") + successes: list[int] = [] + episode_records: list[dict[str, Any]] = [] + reveal_steps: list[int] = [] + retrieve_steps: list[int] = [] + disturbance_values: list[float] = [] + intervention_events = 0 + non_base_events = 0 + total_decisions = 0 + try: + for episode_seed in seeds: + obs, _ = obs_env.reset(seed=int(episode_seed)) + sim_env.reset(seed=int(episode_seed)) + _initialize_proxy_state(obs_env, task_spec, episode_seed=int(episode_seed)) + _sync_env_state(obs_env, sim_env) + obs = obs_env.get_obs(obs_env.get_info()) + history: deque[dict[str, Any]] = deque(maxlen=HISTORY_STEPS) + episode_start_positions = _all_positions(obs_env, task_spec) + success = False + first_reveal_step: int | None = None + first_retrieve_step: int | None = None + episode_disturbance: list[float] = [] + for decision_step in range(MAX_MACRO_STEPS): + obs_bundle = _extract_sensor_bundle(obs, resolution=IMAGE_RESOLUTION) + proprio = _build_proprio(obs_env) + batch = 
_batch_from_obs(task_spec, obs_bundle, proprio, list(history), device) + with torch.no_grad(): + if adapter_mode == "trunk_only": + outputs = model(**batch) + else: + outputs = model(**batch, adapter_mode=adapter_mode, use_transition_model=True, use_task_conditioning=True) + selected_mode, active_mask, non_base = _eval_mode_name(outputs, result_mode_name, canonical_chunks) + start_positions = _all_positions(obs_env, task_spec) + _sync_env_state(obs_env, sim_env) + _execute_mode(sim_env, task_spec, selected_mode) + _sync_env_state(sim_env, obs_env) + obs = obs_env.get_obs(obs_env.get_info()) + post_bundle = _extract_sensor_bundle(obs, resolution=IMAGE_RESOLUTION) + end_metrics = _candidate_metrics( + obs_env, + task_spec=task_spec, + start_positions=start_positions, + current_obs_bundle=post_bundle, + ) + history.append(_init_history_entry(obs_bundle, proprio, canonical_chunks.get(selected_mode, canonical_chunks["base_action"]))) + total_decisions += 1 + intervention_events += int(active_mask) + non_base_events += int(non_base) + episode_disturbance.append(end_metrics["disturbance"]) + if selected_mode != "retrieve" and selected_mode != "base_action" and first_reveal_step is None: + first_reveal_step = decision_step + 1 + if selected_mode == "retrieve" and first_retrieve_step is None: + first_retrieve_step = decision_step + 1 + if ( + _bag_success(obs_env) + if task_spec.key == "bag" + else _cloth_success( + obs_env, + start_positions=episode_start_positions, + current_visibility=end_metrics["visibility"], + ) + ): + success = True + break + successes.append(int(success)) + if first_reveal_step is not None: + reveal_steps.append(first_reveal_step) + if first_retrieve_step is not None: + retrieve_steps.append(first_retrieve_step) + disturbance_values.append(float(np.mean(episode_disturbance)) if episode_disturbance else 0.0) + episode_records.append( + { + "episode_seed": int(episode_seed), + "success": success, + "steps": len(history), + "first_reveal_step": 
first_reveal_step, + "first_retrieve_step": first_retrieve_step, + "episode_disturbance": float(np.mean(episode_disturbance)) if episode_disturbance else 0.0, + } + ) + print( + json.dumps( + { + "phase": "eval_episode_complete", + "task": task_spec.key, + "adapter_mode": result_mode_name, + "episode_seed": int(episode_seed), + "success": success, + "steps": len(history), + } + ), + flush=True, + ) + finally: + obs_env.close() + sim_env.close() + payload = { + "track_id": task_spec.track_id, + "suite": task_spec.suite, + "benchmark_task": task_spec.benchmark_task, + "role": "target", + "adapter_mode": result_mode_name, + "episodes": len(seeds), + "successes": successes, + "success_rate": float(np.mean(successes)) if successes else 0.0, + "intervention_rate": float(intervention_events / max(1, total_decisions)), + "non_base_selection_rate": float(non_base_events / max(1, total_decisions)), + "steps_to_first_reveal_or_access": float(np.mean(reveal_steps)) if reveal_steps else float(MAX_MACRO_STEPS), + "steps_to_retrieve": float(np.mean(retrieve_steps)) if retrieve_steps else float(MAX_MACRO_STEPS), + "disturbance_proxy": float(np.mean(disturbance_values)) if disturbance_values else 0.0, + "episode_records": episode_records, + "eval_protocol": _manual_eval_protocol(task_spec, eval_mode=result_mode_name, spec=SmokeSpec(), episodes=len(seeds)), + "proxy_notes": task_spec.notes, + } + if train_spec is not None: + payload["train_spec"] = train_spec + report_path.parent.mkdir(parents=True, exist_ok=True) + report_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + return payload + + +def _patch_summary_metadata(summary: dict[str, Any], task_spec: BridgeTaskSpec) -> dict[str, Any]: + patched = json.loads(json.dumps(summary)) + track_payload = patched.get("tracks", {}).get(task_spec.track_id) + if track_payload is not None: + track_payload["suite"] = task_spec.suite + track_payload["benchmark_task"] = task_spec.benchmark_task + track_payload["notes"] = 
task_spec.notes + track_payload["public_source"] = f"ManiSkill public scene proxy: {task_spec.env_id}" + track_payload["task_family"] = f"{task_spec.key}_retrieval_proxy" + track_payload["target_behavior"] = task_spec.text_prompt + return patched + + +def _summarize_task(task_spec: BridgeTaskSpec, results: Sequence[dict[str, Any]], output_dir: Path) -> dict[str, Any]: + summary = summarize_public_benchmark_package(list(results), allow_partial=True) + summary = _patch_summary_metadata(summary, task_spec) + output_dir.mkdir(parents=True, exist_ok=True) + json_path = output_dir / "public_benchmark_package_summary.json" + md_path = output_dir / "public_benchmark_package_summary.md" + json_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8") + track_payload = summary["tracks"][task_spec.track_id] + lines = [ + f"# ManiSkill {task_spec.key.capitalize()} Retrieval Smoke Summary", + "", + f"- benchmark_task: {task_spec.benchmark_task}", + f"- target_macro_average_delta: {summary['target_macro_average_delta']:.3f}", + f"- headline_pass: {summary['headline_pass']}", + f"- sign_of_life_pass: {summary['sign_of_life_pass']}", + "", + f"## {task_spec.track_id}", + f"- delta_active_vs_trunk: {track_payload.get('delta_active_vs_trunk', 0.0):.3f}", + f"- delta_noop_vs_trunk: {track_payload.get('delta_noop_vs_trunk', 0.0):.3f}", + f"- signs_of_life: {track_payload.get('signs_of_life', False)}", + ] + if "delta_active_vs_trunk_ci95" in track_payload: + low, high = track_payload["delta_active_vs_trunk_ci95"] + lines.append(f"- delta_active_vs_trunk_ci95: [{low:.3f}, {high:.3f}]") + for mode, mode_payload in track_payload["modes"].items(): + lines.append(f"- {mode}: mean_success={mode_payload['mean_success']:.3f}") + lines.append("") + md_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + return summary + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run a fair bridge-scene retrieval 
smoke for bag or cloth proxy tasks.") + parser.add_argument("--task", choices=sorted(TASK_SPECS), required=True) + parser.add_argument("--dataset-seed", type=int, default=DEFAULT_SEED) + parser.add_argument("--train-seed", type=int, default=DEFAULT_SEED) + parser.add_argument("--eval-split", choices=("val", "eval"), default="eval") + parser.add_argument("--report-dir", type=Path, default=None) + parser.add_argument("--skip-collection", action="store_true") + parser.add_argument("--skip-train", action="store_true") + parser.add_argument("--skip-eval", action="store_true") + parser.add_argument("--reuse-dataset", action="store_true") + parser.add_argument("--reuse-checkpoints", action="store_true") + parser.add_argument("--adapter-confidence-threshold", type=float, default=None) + parser.add_argument("--retrieve-access-threshold", type=float, default=None) + parser.add_argument("--retrieve-persistence-threshold", type=float, default=None) + parser.add_argument("--retrieve-support-threshold", type=float, default=None) + parser.add_argument("--retrieve-reocclusion-threshold", type=float, default=None) + parser.add_argument("--planner-mode-preference-bonus", type=float, default=None) + parser.add_argument("--planner-premature-retrieve-penalty", type=float, default=None) + parser.add_argument("--planner-premature-insert-penalty", type=float, default=None) + parser.add_argument("--planner-premature-occlusion-sweep-penalty", type=float, default=None) + parser.add_argument("--planner-premature-maintain-penalty", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-access-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-reveal-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-persistence-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-support-threshold", type=float, default=None) + parser.add_argument("--planner-insert-stage-access-threshold", 
type=float, default=None) + parser.add_argument("--planner-insert-stage-visibility-threshold", type=float, default=None) + parser.add_argument("--planner-insert-stage-support-threshold", type=float, default=None) + parser.add_argument("--planner-occlusion-maintain-gap-min-access", type=float, default=None) + parser.add_argument("--planner-occlusion-maintain-gap-min-visibility", type=float, default=None) + return parser.parse_args() + + +def _planner_overrides_from_args(args: argparse.Namespace) -> dict[str, float]: + overrides = { + "adapter_confidence_threshold": SMOKE_ADAPTER_CONFIDENCE_THRESHOLD, + "retrieve_access_threshold": SMOKE_RETRIEVE_ACCESS_THRESHOLD, + "retrieve_persistence_threshold": SMOKE_RETRIEVE_PERSISTENCE_THRESHOLD, + "retrieve_support_threshold": SMOKE_RETRIEVE_SUPPORT_THRESHOLD, + "retrieve_reocclusion_threshold": SMOKE_RETRIEVE_REOCCLUSION_THRESHOLD, + } + optional_pairs = ( + ("adapter_confidence_threshold", args.adapter_confidence_threshold), + ("retrieve_access_threshold", args.retrieve_access_threshold), + ("retrieve_persistence_threshold", args.retrieve_persistence_threshold), + ("retrieve_support_threshold", args.retrieve_support_threshold), + ("retrieve_reocclusion_threshold", args.retrieve_reocclusion_threshold), + ("mode_preference_bonus", args.planner_mode_preference_bonus), + ("premature_retrieve_penalty", args.planner_premature_retrieve_penalty), + ("premature_insert_penalty", args.planner_premature_insert_penalty), + ("premature_occlusion_sweep_penalty", args.planner_premature_occlusion_sweep_penalty), + ("premature_maintain_penalty", args.planner_premature_maintain_penalty), + ("retrieve_stage_access_threshold", args.planner_retrieve_stage_access_threshold), + ("retrieve_stage_reveal_threshold", args.planner_retrieve_stage_reveal_threshold), + ("retrieve_stage_persistence_threshold", args.planner_retrieve_stage_persistence_threshold), + ("retrieve_stage_support_threshold", args.planner_retrieve_stage_support_threshold), + 
("insert_stage_access_threshold", args.planner_insert_stage_access_threshold), + ("insert_stage_visibility_threshold", args.planner_insert_stage_visibility_threshold), + ("insert_stage_support_threshold", args.planner_insert_stage_support_threshold), + ("occlusion_maintain_gap_min_access", args.planner_occlusion_maintain_gap_min_access), + ("occlusion_maintain_gap_min_visibility", args.planner_occlusion_maintain_gap_min_visibility), + ) + for key, value in optional_pairs: + if value is not None: + overrides[key] = value + return overrides + + +def main() -> None: + args = _parse_args() + task_spec = _task_spec(args.task) + spec = SmokeSpec(dataset_seed=int(args.dataset_seed), train_seed=int(args.train_seed)) + canonical_chunks = _canonical_chunks(task_spec) + paths = _default_paths(task_spec) + report_dir = args.report_dir or paths.report_dir + planner_overrides = _planner_overrides_from_args(args) + split_path = _dataset_artifact_path(paths.data_dir, "episode_splits.json", dataset_seed=spec.dataset_seed) + if split_path.exists(): + episode_splits = json.loads(split_path.read_text(encoding="utf-8")) + else: + episode_splits = _build_episode_splits(task_spec, spec) + _save_episode_splits(split_path, episode_splits) + + train_path = _dataset_artifact_path(paths.data_dir, "train.pt", dataset_seed=spec.dataset_seed) + val_path = _dataset_artifact_path(paths.data_dir, "val.pt", dataset_seed=spec.dataset_seed) + if args.skip_collection and (not train_path.exists() or not val_path.exists()): + raise FileNotFoundError("Requested --skip-collection but cached dataset files are missing.") + if not args.skip_collection and (not args.reuse_dataset or not train_path.exists() or not val_path.exists()): + train_payload = _collect_split( + task_spec=task_spec, + canonical_chunks=canonical_chunks, + split_name="train", + seeds=episode_splits["train"], + spec=spec, + output_path=train_path, + ) + val_payload = _collect_split( + task_spec=task_spec, + 
canonical_chunks=canonical_chunks, + split_name="val", + seeds=episode_splits["val"], + spec=spec, + output_path=val_path, + ) + else: + train_payload = torch.load(train_path, map_location="cpu", weights_only=False) + val_payload = torch.load(val_path, map_location="cpu", weights_only=False) + + train_samples = _normalize_cached_samples(train_payload["samples"]) + val_samples = _normalize_cached_samples(val_payload["samples"]) + checkpoints: dict[str, Path] = {} + train_specs: dict[str, dict[str, Any]] = {} + for variant in ("trunk_only_ft", "adapter_active_ft"): + variant_output_dir = paths.output_dir / f"{variant}_seed{spec.train_seed}" + checkpoint_path = variant_output_dir / "checkpoint_best.pt" + if args.skip_train and not checkpoint_path.exists(): + raise FileNotFoundError(f"Requested --skip-train but checkpoint is missing: {checkpoint_path}") + if not args.skip_train and (not args.reuse_checkpoints or not checkpoint_path.exists()): + checkpoint_path, train_spec = _train_variant( + task_spec=task_spec, + variant=variant, + train_samples=train_samples, + val_samples=val_samples, + spec=spec, + output_dir=variant_output_dir, + ) + else: + summary_path = variant_output_dir / "summary.json" + if not summary_path.exists(): + raise FileNotFoundError(f"Missing cached summary file for {variant}: {summary_path}") + summary_payload = json.loads(summary_path.read_text(encoding="utf-8")) + train_spec = summary_payload["train_spec"] + checkpoints[variant] = checkpoint_path + train_specs[variant] = train_spec + + results: list[dict[str, Any]] = [] + if not args.skip_eval: + eval_plan = ( + ("trunk_only_ft", checkpoints["trunk_only_ft"], "trunk_only", None), + ("adapter_noop", checkpoints["adapter_active_ft"], "adapter_noop", None), + ("adapter_active_ft", checkpoints["adapter_active_ft"], "adapter_active", train_specs["adapter_active_ft"]), + ) + for result_mode_name, checkpoint_path, adapter_mode, train_spec in eval_plan: + result = _evaluate_checkpoint( + 
task_spec=task_spec, + canonical_chunks=canonical_chunks, + checkpoint_path=checkpoint_path, + adapter_mode=adapter_mode, + result_mode_name=result_mode_name, + seeds=episode_splits[args.eval_split], + report_path=report_dir / f"{result_mode_name}_seed{spec.train_seed}.json", + train_spec=train_spec if result_mode_name != "adapter_noop" else None, + planner_overrides=planner_overrides, + ) + if result_mode_name == "trunk_only_ft": + result["train_spec"] = train_specs["trunk_only_ft"] + (report_dir / f"{result_mode_name}_seed{spec.train_seed}.json").write_text( + json.dumps(result, indent=2) + "\n", + encoding="utf-8", + ) + results.append(result) + _summarize_task(task_spec, results, report_dir) + + +if __name__ == "__main__": + main() diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_pickclutter_smoke.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_pickclutter_smoke.py new file mode 100644 index 0000000000000000000000000000000000000000..5568c00b2c3e8740662d4aca48df781bf393e14a --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_pickclutter_smoke.py @@ -0,0 +1,2005 @@ +from __future__ import annotations + +import argparse +import collections +import json +import math +import os +import time +from collections import deque +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any, Iterable, Sequence + +import numpy as np +import torch +from omegaconf import OmegaConf +from torch import Tensor +from torch.utils.data import DataLoader, Dataset + +from eval.public_benchmark_package import build_public_eval_protocol, build_target_training_spec +from models.action_decoder import ChunkDecoderConfig, semantic_macro_chunk +from models.backbones import FrozenVLBackboneConfig +from models.multiview_fusion import MultiViewFusionConfig +from models.observation_memory import ObservationMemoryConfig +from 
models.planner import PlannerConfig +from models.policy import PolicyConfig +from models.reveal_head import RevealHeadConfig, TASK_METRIC_NAMES +from models.world_model import RevealWMConfig +from train.checkpoint_compat import filter_compatible_state_dict +from train.losses import LossWeights +from train.trainer import BimanualTrainer, TrainerConfig, apply_trainable_parameter_prefixes, build_policy + + +def _configure_runtime_env() -> None: + os.environ.setdefault("VK_ICD_FILENAMES", "/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json") + os.environ.setdefault("VK_LAYER_PATH", "/workspace/runtime/vulkan/implicit_layer.d") + os.environ.setdefault("XDG_RUNTIME_DIR", "/tmp/runtime-root") + os.environ.setdefault("MS_ASSET_DIR", "/workspace/data/maniskill") + + +_configure_runtime_env() + +import mani_skill.envs # noqa: E402 +import sapien # noqa: E402 +from mani_skill.envs.tasks.tabletop.pick_clutter_ycb import PickClutterYCBEnv # noqa: E402 +from mani_skill.sensors.camera import CameraConfig # noqa: E402 +from mani_skill.utils import sapien_utils # noqa: E402 +from mani_skill.utils.structs import Pose # noqa: E402 + +from eval.run_public_benchmark_package import summarize_public_benchmark_package # noqa: E402 +from models.action_decoder import TASK_INDEX # noqa: E402 +from train.run_experiment import _load_init_checkpoint, _move_batch_to_device # noqa: E402 + + +REPO_ROOT = Path(__file__).resolve().parents[3] +WORKSPACE_ROOT = Path("/workspace/workspace") +SMOKE_VERSION = "smoke_v5" +DEFAULT_DATA_DIR = WORKSPACE_ROOT / "data" / "maniskill_pickclutter" / SMOKE_VERSION +DEFAULT_OUTPUT_DIR = WORKSPACE_ROOT / "outputs" / f"maniskill_pickclutter_{SMOKE_VERSION}" +DEFAULT_REPORT_DIR = WORKSPACE_ROOT / "reports" / f"maniskill_pickclutter_{SMOKE_VERSION}" +DEFAULT_INIT_CHECKPOINT = Path( + "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/" + "r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt" +) + 
# --- Smoke-run constants -----------------------------------------------------
# Language instruction fed to the policy for every episode of this smoke run.
TEXT_PROMPT = "retrieve the target object from dense clutter and stage it at the front edge"
# The clutter proxy reuses the "foliage" task head; TASK_INDEX maps the name to
# the integer task id used by models.action_decoder.
TASK_NAME = "foliage"
TASK_ID = TASK_INDEX[TASK_NAME]
# Camera names must match the CameraConfig names declared by
# PickClutterRevealEnv._default_sensor_configs and the keys read in
# _extract_sensor_bundle.
CAMERA_NAMES = ("front", "left", "right")
# Macro-mode vocabulary. Order matters: _current_state_targets zips this tuple
# against per-candidate metric payloads, and "base_action" must stay first
# (candidate_metrics[0] is treated as the base in _current_state_targets).
MODE_ORDER = (
    "base_action",
    "sweep_left",
    "sweep_right",
    "pin_canopy",
    "widen_gap",
    "maintain_gap",
    "insert_actor",
    "retrieve",
)
# Horizon used to scale persistence_horizon targets and _mode_progress_schedule.
ROLL_OUT_HORIZON = 5
# Support-mode head size and the integer codes below index into it.
NUM_SUPPORT_MODES = 3
# Width of each corridor_feasible row built in _current_state_targets.
NUM_APPROACH_TEMPLATES = 32
SUPPORT_MODE_HOLD = 0
SUPPORT_MODE_TRANSFER = 1
SUPPORT_MODE_PASSIVE = 2
# Partition of MODE_ORDER (minus base_action) used by _mode_support_mode and
# the corridor aggregation in _current_state_targets.
REVEAL_MODES = ("sweep_left", "sweep_right", "pin_canopy", "widen_gap", "maintain_gap")
TRANSFER_MODES = ("insert_actor",)
RETRIEVE_MODES = ("retrieve",)
# Subset of TASK_METRIC_NAMES that receives direct supervision; STATE_METRIC_MASK
# is derived from this tuple.
STATE_SUPERVISION_METRICS = (
    "opening_quality",
    "actor_feasibility_score",
    "gap_width",
    "damage_proxy",
    "release_collapse_rate",
    "target_visibility_confidence",
    "insertable_actor_corridor",
    "insertion_corridor",
    "hold_quality",
    "layer_separation_quality",
    "fold_preservation",
    "top_layer_stability",
    "lift_too_much_risk",
)
# Episode cap on macro decisions; also the fallback value reported for
# steps_to_first_reveal_or_access / steps_to_retrieve when no event occurred.
MAX_MACRO_STEPS = 4
HISTORY_STEPS = 6
# Fixed proprioception width: _build_proprio truncates or zero-pads to this.
PROPRIO_DIM = 32
# Target y-coordinate threshold for success (_success_from_state requires
# target y <= this line) and denominator anchor for _extraction_progress.
EXTRACTION_LINE_Y = -0.22
# Minimum nearest-neighbour clearance (metres) for _success_from_state.
MIN_CLEARANCE_FOR_SUCCESS = 0.05
DEFAULT_SEED = 17
# Planner-threshold defaults applied by _apply_smoke_planner_overrides; each can
# be overridden per-run via the corresponding CLI flag.
SMOKE_ADAPTER_CONFIDENCE_THRESHOLD = 0.50
SMOKE_RETRIEVE_ACCESS_THRESHOLD = 0.08
SMOKE_RETRIEVE_PERSISTENCE_THRESHOLD = 0.12
SMOKE_RETRIEVE_SUPPORT_THRESHOLD = 0.08
SMOKE_RETRIEVE_REOCCLUSION_THRESHOLD = 0.92


@dataclass(frozen=True)
class SmokePaths:
    """Filesystem layout for one smoke run (dataset, checkpoints, reports)."""

    # Cached train/val episode tensors live here.
    data_dir: Path = DEFAULT_DATA_DIR
    # Per-variant training outputs (checkpoint_best.pt, summary.json).
    output_dir: Path = DEFAULT_OUTPUT_DIR
    # Normalized result JSONs and the benchmark summary.
    report_dir: Path = DEFAULT_REPORT_DIR


@dataclass(frozen=True)
class SmokeSpec:
    """Hyper-parameters for the dataset/train/eval phases of the smoke run."""

    # NOTE(review): duplicated as a literal 224 inside
    # PickClutterRevealEnv._default_sensor_configs — keep the two in sync.
    resolution: int = 224
    train_episodes: int = 32
    val_episodes: int = 8
    eval_episodes: int = 50
    # Seeds are kept separate so dataset collection and training can be
    # re-randomized independently.
    dataset_seed: int = DEFAULT_SEED
    train_seed: int = DEFAULT_SEED
    history_steps: int = HISTORY_STEPS
    max_macro_steps: int = MAX_MACRO_STEPS
    batch_size: int = 4
    epochs: int = 6
    num_workers: int = 16
    learning_rate: float = 1e-4
    weight_decay: float = 1e-4

    @property
    def seed(self) -> int:
        # Backward-compatible alias: older call sites read `spec.seed`, which
        # resolves to the training seed.
        return self.train_seed


def _apply_smoke_planner_overrides(
    policy_config: PolicyConfig,
    planner_overrides: dict[str, float] | None = None,
) -> PolicyConfig:
    """Mutate ``policy_config.planner`` in place with smoke defaults, then CLI overrides.

    The five SMOKE_* thresholds are always applied first; any non-None entries
    in ``planner_overrides`` (attribute name -> value) are then set on top via
    setattr, so explicit CLI flags win. Returns the same (mutated) config.
    """
    policy_config.planner.adapter_confidence_threshold = SMOKE_ADAPTER_CONFIDENCE_THRESHOLD
    policy_config.planner.retrieve_access_threshold = SMOKE_RETRIEVE_ACCESS_THRESHOLD
    policy_config.planner.retrieve_persistence_threshold = SMOKE_RETRIEVE_PERSISTENCE_THRESHOLD
    policy_config.planner.retrieve_support_threshold = SMOKE_RETRIEVE_SUPPORT_THRESHOLD
    policy_config.planner.retrieve_reocclusion_threshold = SMOKE_RETRIEVE_REOCCLUSION_THRESHOLD
    if planner_overrides:
        for key, value in planner_overrides.items():
            if value is None:
                continue
            # NOTE(review): setattr on arbitrary keys — a typo'd override name
            # silently creates a new attribute instead of failing; verify keys
            # against PlannerConfig.
            setattr(policy_config.planner, key, value)
    return policy_config


class PickClutterRevealEnv(PickClutterYCBEnv):
    """PickClutterYCB variant with a fixed 3-camera rig (front/left/right).

    Overrides only the sensor configuration; task logic is inherited. Camera
    names match the module-level CAMERA_NAMES tuple consumed by
    _extract_sensor_bundle.
    """

    @property
    def _default_sensor_configs(self):
        # NOTE(review): resolution is hard-coded here and duplicated in
        # SmokeSpec.resolution (224) — keep in sync.
        resolution = 224
        return [
            CameraConfig(
                "front",
                pose=sapien_utils.look_at(eye=[0.30, 0.00, 0.62], target=[-0.06, 0.00, 0.04]),
                width=resolution,
                height=resolution,
                fov=np.pi / 2,
                near=0.01,
                far=100.0,
            ),
            CameraConfig(
                "left",
                pose=sapien_utils.look_at(eye=[0.22, 0.34, 0.34], target=[-0.02, 0.02, 0.03]),
                width=resolution,
                height=resolution,
                fov=np.pi / 2,
                near=0.01,
                far=100.0,
            ),
            CameraConfig(
                "right",
                pose=sapien_utils.look_at(eye=[0.22, -0.34, 0.34], target=[-0.02, -0.02, 0.03]),
                width=resolution,
                height=resolution,
                fov=np.pi / 2,
                near=0.01,
                far=100.0,
            ),
        ]


def _np(value: Any, *, dtype: np.dtype | None = None) -> np.ndarray:
    """Coerce value (ndarray, torch Tensor, or array-like) to a numpy array.

    Tensors are detached and moved to CPU first; if ``dtype`` is given the
    result is cast without copying when possible.
    """
    if isinstance(value, np.ndarray):
        array = value
    elif isinstance(value, Tensor):
        array = value.detach().cpu().numpy()
    else:
        array = np.asarray(value)
    if dtype is not None:
        array = array.astype(dtype, copy=False)
    return array


def _vec3(value: Any) -> np.ndarray:
    """Flatten to float32 and return the first three components (xyz)."""
    return _np(value, dtype=np.float32).reshape(-1)[:3]


# Definition continues in the next extraction chunk: searches the camera param
# dict for an intrinsic matrix under several known keys.
def _camera_intrinsic_from_param(param: dict[str, Any]) -> np.ndarray:
    for key in 
("intrinsic_cv", "intrinsic", "cam_intrinsic"): + if key in param: + matrix = _np(param[key], dtype=np.float32) + return matrix[0] if matrix.ndim == 3 else matrix + return np.eye(3, dtype=np.float32) + + +def _camera_extrinsic_from_param(param: dict[str, Any]) -> np.ndarray: + for key in ("cam2world_gl", "cam2world", "extrinsic_cv", "extrinsic"): + if key in param: + matrix = _np(param[key], dtype=np.float32) + return matrix[0] if matrix.ndim == 3 else matrix + return np.eye(4, dtype=np.float32) + + +def _convert_depth(depth: np.ndarray) -> np.ndarray: + depth = depth.astype(np.float32, copy=False) + if np.issubdtype(depth.dtype, np.integer): + depth = depth / 1000.0 + return depth + + +def _build_proprio(env: PickClutterRevealEnv) -> np.ndarray: + base = env.unwrapped + qpos = _np(base.agent.robot.get_qpos(), dtype=np.float32).reshape(-1) + qvel = _np(base.agent.robot.get_qvel(), dtype=np.float32).reshape(-1) + tcp_pose = _np(base.agent.tcp.pose.raw_pose, dtype=np.float32).reshape(-1) + gripper_width = qpos[-2:].sum(keepdims=True).astype(np.float32) + pieces = [qpos, qvel, tcp_pose, gripper_width] + flat = np.concatenate(pieces, axis=0) + if flat.shape[0] >= PROPRIO_DIM: + return flat[:PROPRIO_DIM] + padded = np.zeros((PROPRIO_DIM,), dtype=np.float32) + padded[: flat.shape[0]] = flat + return padded + + +def _extract_sensor_bundle(obs: dict[str, Any]) -> dict[str, np.ndarray]: + sensor_data = obs["sensor_data"] + sensor_param = obs["sensor_param"] + rgb_views: list[np.ndarray] = [] + depth_views: list[np.ndarray] = [] + seg_views: list[np.ndarray] = [] + intrinsics: list[np.ndarray] = [] + extrinsics: list[np.ndarray] = [] + for camera_name in CAMERA_NAMES: + view = sensor_data[camera_name] + param = sensor_param[camera_name] + rgb = _np(view["rgb"], dtype=np.uint8) + depth = _np(view["depth"]) + segmentation = _np(view["segmentation"]) + rgb = rgb[0] if rgb.ndim == 4 else rgb + depth = depth[0] if depth.ndim == 4 else depth + segmentation = segmentation[0] if 
segmentation.ndim == 4 else segmentation + if depth.ndim == 3 and depth.shape[-1] == 1: + depth = depth[..., 0] + if segmentation.ndim == 3 and segmentation.shape[-1] == 1: + segmentation = segmentation[..., 0] + rgb_views.append(rgb.astype(np.uint8, copy=False)) + depth_views.append(_convert_depth(depth)) + seg_views.append(segmentation.astype(np.int32, copy=False)) + intrinsics.append(_camera_intrinsic_from_param(param)) + extrinsics.append(_camera_extrinsic_from_param(param)) + depth_stack = np.stack(depth_views, axis=0).astype(np.float32) + depth_valid = (depth_stack > 1e-5).astype(np.float32) + return { + "images": np.stack(rgb_views, axis=0), + "depths": depth_stack[:, None, :, :], + "depth_valid": depth_valid[:, None, :, :], + "segmentations": np.stack(seg_views, axis=0), + "camera_intrinsics": np.stack(intrinsics, axis=0).astype(np.float32), + "camera_extrinsics": np.stack(extrinsics, axis=0).astype(np.float32), + } + + +def _target_actor(env: PickClutterRevealEnv) -> Any: + return env.unwrapped.target_object._objs[0] + + +def _all_scene_actors(env: PickClutterRevealEnv) -> list[Any]: + return list(env.unwrapped.all_objects._objs) + + +def _target_position(env: PickClutterRevealEnv) -> np.ndarray: + return _vec3(_target_actor(env).pose.p) + + +def _all_positions(env: PickClutterRevealEnv) -> dict[str, np.ndarray]: + return {actor.name: _vec3(actor.pose.p) for actor in _all_scene_actors(env)} + + +def _nearest_non_target_distance(env: PickClutterRevealEnv) -> float: + target = _target_actor(env) + target_xy = _vec3(target.pose.p)[:2] + distances = [] + for actor in _all_scene_actors(env): + if actor.name == target.name: + continue + distances.append(float(np.linalg.norm(_vec3(actor.pose.p)[:2] - target_xy))) + if not distances: + return 1.0 + return float(min(distances)) + + +def _success_from_state(env: PickClutterRevealEnv) -> bool: + target = _target_position(env) + return bool(target[1] <= EXTRACTION_LINE_Y and _nearest_non_target_distance(env) >= 
MIN_CLEARANCE_FOR_SUCCESS) + + +def _clearance_score(env: PickClutterRevealEnv) -> float: + return float(np.clip((_nearest_non_target_distance(env) - 0.03) / 0.09, 0.0, 1.0)) + + +def _extraction_progress(env: PickClutterRevealEnv) -> float: + y_value = _target_position(env)[1] + return float(np.clip(((-0.05) - y_value) / ((-0.05) - EXTRACTION_LINE_Y), 0.0, 1.0)) + + +def _target_visibility(obs_bundle: dict[str, np.ndarray], target_seg_id: int) -> float: + segmentation = obs_bundle["segmentations"] + fractions = [(view == int(target_seg_id)).mean() for view in segmentation] + return float(np.clip(np.mean(fractions) * 80.0, 0.0, 1.0)) + + +def _snapshot_env(env: PickClutterRevealEnv) -> dict[str, Any]: + base = env.unwrapped + return { + "state_dict": base.get_state_dict(), + "goal_pos": _np(base.goal_pos, dtype=np.float32).copy(), + } + + +def _restore_env(env: PickClutterRevealEnv, snapshot: dict[str, Any]) -> None: + base = env.unwrapped + state_dict = snapshot["state_dict"] + goal_pos = torch.as_tensor(snapshot["goal_pos"], dtype=torch.float32, device=base.device) + base.set_state_dict(state_dict) + base.goal_pos = goal_pos.view_as(base.goal_pos) + base.goal_site.set_pose(Pose.create_from_pq(base.goal_pos)) + + +def _sync_env_state(src_env: PickClutterRevealEnv, dst_env: PickClutterRevealEnv) -> None: + _restore_env(dst_env, _snapshot_env(src_env)) + + +def _canonical_chunks() -> dict[str, np.ndarray]: + base = torch.zeros((1, 8, 14), dtype=torch.float32) + chunks: dict[str, np.ndarray] = {"base_action": base.squeeze(0).numpy().astype(np.float32)} + for mode_name in MODE_ORDER[1:]: + chunk = semantic_macro_chunk(base, task_name=TASK_NAME, mode_name=mode_name).squeeze(0).cpu().numpy() + chunks[mode_name] = chunk.astype(np.float32) + return chunks + + +CANONICAL_CHUNKS = _canonical_chunks() +STATE_METRIC_MASK = np.asarray( + [metric_name in STATE_SUPERVISION_METRICS for metric_name in TASK_METRIC_NAMES], + dtype=np.bool_, +) + + +def 
_classify_mode_from_chunk(chunk: np.ndarray) -> str: + candidate = np.asarray(chunk, dtype=np.float32) + distances = { + mode_name: float(np.mean(np.abs(candidate - prototype))) + for mode_name, prototype in CANONICAL_CHUNKS.items() + } + return min(distances, key=distances.get) + + +def _gripper_action(open_gripper: bool) -> float: + return 1.0 if open_gripper else -1.0 + + +def _repeat_delta(env: PickClutterRevealEnv, delta_xyz: Sequence[float], *, open_gripper: bool, steps: int) -> dict[str, Any]: + last_obs: dict[str, Any] | None = None + action = np.zeros((1, 4), dtype=np.float32) + action[0, :3] = np.asarray(delta_xyz, dtype=np.float32) + action[0, 3] = _gripper_action(open_gripper) + for _ in range(int(steps)): + obs, _, terminated, truncated, info = env.step(action) + last_obs = obs + if bool(np.asarray(terminated).reshape(-1)[0]) or bool(np.asarray(truncated).reshape(-1)[0]): + break + return { + "obs": last_obs if last_obs is not None else env.get_obs(env.get_info()), + "terminated": False, + "truncated": False, + "info": info if last_obs is not None else env.get_info(), + } + + +def _move_tcp_to( + env: PickClutterRevealEnv, + target_xyz: Sequence[float], + *, + open_gripper: bool, + max_steps: int = 120, + tolerance: float = 0.008, +) -> dict[str, Any]: + last_obs: dict[str, Any] | None = None + target = np.asarray(target_xyz, dtype=np.float32) + info = env.get_info() + for _ in range(int(max_steps)): + tcp = _vec3(env.unwrapped.agent.tcp.pose.p) + delta = target - tcp + if float(np.linalg.norm(delta)) <= float(tolerance): + break + action = np.zeros((1, 4), dtype=np.float32) + action[0, :3] = np.clip(delta / 0.04, -1.0, 1.0) + action[0, 3] = _gripper_action(open_gripper) + obs, _, terminated, truncated, info = env.step(action) + last_obs = obs + if bool(np.asarray(terminated).reshape(-1)[0]) or bool(np.asarray(truncated).reshape(-1)[0]): + break + return { + "obs": last_obs if last_obs is not None else env.get_obs(info), + "info": info, + } + + +def 
_find_path_blocker(env: PickClutterRevealEnv) -> np.ndarray | None: + target = _target_position(env) + target_name = _target_actor(env).name + blockers: list[tuple[float, np.ndarray]] = [] + for actor in _all_scene_actors(env): + if actor.name == target_name: + continue + position = _vec3(actor.pose.p) + if position[1] <= target[1] + 0.06 and abs(position[0] - target[0]) <= 0.10: + blockers.append((float(np.linalg.norm(position[:2] - target[:2])), position)) + if blockers: + blockers.sort(key=lambda item: item[0]) + return blockers[0][1] + nearest: tuple[float, np.ndarray] | None = None + for actor in _all_scene_actors(env): + if actor.name == target_name: + continue + position = _vec3(actor.pose.p) + distance = float(np.linalg.norm(position[:2] - target[:2])) + if nearest is None or distance < nearest[0]: + nearest = (distance, position) + return None if nearest is None else nearest[1] + + +def _execute_push( + env: PickClutterRevealEnv, + *, + anchor_xyz: np.ndarray, + pre_offset: np.ndarray, + push_delta: np.ndarray, + push_steps: int, +) -> dict[str, Any]: + _move_tcp_to(env, anchor_xyz + np.array([0.0, 0.0, 0.08], dtype=np.float32) + pre_offset, open_gripper=True) + _move_tcp_to(env, anchor_xyz + pre_offset, open_gripper=True, max_steps=100, tolerance=0.010) + result = _repeat_delta(env, push_delta, open_gripper=True, steps=push_steps) + _move_tcp_to( + env, + np.array([_vec3(env.unwrapped.agent.tcp.pose.p)[0], _vec3(env.unwrapped.agent.tcp.pose.p)[1], 0.10], dtype=np.float32), + open_gripper=True, + max_steps=80, + tolerance=0.012, + ) + return result + + +def _execute_mode(env: PickClutterRevealEnv, mode_name: str) -> dict[str, Any]: + target = _target_position(env) + blocker = _find_path_blocker(env) + if mode_name == "retrieve": + return _execute_push( + env, + anchor_xyz=target, + pre_offset=np.array([0.0, 0.035, 0.026], dtype=np.float32), + push_delta=np.array([0.0, -0.7, 0.0], dtype=np.float32), + push_steps=18, + ) + if mode_name == "insert_actor": + 
return _execute_push( + env, + anchor_xyz=target, + pre_offset=np.array([0.0, 0.045, 0.028], dtype=np.float32), + push_delta=np.array([0.0, -0.4, 0.0], dtype=np.float32), + push_steps=10, + ) + if mode_name == "widen_gap": + anchor = blocker if blocker is not None else target + direction = -1.0 if anchor[0] >= target[0] else 1.0 + return _execute_push( + env, + anchor_xyz=anchor, + pre_offset=np.array([0.0, 0.025, 0.028], dtype=np.float32), + push_delta=np.array([0.75 * direction, -0.12, 0.0], dtype=np.float32), + push_steps=18, + ) + if mode_name == "sweep_left": + anchor = blocker if blocker is not None else target + return _execute_push( + env, + anchor_xyz=anchor, + pre_offset=np.array([0.015, 0.025, 0.028], dtype=np.float32), + push_delta=np.array([-0.70, -0.10, 0.0], dtype=np.float32), + push_steps=14, + ) + if mode_name == "sweep_right": + anchor = blocker if blocker is not None else target + return _execute_push( + env, + anchor_xyz=anchor, + pre_offset=np.array([-0.015, 0.025, 0.028], dtype=np.float32), + push_delta=np.array([0.70, -0.10, 0.0], dtype=np.float32), + push_steps=14, + ) + if mode_name == "pin_canopy": + anchor = blocker if blocker is not None else target + return _execute_push( + env, + anchor_xyz=anchor, + pre_offset=np.array([0.0, -0.015, 0.028], dtype=np.float32), + push_delta=np.array([0.0, 0.35, 0.0], dtype=np.float32), + push_steps=10, + ) + if mode_name in {"maintain_gap", "base_action"}: + _move_tcp_to( + env, + np.array([target[0], target[1] + 0.02, 0.10], dtype=np.float32), + open_gripper=True, + max_steps=60, + tolerance=0.015, + ) + return _repeat_delta(env, np.array([0.0, -0.10, 0.0], dtype=np.float32), open_gripper=True, steps=4) + raise KeyError(f"Unsupported mode: {mode_name}") + + +def _candidate_metrics( + env: PickClutterRevealEnv, + *, + start_positions: dict[str, np.ndarray], + current_obs_bundle: dict[str, np.ndarray] | None = None, +) -> dict[str, float]: + positions = _all_positions(env) + target_name = 
_target_actor(env).name + non_target_displacements = [] + for name, start_position in start_positions.items(): + if name == target_name or name not in positions: + continue + non_target_displacements.append(float(np.linalg.norm((positions[name] - start_position)[:2]))) + disturbance = float(np.clip(np.mean(non_target_displacements) / 0.10, 0.0, 1.0)) if non_target_displacements else 0.0 + visibility = 0.0 + if current_obs_bundle is not None: + visibility = _target_visibility(current_obs_bundle, getattr(_target_actor(env), "per_scene_id", -1)) + return { + "retrieval_success": float(_success_from_state(env)), + "disturbance": disturbance, + "visibility": visibility, + "clearance": _clearance_score(env), + "progress": _extraction_progress(env), + } + + +def _mean_non_target_displacement( + start_positions: dict[str, np.ndarray], + current_positions: dict[str, np.ndarray], + *, + target_name: str, +) -> float: + displacements = [] + for actor_name, start_position in start_positions.items(): + if actor_name == target_name or actor_name not in current_positions: + continue + displacements.append(float(np.linalg.norm((current_positions[actor_name] - start_position)[:2]))) + if not displacements: + return 0.0 + return float(np.mean(displacements)) + + +def _current_state_targets( + env: PickClutterRevealEnv, + *, + obs_bundle: dict[str, np.ndarray], + candidate_metrics: Sequence[dict[str, float]], + episode_start_positions: dict[str, np.ndarray], + selected_mode: str, +) -> dict[str, Any]: + metrics_by_name = {mode_name: payload for mode_name, payload in zip(MODE_ORDER, candidate_metrics)} + current_positions = _all_positions(env) + target_name = _target_actor(env).name + current_disturbance = float( + np.clip( + _mean_non_target_displacement( + episode_start_positions, + current_positions, + target_name=target_name, + ) + / 0.10, + 0.0, + 1.0, + ) + ) + current_visibility = _target_visibility(obs_bundle, getattr(_target_actor(env), "per_scene_id", -1)) + 
current_clearance = _clearance_score(env) + current_progress = _extraction_progress(env) + base_gap = float(np.clip(max(current_clearance, current_progress), 0.0, 1.0)) + support_stability = float(np.clip(1.0 - 0.5 * current_disturbance, 0.0, 1.0)) + hold_quality = float(np.clip(0.5 * (support_stability + max(current_clearance, current_progress)), 0.0, 1.0)) + opening_quality = float( + np.clip(0.55 * current_progress + 0.25 * current_clearance + 0.20 * current_visibility, 0.0, 1.0) + ) + actor_feasibility = float(np.clip(0.6 * current_clearance + 0.4 * max(current_visibility, current_progress), 0.0, 1.0)) + reocclusion_rate = float(np.clip(1.0 - max(current_clearance, current_visibility), 0.0, 1.0)) + insertable_actor_corridor = float(np.clip(0.6 * actor_feasibility + 0.4 * base_gap, 0.0, 1.0)) + insertion_corridor = float(np.clip(0.5 * actor_feasibility + 0.5 * base_gap, 0.0, 1.0)) + layer_separation = float(np.clip(0.7 * base_gap + 0.3 * actor_feasibility, 0.0, 1.0)) + fold_preservation = float(np.clip(1.0 - current_disturbance, 0.0, 1.0)) + lift_too_much_risk = float(np.clip(current_disturbance + 0.5 * max(base_gap - 0.5, 0.0), 0.0, 1.0)) + task_metrics = { + "opening_quality": opening_quality, + "actor_feasibility_score": actor_feasibility, + "gap_width": float(0.03 + 0.21 * base_gap), + "damage_proxy": current_disturbance, + "release_collapse_rate": reocclusion_rate, + "target_visibility_confidence": current_visibility, + "insertable_actor_corridor": insertable_actor_corridor, + "insertion_corridor": insertion_corridor, + "hold_quality": hold_quality, + "layer_separation_quality": layer_separation, + "fold_preservation": fold_preservation, + "top_layer_stability": support_stability, + "lift_too_much_risk": lift_too_much_risk, + } + + base_metrics = metrics_by_name["base_action"] + insert_metrics = metrics_by_name["insert_actor"] + retrieve_metrics = metrics_by_name["retrieve"] + reveal_candidates = [metrics_by_name[mode_name] for mode_name in REVEAL_MODES] + 
    # --- continuation of the privileged state-target builder (its signature is
    # above this chunk).  Best-of-candidate aggregates over the reveal-mode
    # candidates feed the corridor scores below.
    reveal_access = max(candidate["candidate_actor_feasibility_auc"] for candidate in reveal_candidates)
    reveal_reveal = max(candidate["candidate_reveal_achieved"] for candidate in reveal_candidates)
    reveal_hold = max(candidate["candidate_hold_persistence"] for candidate in reveal_candidates)
    reveal_visibility = max(candidate["candidate_visibility_integral"] for candidate in reveal_candidates)

    # Hand-tuned linear blends, clipped into [0, 1]; disturbance always enters
    # with a negative weight.
    reveal_corridor = float(
        np.clip(
            0.45 * opening_quality
            + 0.30 * reveal_access
            + 0.15 * reveal_reveal
            + 0.10 * reveal_visibility
            - 0.10 * current_disturbance,
            0.0,
            1.0,
        )
    )
    transfer_corridor = float(
        np.clip(
            0.45 * insertable_actor_corridor
            + 0.30 * insert_metrics["candidate_actor_feasibility_auc"]
            + 0.15 * insert_metrics["candidate_reveal_achieved"]
            + 0.10 * insert_metrics["candidate_visibility_integral"]
            - 0.15 * current_disturbance,
            0.0,
            1.0,
        )
    )
    passive_corridor = float(
        np.clip(
            0.55 * retrieve_metrics["candidate_retrieval_success"]
            + 0.20 * retrieve_metrics["candidate_actor_feasibility_auc"]
            + 0.15 * current_progress
            + 0.10 * current_clearance
            - 0.10 * current_disturbance,
            0.0,
            1.0,
        )
    )
    # One row per corridor (reveal / transfer / passive), broadcast across all
    # approach templates.
    corridor_feasible = np.stack(
        [
            np.full((NUM_APPROACH_TEMPLATES,), reveal_corridor, dtype=np.float32),
            np.full((NUM_APPROACH_TEMPLATES,), transfer_corridor, dtype=np.float32),
            np.full((NUM_APPROACH_TEMPLATES,), passive_corridor, dtype=np.float32),
        ],
        axis=0,
    )
    # Expected persistence per corridor, expressed as a fraction of the rollout
    # horizon (each clip keeps the fraction in [0, 1] before scaling).
    persistence_horizon = np.asarray(
        [
            ROLL_OUT_HORIZON
            * float(np.clip(0.35 * hold_quality + 0.35 * reveal_hold + 0.30 * reveal_corridor, 0.0, 1.0)),
            ROLL_OUT_HORIZON
            * float(
                np.clip(
                    0.30 * hold_quality
                    + 0.35 * insert_metrics["candidate_hold_persistence"]
                    + 0.35 * transfer_corridor,
                    0.0,
                    1.0,
                )
            ),
            ROLL_OUT_HORIZON
            * float(
                np.clip(
                    0.25 * hold_quality
                    + 0.35 * retrieve_metrics["candidate_hold_persistence"]
                    + 0.40 * passive_corridor,
                    0.0,
                    1.0,
                )
            ),
        ],
        dtype=np.float32,
    )

    # Utility margins of the scripted retrieve/insert candidates over the base
    # action drive the support-mode decision below.
    retrieve_margin = float(retrieve_metrics["candidate_utility"] - base_metrics["candidate_utility"])
    insert_margin = float(insert_metrics["candidate_utility"] - base_metrics["candidate_utility"])
    if selected_mode == "retrieve" or (retrieve_metrics["candidate_retrieval_success"] >= 0.5 and retrieve_margin >= 0.15):
        support_mode = SUPPORT_MODE_PASSIVE
    elif selected_mode == "insert_actor" or (insert_margin >= 0.15 and transfer_corridor >= 0.40):
        support_mode = SUPPORT_MODE_TRANSFER
    elif selected_mode in REVEAL_MODES or selected_mode == "maintain_gap":
        support_mode = SUPPORT_MODE_HOLD
    elif selected_mode == "base_action":
        support_mode = SUPPORT_MODE_PASSIVE if passive_corridor >= 0.65 and retrieve_margin >= 0.05 else SUPPORT_MODE_HOLD
    else:
        support_mode = SUPPORT_MODE_HOLD

    # candidate_metrics[0] is assumed to be the base action; an intervention is
    # "warranted" when any other candidate beats it by >= 0.15 utility.
    # NOTE(review): index-0-is-base is inferred from the [1:] slice — confirm
    # against MODE_ORDER.
    best_non_base_utility = max(float(payload["candidate_utility"]) for payload in candidate_metrics[1:])
    intervention_warranted = selected_mode != "base_action" and best_non_base_utility >= float(base_metrics["candidate_utility"]) + 0.15

    return {
        "support_mode": int(support_mode),
        "corridor_feasible": corridor_feasible,
        "persistence_horizon": persistence_horizon,
        "disturbance_cost": np.float32(current_disturbance),
        "state_confidence_target": np.float32(1.0 if intervention_warranted else 0.0),
        "task_metric_mask": STATE_METRIC_MASK.copy(),
        **{metric_name: np.float32(metric_value) for metric_name, metric_value in task_metrics.items()},
    }


def _mode_support_mode(mode_name: str, current_support_mode: int) -> int:
    """Map a macro-mode name to its canonical support mode.

    Unknown modes keep the current support mode unchanged.
    """
    if mode_name in REVEAL_MODES or mode_name == "maintain_gap":
        return SUPPORT_MODE_HOLD
    if mode_name in TRANSFER_MODES:
        return SUPPORT_MODE_TRANSFER
    if mode_name in RETRIEVE_MODES:
        return SUPPORT_MODE_PASSIVE
    return int(current_support_mode)


def _mode_progress_schedule(mode_name: str) -> np.ndarray:
    """Return the 5-step progress schedule (monotone, in [0, 1]) for a mode.

    Reveal/transfer/retrieve modes reach 1.0 by the last step; the fallback
    schedule (e.g. base action) deliberately tops out at 0.54.
    """
    if mode_name in REVEAL_MODES:
        return np.asarray([0.18, 0.38, 0.62, 0.84, 1.0], dtype=np.float32)
    if mode_name in TRANSFER_MODES:
        return np.asarray([0.22, 0.44, 0.66, 0.86, 1.0], dtype=np.float32)
    if mode_name in RETRIEVE_MODES:
        return np.asarray([0.34, 0.56, 0.76, 0.92, 1.0], dtype=np.float32)
    return np.asarray([0.10, 0.22, 0.34, 0.44, 0.54], dtype=np.float32)


def _scalar_rollout(start: float, end: float, schedule: np.ndarray) -> np.ndarray:
    """Linearly interpolate start -> end along `schedule`, clipped to [0, 1]."""
    return np.clip((1.0 - schedule) * float(start) + schedule * float(end), 0.0, 1.0).astype(np.float32)


def _candidate_rollout_targets(
    *,
    mode_name: str,
    state_targets: dict[str, Any],
    candidate_payload: dict[str, float],
) -> dict[str, np.ndarray]:
    """Build per-step rollout supervision targets for one candidate mode.

    Interpolates the current privileged state (`state_targets`) toward the
    candidate's immediate post-execution metrics along the mode's progress
    schedule, then derives corridor feasibility, persistence horizons, and
    dense map targets from the interpolated scalars.
    """
    schedule = _mode_progress_schedule(mode_name)
    # Rollout start point, taken from the current privileged state estimate.
    start_visibility = float(state_targets["target_visibility_confidence"])
    start_access = float(state_targets["actor_feasibility_score"])
    start_persistence = float(np.clip(state_targets["hold_quality"], 0.0, 1.0))
    start_support = float(np.clip(state_targets["top_layer_stability"], 0.0, 1.0))
    start_reocclusion = float(np.clip(state_targets["release_collapse_rate"], 0.0, 1.0))
    start_disturbance = float(np.clip(state_targets["disturbance_cost"], 0.0, 1.0))
    start_clearance = float(np.clip(state_targets["actor_feasibility_score"], 0.0, 1.0))
    start_grasp = float(np.clip(max(start_visibility, start_access), 0.0, 1.0))

    # Rollout end point, taken from the candidate's immediate metrics.
    end_visibility = float(np.clip(candidate_payload["candidate_immediate_visibility"], 0.0, 1.0))
    end_access = float(np.clip(candidate_payload["candidate_immediate_access"], 0.0, 1.0))
    end_progress = float(np.clip(candidate_payload["candidate_immediate_progress"], 0.0, 1.0))
    end_disturbance = float(np.clip(candidate_payload["candidate_immediate_disturbance"], 0.0, 1.0))
    end_support = float(np.clip(candidate_payload["candidate_immediate_support_stability"], 0.0, 1.0))
    end_persistence = float(np.clip(candidate_payload["candidate_immediate_hold_persistence"], 0.0, 1.0))
    end_reocclusion = float(np.clip(candidate_payload["candidate_immediate_reocclusion"], 0.0, 1.0))
    end_clearance = float(np.clip(max(end_access, end_progress), 0.0, 1.0))
    end_grasp = float(np.clip(max(end_visibility, 0.5 * end_access + 0.5 * end_progress), 0.0, 1.0))

    # Transfer/retrieve modes pull the start point part-way toward the end
    # point so the interpolation reflects work already done by earlier steps.
    if mode_name in TRANSFER_MODES:
        start_visibility = max(start_visibility, 0.35 * end_visibility)
        start_access = max(start_access, 0.40 * end_access)
        start_persistence = max(start_persistence, 0.45 * end_persistence)
        start_support = max(start_support, 0.50 * end_support)
    elif mode_name in RETRIEVE_MODES:
        start_visibility = max(start_visibility, 0.55 * end_visibility)
        start_access = max(start_access, 0.70 * end_access)
        start_persistence = max(start_persistence, 0.65 * end_persistence)
        start_support = max(start_support, 0.65 * end_support)
        start_reocclusion = min(start_reocclusion, max(0.4 * end_reocclusion, 0.0))

    visibility = _scalar_rollout(start_visibility, end_visibility, schedule)
    access = _scalar_rollout(start_access, end_access, schedule)
    persistence = _scalar_rollout(start_persistence, end_persistence, schedule)
    support = _scalar_rollout(start_support, end_support, schedule)
    reocclusion = _scalar_rollout(start_reocclusion, end_reocclusion, schedule)
    disturbance = _scalar_rollout(start_disturbance, end_disturbance, schedule)
    clearance = _scalar_rollout(start_clearance, end_clearance, schedule)
    grasp = _scalar_rollout(start_grasp, end_grasp, schedule)

    # Per-step corridor scores (same blend shape as the state-level targets).
    reveal_corridor = np.clip(
        0.38 * visibility + 0.34 * access + 0.22 * support - 0.12 * disturbance,
        0.0,
        1.0,
    )
    transfer_corridor = np.clip(
        0.30 * visibility + 0.38 * access + 0.18 * persistence + 0.14 * support - 0.12 * disturbance,
        0.0,
        1.0,
    )
    passive_corridor = np.clip(
        0.22 * visibility + 0.42 * access + 0.20 * persistence + 0.16 * grasp - 0.14 * disturbance - 0.10 * reocclusion,
        0.0,
        1.0,
    )
    # Mode-conditioned bonuses/penalties bias each corridor toward the mode's
    # nominal corridor; the else branch damps all three for unknown modes.
    if mode_name in REVEAL_MODES:
        reveal_corridor = np.clip(reveal_corridor + 0.14, 0.0, 1.0)
        passive_corridor = np.clip(0.75 * passive_corridor, 0.0, 1.0)
    elif mode_name in TRANSFER_MODES:
        transfer_corridor = np.clip(transfer_corridor + 0.16, 0.0, 1.0)
    elif mode_name in RETRIEVE_MODES:
        passive_corridor = np.clip(passive_corridor + 0.20, 0.0, 1.0)
        reveal_corridor = np.clip(0.60 * reveal_corridor, 0.0, 1.0)
    else:
        reveal_corridor = np.clip(0.85 * reveal_corridor, 0.0, 1.0)
        transfer_corridor = np.clip(0.75 * transfer_corridor, 0.0, 1.0)
        passive_corridor = np.clip(0.80 * passive_corridor, 0.0, 1.0)

    # Shape: (steps, 3 corridors, NUM_APPROACH_TEMPLATES).
    corridor_feasible = np.stack(
        [
            np.repeat(reveal_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1),
            np.repeat(transfer_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1),
            np.repeat(passive_corridor[:, None], NUM_APPROACH_TEMPLATES, axis=1),
        ],
        axis=1,
    ).astype(np.float32)
    persistence_horizon = np.stack(
        [
            np.clip(ROLL_OUT_HORIZON * (0.55 * reveal_corridor + 0.45 * support), 0.0, float(ROLL_OUT_HORIZON)),
            np.clip(ROLL_OUT_HORIZON * (0.50 * transfer_corridor + 0.50 * persistence), 0.0, float(ROLL_OUT_HORIZON)),
            np.clip(ROLL_OUT_HORIZON * (0.55 * passive_corridor + 0.45 * persistence), 0.0, float(ROLL_OUT_HORIZON)),
        ],
        axis=1,
    ).astype(np.float32)
    support_mode = np.full(
        (ROLL_OUT_HORIZON,),
        _mode_support_mode(mode_name, int(state_targets["support_mode"])),
        dtype=np.int64,
    )
    # Base action keeps the current support mode for every rollout step.
    if mode_name == "base_action":
        support_mode[:] = int(state_targets["support_mode"])

    return {
        "candidate_rollout_support_mode": support_mode,
        "candidate_rollout_corridor_feasible": corridor_feasible,
        "candidate_rollout_persistence_horizon": persistence_horizon,
        "candidate_rollout_disturbance_cost": disturbance.astype(np.float32),
        "candidate_rollout_belief_map": visibility[:, None, None].astype(np.float32),
        "candidate_rollout_visibility_map": visibility[:, None, None].astype(np.float32),
        "candidate_rollout_clearance_map": np.repeat(clearance[:, None, None, None], 2, axis=1).astype(np.float32),
        "candidate_rollout_support_stability": support[:, None, None, None].astype(np.float32),
        "candidate_rollout_reocclusion_target": reocclusion[:, None, None].astype(np.float32),
        "candidate_rollout_occluder_contact_map": np.clip(access * support, 0.0, 1.0)[:, None, None].astype(np.float32),
        "candidate_rollout_grasp_affordance_map": grasp[:, None, None].astype(np.float32),
    }


def _evaluate_candidate(
    sim_env: PickClutterRevealEnv,
    obs_env: PickClutterRevealEnv,
    snapshot: dict[str, Any],
    mode_name: str,
) -> dict[str, float]:
    """Score one candidate mode by executing it in the shadow sim env.

    Restores `snapshot`, executes the mode, measures immediate metrics, and —
    unless the mode already retrieved — chains a "retrieve" follow-up to get
    final metrics. `obs_env` is restored to the snapshot before returning, so
    evaluation is side-effect free for the caller.
    """
    _restore_env(sim_env, snapshot)
    start_positions = _all_positions(sim_env)
    _execute_mode(sim_env, mode_name)
    _sync_env_state(sim_env, obs_env)
    after_bundle = _extract_sensor_bundle(obs_env.get_obs(obs_env.get_info()))
    immediate = _candidate_metrics(sim_env, start_positions=start_positions, current_obs_bundle=after_bundle)
    # Follow-up retrieve measures how well the mode set up the actual grab.
    if not immediate["retrieval_success"] and mode_name != "retrieve":
        _execute_mode(sim_env, "retrieve")
        _sync_env_state(sim_env, obs_env)
        follow_bundle = _extract_sensor_bundle(obs_env.get_obs(obs_env.get_info()))
        final_metrics = _candidate_metrics(sim_env, start_positions=start_positions, current_obs_bundle=follow_bundle)
    else:
        final_metrics = immediate
    _restore_env(obs_env, snapshot)
    # Scalar utility used for candidate ranking; retrieval dominates.
    utility = (
        2.5 * final_metrics["retrieval_success"]
        + 1.0 * final_metrics["progress"]
        + 0.5 * final_metrics["clearance"]
        + 0.25 * final_metrics["visibility"]
        - 0.5 * final_metrics["disturbance"]
    )
    return {
        "candidate_retrieval_success": final_metrics["retrieval_success"],
        "candidate_risk": float(np.clip(final_metrics["disturbance"], 0.0, 1.0)),
        "candidate_utility": float(utility),
        "candidate_final_disturbance_cost": final_metrics["disturbance"],
        "candidate_reocclusion_rate": float(np.clip(1.0 - final_metrics["clearance"], 0.0, 1.0)),
        "candidate_visibility_integral": final_metrics["visibility"],
        "candidate_actor_feasibility_auc": final_metrics["clearance"],
        "candidate_reveal_achieved": float(final_metrics["progress"] > 0.15 or final_metrics["clearance"] > 0.35),
        "candidate_hold_persistence": float(1.0 - final_metrics["disturbance"]),
        "candidate_support_stability_auc": float(1.0 - 0.5 * final_metrics["disturbance"]),
        "candidate_disturbance_auc": final_metrics["disturbance"],
        "candidate_immediate_retrieval_success": immediate["retrieval_success"],
        "candidate_immediate_visibility": immediate["visibility"],
        "candidate_immediate_access": immediate["clearance"],
        "candidate_immediate_progress": immediate["progress"],
        "candidate_immediate_reocclusion": float(np.clip(1.0 - immediate["clearance"], 0.0, 1.0)),
        "candidate_immediate_hold_persistence": float(1.0 - immediate["disturbance"]),
        "candidate_immediate_support_stability": float(1.0 - 0.5 * immediate["disturbance"]),
        "candidate_immediate_disturbance": immediate["disturbance"],
    }


def _build_episode_splits(spec: SmokeSpec) -> dict[str, list[int]]:
    """Derive disjoint train/val/eval episode seeds from the dataset seed.

    Offsets of 1_000 / 2_000 keep splits disjoint as long as each split has
    fewer than 1_000 episodes.
    """
    return {
        "train": [spec.dataset_seed * 10_000 + index for index in range(spec.train_episodes)],
        "val": [spec.dataset_seed * 10_000 + 1_000 + index for index in range(spec.val_episodes)],
        "eval": [spec.dataset_seed * 10_000 + 2_000 + index for index in range(spec.eval_episodes)],
    }


def _save_episode_splits(output_path: Path, payload: dict[str, list[int]]) -> None:
    """Write the split map as deterministic (sorted-key) JSON, creating parents."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")


def _init_history_entry(obs_bundle: dict[str, np.ndarray], proprio: np.ndarray, action_chunk: np.ndarray) -> dict[str, Any]:
    """Copy an observation/proprio/action snapshot into a history record.

    The action chunk is collapsed to its per-dimension mean over chunk steps.
    """
    return {
        "images": obs_bundle["images"].copy(),
        "depths": obs_bundle["depths"].copy(),
        "depth_valid": obs_bundle["depth_valid"].copy(),
        "camera_intrinsics": obs_bundle["camera_intrinsics"].copy(),
        "camera_extrinsics": obs_bundle["camera_extrinsics"].copy(),
        "proprio": proprio.copy(),
        "action": action_chunk.mean(axis=0).astype(np.float32, copy=False),
    }


def _history_stack(
    history: Sequence[dict[str, Any]],
    key: str,
    *,
    pad_shape: tuple[int, ...],
    dtype: np.dtype,
    history_steps: int,
) -> np.ndarray:
    """Stack the last `history_steps` values of `key`, left-padding with zeros.

    Always returns shape (history_steps, *pad_shape); padding entries come
    first so the most recent observation is last.
    """
    history = list(history)[-history_steps:]
    pad_count = history_steps - len(history)
    chunks = [np.zeros(pad_shape, dtype=dtype) for _ in range(pad_count)]
    chunks.extend(np.asarray(item[key], dtype=dtype) for item in history)
    return np.stack(chunks, axis=0).astype(dtype, copy=False)


class ManiSkillPickClutterDataset(Dataset[dict[str, Any]]):
    """In-memory dataset of collected decision-step samples.

    `__getitem__` converts numpy sample payloads to torch tensors, normalizing
    uint8 images to [0, 1] and reordering channel axes to channels-first.
    Optional supervision keys are passed through only when present.
    """

    def __init__(self, samples: Sequence[dict[str, Any]]) -> None:
        self.samples = list(samples)

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, index: int) -> dict[str, Any]:
        sample = self.samples[index]
        item = {
            # (views, H, W, C) uint8 -> (views, C, H, W) float in [0, 1].
            "images": torch.from_numpy(sample["images"]).permute(0, 3, 1, 2).float() / 255.0,
            "depths": torch.from_numpy(sample["depths"]).float(),
            "depth_valid": torch.from_numpy(sample["depth_valid"]).float(),
            "camera_intrinsics": torch.from_numpy(sample["camera_intrinsics"]).float(),
            "camera_extrinsics": torch.from_numpy(sample["camera_extrinsics"]).float(),
            "history_images": torch.from_numpy(sample["history_images"]).permute(0, 1, 4, 2, 3).float() / 255.0,
            "history_depths": torch.from_numpy(sample["history_depths"]).float(),
            "history_depth_valid": torch.from_numpy(sample["history_depth_valid"]).float(),
            "history_camera_intrinsics": torch.from_numpy(sample["history_camera_intrinsics"]).float(),
            "history_camera_extrinsics": torch.from_numpy(sample["history_camera_extrinsics"]).float(),
            "history_proprio": torch.from_numpy(sample["history_proprio"]).float(),
            "history_actions": torch.from_numpy(sample["history_actions"]).float(),
            "proprio": torch.from_numpy(sample["proprio"]).float(),
            "texts": sample["language_goal"],
            "task_name": sample["task_name"],
            "task_id": torch.as_tensor(sample["task_id"], dtype=torch.long),
            "action_chunk": torch.from_numpy(sample["action_chunk"]).float(),
            "candidate_action_chunks": torch.from_numpy(sample["candidate_action_chunks"]).float(),
            "candidate_retrieval_success": torch.from_numpy(sample["candidate_retrieval_success"]).float(),
            "candidate_final_disturbance_cost": torch.from_numpy(sample["candidate_final_disturbance_cost"]).float(),
            "candidate_reocclusion_rate": torch.from_numpy(sample["candidate_reocclusion_rate"]).float(),
            "candidate_visibility_integral": torch.from_numpy(sample["candidate_visibility_integral"]).float(),
            "candidate_actor_feasibility_auc": torch.from_numpy(sample["candidate_actor_feasibility_auc"]).float(),
            "candidate_reveal_achieved": torch.from_numpy(sample["candidate_reveal_achieved"]).float(),
            "candidate_hold_persistence": torch.from_numpy(sample["candidate_hold_persistence"]).float(),
            "candidate_support_stability_auc": torch.from_numpy(sample["candidate_support_stability_auc"]).float(),
            "candidate_disturbance_auc": torch.from_numpy(sample["candidate_disturbance_auc"]).float(),
            "candidate_risk": torch.from_numpy(sample["candidate_risk"]).float(),
            "candidate_utility": torch.from_numpy(sample["candidate_utility"]).float(),
            # Proposal targets deliberately alias the candidate arrays.
            "proposal_target_action_chunks": torch.from_numpy(sample["candidate_action_chunks"]).float(),
            "proposal_target_retrieval_success": torch.from_numpy(sample["candidate_retrieval_success"]).float(),
            "proposal_target_risk": torch.from_numpy(sample["candidate_risk"]).float(),
            "proposal_target_utility": torch.from_numpy(sample["candidate_utility"]).float(),
            "episode_seed": sample["episode_seed"],
            "decision_step": sample["decision_step"],
            "selected_mode": sample["selected_mode"],
        }
        # Optional state-supervision payloads: only forwarded when collected.
        if "support_mode" in sample:
            item["support_mode"] = torch.as_tensor(sample["support_mode"], dtype=torch.long)
        if "corridor_feasible" in sample:
            item["corridor_feasible"] = torch.from_numpy(sample["corridor_feasible"]).float()
        if "persistence_horizon" in sample:
            item["persistence_horizon"] = torch.from_numpy(sample["persistence_horizon"]).float()
        if "disturbance_cost" in sample:
            item["disturbance_cost"] = torch.as_tensor(sample["disturbance_cost"], dtype=torch.float32)
        if "state_confidence_target" in sample:
            item["state_confidence_target"] = torch.as_tensor(sample["state_confidence_target"], dtype=torch.float32)
        if "task_metric_mask" in sample:
            item["task_metric_mask"] = torch.from_numpy(sample["task_metric_mask"]).to(dtype=torch.bool)
        for metric_name in STATE_SUPERVISION_METRICS:
            if metric_name in sample:
                item[metric_name] = torch.as_tensor(sample[metric_name], dtype=torch.float32)
        # Integer rollout targets become long tensors ...
        for key in (
            "candidate_rollout_support_mode",
            "proposal_target_rollout_support_mode",
        ):
            if key in sample:
                item[key] = torch.from_numpy(sample[key]).long()
        # ... everything else becomes float.
        for key in (
            "candidate_rollout_corridor_feasible",
            "candidate_rollout_persistence_horizon",
            "candidate_rollout_disturbance_cost",
            "candidate_rollout_belief_map",
            "candidate_rollout_visibility_map",
            "candidate_rollout_clearance_map",
            "candidate_rollout_support_stability",
            "candidate_rollout_reocclusion_target",
            "candidate_rollout_occluder_contact_map",
            "candidate_rollout_grasp_affordance_map",
            "proposal_target_rollout_corridor_feasible",
            "proposal_target_rollout_persistence_horizon",
            "proposal_target_rollout_disturbance_cost",
            "proposal_target_rollout_belief_map",
            "proposal_target_rollout_visibility_map",
            "proposal_target_rollout_clearance_map",
            "proposal_target_rollout_support_stability",
            "proposal_target_rollout_reocclusion_target",
            "proposal_target_rollout_occluder_contact_map",
            "proposal_target_rollout_grasp_affordance_map",
        ):
            if key in sample:
                item[key] = torch.from_numpy(sample[key]).float()
        return item


def _make_loader(samples: Sequence[dict[str, Any]], *, batch_size: int, num_workers: int, shuffle: bool) -> DataLoader:
    """Wrap samples in a DataLoader; pin memory only when CUDA is available."""
    return DataLoader(
        ManiSkillPickClutterDataset(samples),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
    )


def _load_init_bundle() -> tuple[PolicyConfig, dict[str, Any], dict[str, Any]]:
    """Load the shared init checkpoint and rebuild its PolicyConfig.

    Returns the (smoke-planner-overridden) policy config plus the raw trainer
    config and loss-weight dicts stored in the checkpoint.
    NOTE(review): weights_only=False unpickles arbitrary objects — only load
    trusted checkpoints.
    """
    checkpoint = torch.load(DEFAULT_INIT_CHECKPOINT, map_location="cpu", weights_only=False)
    policy_config = PolicyConfig(
        backbone=FrozenVLBackboneConfig(**checkpoint["policy_config"]["backbone"]),
        fusion=MultiViewFusionConfig(**checkpoint["policy_config"]["fusion"]),
        memory=ObservationMemoryConfig(**checkpoint["policy_config"]["memory"]),
        decoder=ChunkDecoderConfig(**checkpoint["policy_config"]["decoder"]),
        reveal_head=RevealHeadConfig(**checkpoint["policy_config"]["reveal_head"]),
        world_model=RevealWMConfig(**checkpoint["policy_config"]["world_model"]),
        planner=PlannerConfig(**checkpoint["policy_config"]["planner"]),
    )
    return _apply_smoke_planner_overrides(policy_config), checkpoint["trainer_config"], checkpoint["loss_weights"]


def _trainer_config_for_variant(variant: str) -> TrainerConfig:
    """Return the fixed TrainerConfig for a fine-tuning variant.

    Raises KeyError for anything other than "trunk_only_ft" /
    "adapter_active_ft". Both variants freeze the backbone.
    """
    if variant == "trunk_only_ft":
        return TrainerConfig(
            policy_type="foundation_trunk",
            use_bf16=True,
            grad_clip_norm=1.0,
            freeze_backbone=True,
            gradient_checkpointing=False,
            trainable_parameter_prefixes=("fusion", "memory", "decoder"),
        )
    if variant == "adapter_active_ft":
        return TrainerConfig(
            policy_type="adapter_wrapped",
            use_bf16=True,
            grad_clip_norm=1.0,
            freeze_backbone=True,
            gradient_checkpointing=False,
            eval_mode="adapter_active",
            trainable_parameter_prefixes=(
                "trunk.fusion",
                "trunk.memory",
                "trunk.decoder",
                "adapter.state_head",
                "adapter.transition_model",
                "adapter.proposal_prior",
                "adapter.planner",
            ),
            adapter_mode="adapter_active",
            adapter_use_transition_model=True,
            adapter_use_task_conditioning=True,
            adapter_action_supervision_source="trunk",
        )
    raise KeyError(f"Unsupported variant: {variant}")


def _loss_weights_for_smoke() -> LossWeights:
    """Fixed loss-weight mix for the smoke runs (shared by both variants)."""
    return LossWeights(
        action=1.0,
        support_mode=0.15,
        corridor=0.15,
        persistence=0.08,
        disturbance=0.08,
        planner_success=0.20,
        planner_risk=0.08,
        planner_ranking=0.20,
        proposal_reconstruction=0.10,
        proposal_success=0.12,
proposal_ranking=0.15, + proposal_mode=0.10, + proposal_diversity=0.02, + task_metrics=0.15, + transition=0.25, + gate=0.25, + calibration=0.10, + proposal_mode_task_filter=[TASK_NAME], + ) + + +def _save_training_checkpoint( + *, + output_dir: Path, + experiment_name: str, + model: torch.nn.Module, + policy_config: PolicyConfig, + trainer_config: TrainerConfig, + loss_weights: LossWeights, + history: list[dict[str, Any]], + best_val: float, + train_spec: dict[str, Any], +) -> Path: + output_dir.mkdir(parents=True, exist_ok=True) + checkpoint_path = output_dir / "checkpoint_best.pt" + torch.save( + { + "experiment_name": experiment_name, + "policy_config": asdict(policy_config), + "trainer_config": asdict(trainer_config), + "loss_weights": asdict(loss_weights), + "state_dict": model.state_dict(), + "history": history, + "best_val_total": best_val, + "train_spec": train_spec, + }, + checkpoint_path, + ) + return checkpoint_path + + +def _aggregate_epoch(loss_records: Sequence[dict[str, float]]) -> dict[str, float]: + keys = sorted({key for record in loss_records for key in record}) + return { + key: float(np.mean([record.get(key, 0.0) for record in loss_records])) if loss_records else 0.0 + for key in keys + } + + +def _train_variant( + *, + variant: str, + train_samples: Sequence[dict[str, Any]], + val_samples: Sequence[dict[str, Any]], + spec: SmokeSpec, + output_dir: Path, +) -> tuple[Path, dict[str, Any]]: + policy_config, _init_trainer_cfg, _init_loss_weights = _load_init_bundle() + trainer_config = _trainer_config_for_variant(variant) + loss_weights = _loss_weights_for_smoke() + model = build_policy(policy_config, trainer_config) + init_info = _load_init_checkpoint(model, str(DEFAULT_INIT_CHECKPOINT), False) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = model.to(device) + torch.manual_seed(spec.train_seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(spec.train_seed) + torch.backends.cuda.matmul.allow_tf32 = 
True + matched = apply_trainable_parameter_prefixes(model, trainer_config) + optimizer = torch.optim.AdamW( + [parameter for parameter in model.parameters() if parameter.requires_grad], + lr=spec.learning_rate, + weight_decay=spec.weight_decay, + ) + trainer = BimanualTrainer(model=model, optimizer=optimizer, config=trainer_config) + train_loader = _make_loader(train_samples, batch_size=spec.batch_size, num_workers=spec.num_workers, shuffle=True) + val_loader = _make_loader(val_samples, batch_size=spec.batch_size, num_workers=spec.num_workers, shuffle=False) + + best_val = math.inf + history: list[dict[str, Any]] = [] + train_spec = build_target_training_spec( + track_id="occlusion_track", + model_variant=variant, + seed=spec.train_seed, + train_demos=spec.train_episodes, + val_demos=spec.val_episodes, + init_checkpoint_group=str(DEFAULT_INIT_CHECKPOINT), + optimizer="adamw", + learning_rate=spec.learning_rate, + lr_schedule="constant", + batch_size=spec.batch_size, + augmentations="none", + early_stopping_metric="val_total", + max_gradient_steps=len(train_loader) * spec.epochs, + unfreeze_scope="fusion_memory_decoder", + dataset_split_id=( + f"pickclutter_{SMOKE_VERSION}_seed{spec.dataset_seed}" + if int(spec.dataset_seed) == DEFAULT_SEED + else f"pickclutter_{SMOKE_VERSION}_dataset_seed{spec.dataset_seed}" + ), + ) + + for epoch in range(spec.epochs): + model.train() + train_losses: list[dict[str, float]] = [] + for batch in train_loader: + moved = _move_batch_to_device(batch, device) + loss_dict = trainer.training_step(moved, loss_weights=loss_weights) + train_losses.append({key: float(value.detach().cpu()) for key, value in loss_dict.items()}) + + model.eval() + val_losses: list[dict[str, float]] = [] + with torch.no_grad(): + for batch in val_loader: + moved = _move_batch_to_device(batch, device) + forward_kwargs = { + "images": moved["images"], + "proprio": moved["proprio"], + "texts": moved["texts"], + "task_names": moved.get("task_name"), + "task_ids": 
moved.get("task_id"), + "history_images": moved.get("history_images"), + "history_proprio": moved.get("history_proprio"), + "history_actions": moved.get("history_actions"), + "depths": moved.get("depths"), + "depth_valid": moved.get("depth_valid"), + "camera_intrinsics": moved.get("camera_intrinsics"), + "camera_extrinsics": moved.get("camera_extrinsics"), + "history_depths": moved.get("history_depths"), + "history_depth_valid": moved.get("history_depth_valid"), + "history_camera_intrinsics": moved.get("history_camera_intrinsics"), + "history_camera_extrinsics": moved.get("history_camera_extrinsics"), + } + if variant == "adapter_active_ft": + forward_kwargs["adapter_mode"] = "adapter_active" + forward_kwargs["use_transition_model"] = True + forward_kwargs["use_task_conditioning"] = True + outputs = model(**forward_kwargs) + from train.losses import compute_total_loss + + losses = compute_total_loss(outputs, moved, weights=loss_weights) + val_losses.append({key: float(value.detach().cpu()) for key, value in losses.items()}) + + train_summary = _aggregate_epoch(train_losses) + val_summary = _aggregate_epoch(val_losses) + history.append({"epoch": epoch, "train": train_summary, "val": val_summary}) + print( + json.dumps( + { + "phase": "epoch_complete", + "variant": variant, + "epoch": epoch, + "train_total": train_summary.get("total", 0.0), + "val_total": val_summary.get("total", 0.0), + } + ), + flush=True, + ) + if val_summary.get("total", math.inf) <= best_val: + best_val = val_summary["total"] + checkpoint_path = _save_training_checkpoint( + output_dir=output_dir, + experiment_name=f"{variant}_seed{spec.train_seed}", + model=model, + policy_config=policy_config, + trainer_config=trainer_config, + loss_weights=loss_weights, + history=history, + best_val=best_val, + train_spec=train_spec, + ) + (output_dir / "summary.json").write_text( + json.dumps( + { + "variant": variant, + "checkpoint_path": str(checkpoint_path), + "init_info": init_info, + 
                        "trainable_parameter_names": matched,
                        "best_val_total": best_val,
                        "history": history,
                        "train_spec": train_spec,
                    },
                    indent=2,
                )
                + "\n",
                encoding="utf-8",
            )
    return output_dir / "checkpoint_best.pt", train_spec


def _load_checkpoint(
    checkpoint_path: Path,
    *,
    adapter_mode: str | None = None,
    planner_overrides: dict[str, float] | None = None,
) -> tuple[torch.nn.Module, dict[str, Any]]:
    """Rebuild a policy from a training checkpoint and load its weights.

    Applies smoke planner overrides, optionally forces the adapter/eval mode
    (only for adapter-wrapped policies), shape-filters the stored state dict,
    and moves the model to GPU when available. Raises RuntimeError on
    unexpected state-dict keys. Skipped (shape-mismatched) keys are reported
    back to the caller via the mutated checkpoint dict.
    NOTE(review): weights_only=False unpickles arbitrary objects — only load
    trusted checkpoints.
    """
    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
    policy_config = PolicyConfig(
        backbone=FrozenVLBackboneConfig(**checkpoint["policy_config"]["backbone"]),
        fusion=MultiViewFusionConfig(**checkpoint["policy_config"]["fusion"]),
        memory=ObservationMemoryConfig(**checkpoint["policy_config"]["memory"]),
        decoder=ChunkDecoderConfig(**checkpoint["policy_config"]["decoder"]),
        reveal_head=RevealHeadConfig(**checkpoint["policy_config"]["reveal_head"]),
        world_model=RevealWMConfig(**checkpoint["policy_config"]["world_model"]),
        planner=PlannerConfig(**checkpoint["policy_config"]["planner"]),
    )
    policy_config = _apply_smoke_planner_overrides(policy_config, planner_overrides=planner_overrides)
    trainer_config = TrainerConfig(**checkpoint["trainer_config"])
    if adapter_mode is not None and trainer_config.policy_type == "adapter_wrapped":
        trainer_config.adapter_mode = adapter_mode
        trainer_config.eval_mode = adapter_mode
    model = build_policy(policy_config, trainer_config)
    # Drop shape-incompatible keys, then load non-strictly; missing keys are
    # tolerated, unexpected ones are a hard error.
    filtered_state_dict, skipped, _remapped = filter_compatible_state_dict(model.state_dict(), checkpoint["state_dict"])
    incompatible = model.load_state_dict(filtered_state_dict, strict=False)
    if incompatible.unexpected_keys:
        raise RuntimeError(f"Unexpected checkpoint keys for {checkpoint_path}: {list(incompatible.unexpected_keys)}")
    if skipped:
        checkpoint["_shape_skipped_keys"] = skipped
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    return model, checkpoint


def _collect_split(
    *,
    split_name: str,
    seeds: Sequence[int],
    spec: SmokeSpec,
    output_path: Path,
) -> dict[str, Any]:
    """Collect scripted-teacher samples for one split and save them.

    Runs paired envs — `obs_env` renders observations, `sim_env` is a cheap
    shadow copy used to roll out candidate modes — over `seeds`, picking the
    highest-utility mode at every decision step and recording dense
    supervision targets. Always closes both envs.
    """
    obs_env = PickClutterRevealEnv(
        obs_mode="rgb+depth+segmentation",
        control_mode="pd_ee_delta_pos",
        render_mode="rgb_array",
    )
    sim_env = PickClutterRevealEnv(
        obs_mode="none",
        control_mode="pd_ee_delta_pos",
        render_mode="rgb_array",
    )
    samples: list[dict[str, Any]] = []
    episode_records: list[dict[str, Any]] = []
    try:
        for episode_seed in seeds:
            obs, _ = obs_env.reset(seed=int(episode_seed))
            sim_env.reset(seed=int(episode_seed))
            _sync_env_state(obs_env, sim_env)
            episode_start_positions = _all_positions(obs_env)
            history: deque[dict[str, Any]] = deque(maxlen=spec.history_steps)
            episode_success = False
            for decision_step in range(spec.max_macro_steps):
                obs_bundle = _extract_sensor_bundle(obs)
                proprio = _build_proprio(obs_env)
                snapshot = _snapshot_env(obs_env)
                # Every mode is evaluated from the same snapshot in sim_env.
                candidate_metrics = [_evaluate_candidate(sim_env, obs_env, snapshot, mode_name) for mode_name in MODE_ORDER]
                candidate_chunks = np.stack([CANONICAL_CHUNKS[mode_name] for mode_name in MODE_ORDER], axis=0).astype(np.float32)
                utilities = np.asarray([payload["candidate_utility"] for payload in candidate_metrics], dtype=np.float32)
                best_index = int(utilities.argmax())
                selected_mode = MODE_ORDER[best_index]
                state_targets = _current_state_targets(
                    obs_env,
                    obs_bundle=obs_bundle,
                    candidate_metrics=candidate_metrics,
                    episode_start_positions=episode_start_positions,
                    selected_mode=selected_mode,
                )
                rollout_targets_by_mode = [
                    _candidate_rollout_targets(
                        mode_name=mode_name,
                        state_targets=state_targets,
                        candidate_payload=payload,
                    )
                    for mode_name, payload in zip(MODE_ORDER, candidate_metrics)
                ]
                sample = {
                    "images": obs_bundle["images"].copy(),
                    "depths": obs_bundle["depths"].copy(),
                    "depth_valid": obs_bundle["depth_valid"].copy(),
                    "camera_intrinsics": obs_bundle["camera_intrinsics"].copy(),
                    "camera_extrinsics": obs_bundle["camera_extrinsics"].copy(),
                    "history_images": _history_stack(history, "images", pad_shape=obs_bundle["images"].shape, dtype=np.uint8, history_steps=spec.history_steps),
                    "history_depths": _history_stack(history, "depths", pad_shape=obs_bundle["depths"].shape, dtype=np.float32, history_steps=spec.history_steps),
                    "history_depth_valid": _history_stack(history, "depth_valid", pad_shape=obs_bundle["depth_valid"].shape, dtype=np.float32, history_steps=spec.history_steps),
                    "history_camera_intrinsics": _history_stack(history, "camera_intrinsics", pad_shape=obs_bundle["camera_intrinsics"].shape, dtype=np.float32, history_steps=spec.history_steps),
                    "history_camera_extrinsics": _history_stack(history, "camera_extrinsics", pad_shape=obs_bundle["camera_extrinsics"].shape, dtype=np.float32, history_steps=spec.history_steps),
                    "history_proprio": _history_stack(history, "proprio", pad_shape=(PROPRIO_DIM,), dtype=np.float32, history_steps=spec.history_steps),
                    "history_actions": _history_stack(history, "action", pad_shape=(14,), dtype=np.float32, history_steps=spec.history_steps),
                    "proprio": proprio.astype(np.float32),
                    "language_goal": TEXT_PROMPT,
                    "task_name": TASK_NAME,
                    "task_id": TASK_ID,
                    "action_chunk": CANONICAL_CHUNKS[selected_mode].copy(),
                    "candidate_action_chunks": candidate_chunks,
                    "candidate_retrieval_success": np.asarray([payload["candidate_retrieval_success"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_final_disturbance_cost": np.asarray([payload["candidate_final_disturbance_cost"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_reocclusion_rate": np.asarray([payload["candidate_reocclusion_rate"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_visibility_integral": np.asarray([payload["candidate_visibility_integral"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_actor_feasibility_auc": np.asarray([payload["candidate_actor_feasibility_auc"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_reveal_achieved": np.asarray([payload["candidate_reveal_achieved"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_hold_persistence": np.asarray([payload["candidate_hold_persistence"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_support_stability_auc": np.asarray([payload["candidate_support_stability_auc"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_disturbance_auc": np.asarray([payload["candidate_disturbance_auc"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_risk": np.asarray([payload["candidate_risk"] for payload in candidate_metrics], dtype=np.float32),
                    "candidate_utility": utilities,
                    "candidate_rollout_support_mode": np.stack(
                        [payload["candidate_rollout_support_mode"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.int64),
                    "candidate_rollout_corridor_feasible": np.stack(
                        [payload["candidate_rollout_corridor_feasible"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_persistence_horizon": np.stack(
                        [payload["candidate_rollout_persistence_horizon"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_disturbance_cost": np.stack(
                        [payload["candidate_rollout_disturbance_cost"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_belief_map": np.stack(
                        [payload["candidate_rollout_belief_map"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_visibility_map": np.stack(
                        [payload["candidate_rollout_visibility_map"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_clearance_map": np.stack(
                        [payload["candidate_rollout_clearance_map"] for payload in rollout_targets_by_mode],
                        axis=0,
                    ).astype(np.float32),
                    "candidate_rollout_support_stability": np.stack(
[payload["candidate_rollout_support_stability"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_reocclusion_target": np.stack( + [payload["candidate_rollout_reocclusion_target"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_occluder_contact_map": np.stack( + [payload["candidate_rollout_occluder_contact_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "candidate_rollout_grasp_affordance_map": np.stack( + [payload["candidate_rollout_grasp_affordance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_support_mode": np.stack( + [payload["candidate_rollout_support_mode"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.int64), + "proposal_target_rollout_corridor_feasible": np.stack( + [payload["candidate_rollout_corridor_feasible"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_persistence_horizon": np.stack( + [payload["candidate_rollout_persistence_horizon"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_disturbance_cost": np.stack( + [payload["candidate_rollout_disturbance_cost"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_belief_map": np.stack( + [payload["candidate_rollout_belief_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_visibility_map": np.stack( + [payload["candidate_rollout_visibility_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_clearance_map": np.stack( + [payload["candidate_rollout_clearance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_support_stability": np.stack( + 
[payload["candidate_rollout_support_stability"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_reocclusion_target": np.stack( + [payload["candidate_rollout_reocclusion_target"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_occluder_contact_map": np.stack( + [payload["candidate_rollout_occluder_contact_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "proposal_target_rollout_grasp_affordance_map": np.stack( + [payload["candidate_rollout_grasp_affordance_map"] for payload in rollout_targets_by_mode], + axis=0, + ).astype(np.float32), + "episode_seed": int(episode_seed), + "decision_step": int(decision_step), + "selected_mode": selected_mode, + **state_targets, + } + samples.append(sample) + _execute_mode(obs_env, selected_mode) + obs = obs_env.get_obs(obs_env.get_info()) + history.append(_init_history_entry(obs_bundle, proprio, CANONICAL_CHUNKS[selected_mode])) + if _success_from_state(obs_env): + episode_success = True + break + episode_records.append( + { + "episode_seed": int(episode_seed), + "success": episode_success, + "steps": len(history), + } + ) + print( + json.dumps( + { + "phase": "collect_episode_complete", + "split": split_name, + "episode_seed": int(episode_seed), + "success": episode_success, + "steps": len(history), + "samples_collected": len(samples), + } + ), + flush=True, + ) + finally: + obs_env.close() + sim_env.close() + + payload = { + "split_name": split_name, + "resolution": spec.resolution, + "history_steps": spec.history_steps, + "samples": samples, + "episode_records": episode_records, + } + output_path.parent.mkdir(parents=True, exist_ok=True) + torch.save(payload, output_path) + return payload + + +def _load_split(path: Path) -> dict[str, Any]: + return torch.load(path, map_location="cpu", weights_only=False) + + +def _print_split_supervision_summary(split_name: str, samples: Sequence[dict[str, 
Any]]) -> None: + mode_counter = collections.Counter(str(sample.get("selected_mode", "unknown")) for sample in samples) + support_counter = collections.Counter(int(sample.get("support_mode", -1)) for sample in samples if "support_mode" in sample) + confidence_values = [float(sample.get("state_confidence_target", 0.0)) for sample in samples if "state_confidence_target" in sample] + payload = { + "phase": "split_supervision_summary", + "split": split_name, + "samples": len(samples), + "selected_modes": dict(mode_counter), + "support_modes": dict(support_counter), + "mean_state_confidence_target": float(np.mean(confidence_values)) if confidence_values else None, + } + print(json.dumps(payload, sort_keys=True), flush=True) + + +def _batch_from_obs(obs_bundle: dict[str, np.ndarray], proprio: np.ndarray, history: Sequence[dict[str, Any]], device: torch.device) -> dict[str, Any]: + return { + "images": torch.from_numpy(obs_bundle["images"]).permute(0, 3, 1, 2).unsqueeze(0).float().div(255.0).to(device), + "depths": torch.from_numpy(obs_bundle["depths"]).unsqueeze(0).float().to(device), + "depth_valid": torch.from_numpy(obs_bundle["depth_valid"]).unsqueeze(0).float().to(device), + "camera_intrinsics": torch.from_numpy(obs_bundle["camera_intrinsics"]).unsqueeze(0).float().to(device), + "camera_extrinsics": torch.from_numpy(obs_bundle["camera_extrinsics"]).unsqueeze(0).float().to(device), + "history_images": torch.from_numpy( + _history_stack(history, "images", pad_shape=obs_bundle["images"].shape, dtype=np.uint8, history_steps=HISTORY_STEPS) + ).permute(0, 1, 4, 2, 3).unsqueeze(0).float().div(255.0).to(device), + "history_depths": torch.from_numpy( + _history_stack(history, "depths", pad_shape=obs_bundle["depths"].shape, dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_depth_valid": torch.from_numpy( + _history_stack(history, "depth_valid", pad_shape=obs_bundle["depth_valid"].shape, dtype=np.float32, history_steps=HISTORY_STEPS) 
+ ).unsqueeze(0).float().to(device), + "history_camera_intrinsics": torch.from_numpy( + _history_stack(history, "camera_intrinsics", pad_shape=obs_bundle["camera_intrinsics"].shape, dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_camera_extrinsics": torch.from_numpy( + _history_stack(history, "camera_extrinsics", pad_shape=obs_bundle["camera_extrinsics"].shape, dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_proprio": torch.from_numpy( + _history_stack(history, "proprio", pad_shape=(PROPRIO_DIM,), dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "history_actions": torch.from_numpy( + _history_stack(history, "action", pad_shape=(14,), dtype=np.float32, history_steps=HISTORY_STEPS) + ).unsqueeze(0).float().to(device), + "proprio": torch.from_numpy(proprio).unsqueeze(0).float().to(device), + "texts": [TEXT_PROMPT], + "task_names": [TASK_NAME], + "task_ids": torch.as_tensor([TASK_ID], dtype=torch.long, device=device), + } + + +def _eval_mode_name( + model_output: dict[str, Any], + checkpoint_mode: str, +) -> tuple[str, bool, bool]: + if checkpoint_mode == "adapter_active_ft" and "proposal_mode_names" in model_output and "best_candidate_indices" in model_output: + active_mask = bool(_np(model_output.get("adapter_active_mask", np.asarray([False]))).reshape(-1)[0]) + if not active_mask: + mode_name = _classify_mode_from_chunk(_np(model_output["action_mean"])[0]) + return mode_name, False, False + best_index = int(_np(model_output["best_candidate_indices"])[0]) + proposal_mode_names = model_output["proposal_mode_names"][0] + mode_name = str(proposal_mode_names[best_index]) if best_index < len(proposal_mode_names) else _classify_mode_from_chunk( + _np(model_output["action_mean"])[0] + ) + non_base = bool(best_index > 0) + return mode_name, active_mask, non_base + mode_name = _classify_mode_from_chunk(_np(model_output["action_mean"])[0]) + return 
mode_name, False, False + + +def _evaluate_checkpoint( + *, + checkpoint_path: Path, + adapter_mode: str, + result_mode_name: str, + seeds: Sequence[int], + report_path: Path, + train_spec: dict[str, Any] | None, + dataset_seed: int, + planner_overrides: dict[str, float] | None = None, +) -> dict[str, Any]: + model, checkpoint = _load_checkpoint( + checkpoint_path, + adapter_mode=adapter_mode if adapter_mode != "trunk_only" else None, + planner_overrides=planner_overrides, + ) + device = next(model.parameters()).device + obs_env = PickClutterRevealEnv( + obs_mode="rgb+depth+segmentation", + control_mode="pd_ee_delta_pos", + render_mode="rgb_array", + ) + sim_env = PickClutterRevealEnv( + obs_mode="none", + control_mode="pd_ee_delta_pos", + render_mode="rgb_array", + ) + successes: list[int] = [] + episode_records: list[dict[str, Any]] = [] + reveal_steps: list[int] = [] + retrieve_steps: list[int] = [] + disturbance_values: list[float] = [] + intervention_events = 0 + non_base_events = 0 + total_decisions = 0 + try: + for episode_seed in seeds: + obs, _ = obs_env.reset(seed=int(episode_seed)) + sim_env.reset(seed=int(episode_seed)) + _sync_env_state(obs_env, sim_env) + history: deque[dict[str, Any]] = deque(maxlen=HISTORY_STEPS) + success = False + first_reveal_step: int | None = None + first_retrieve_step: int | None = None + episode_disturbance: list[float] = [] + for decision_step in range(MAX_MACRO_STEPS): + obs_bundle = _extract_sensor_bundle(obs) + proprio = _build_proprio(obs_env) + batch = _batch_from_obs(obs_bundle, proprio, list(history), device) + with torch.no_grad(): + if adapter_mode == "trunk_only": + outputs = model(**batch) + else: + outputs = model( + **batch, + adapter_mode=adapter_mode, + use_transition_model=True, + use_task_conditioning=True, + ) + selected_mode, active_mask, non_base = _eval_mode_name(outputs, result_mode_name) + start_positions = _all_positions(obs_env) + _sync_env_state(obs_env, sim_env) + _execute_mode(sim_env, 
selected_mode) + end_metrics = _candidate_metrics(sim_env, start_positions=start_positions, current_obs_bundle=None) + _sync_env_state(sim_env, obs_env) + obs = obs_env.get_obs(obs_env.get_info()) + history.append(_init_history_entry(obs_bundle, proprio, CANONICAL_CHUNKS.get(selected_mode, CANONICAL_CHUNKS["base_action"]))) + total_decisions += 1 + intervention_events += int(active_mask) + non_base_events += int(non_base) + episode_disturbance.append(end_metrics["disturbance"]) + if selected_mode != "retrieve" and selected_mode not in {"base_action", "maintain_gap"} and first_reveal_step is None: + first_reveal_step = decision_step + 1 + if selected_mode == "retrieve" and first_retrieve_step is None: + first_retrieve_step = decision_step + 1 + if _success_from_state(obs_env): + success = True + break + successes.append(int(success)) + if first_reveal_step is not None: + reveal_steps.append(first_reveal_step) + if first_retrieve_step is not None: + retrieve_steps.append(first_retrieve_step) + disturbance_values.append(float(np.mean(episode_disturbance)) if episode_disturbance else 0.0) + episode_records.append( + { + "episode_seed": int(episode_seed), + "success": success, + "steps": len(history), + "first_reveal_step": first_reveal_step, + "first_retrieve_step": first_retrieve_step, + "episode_disturbance": float(np.mean(episode_disturbance)) if episode_disturbance else 0.0, + } + ) + print( + json.dumps( + { + "phase": "eval_episode_complete", + "adapter_mode": result_mode_name, + "episode_seed": int(episode_seed), + "success": success, + "steps": len(history), + } + ), + flush=True, + ) + finally: + obs_env.close() + sim_env.close() + + eval_protocol = build_public_eval_protocol( + track_id="occlusion_track", + eval_mode=result_mode_name, + seed=int(dataset_seed), + episodes=len(seeds), + resolution=224, + cameras=CAMERA_NAMES, + ) + payload = { + "track_id": "occlusion_track", + "suite": "maniskill3", + "benchmark_task": "PickClutterYCB-v1", + "role": "target", 
+ "adapter_mode": result_mode_name, + "episodes": len(seeds), + "successes": successes, + "success_rate": float(np.mean(successes)) if successes else 0.0, + "intervention_rate": float(intervention_events / max(1, total_decisions)), + "non_base_selection_rate": float(non_base_events / max(1, total_decisions)), + "steps_to_first_reveal_or_access": float(np.mean(reveal_steps)) if reveal_steps else float(MAX_MACRO_STEPS), + "steps_to_retrieve": float(np.mean(retrieve_steps)) if retrieve_steps else float(MAX_MACRO_STEPS), + "disturbance_proxy": float(np.mean(disturbance_values)) if disturbance_values else 0.0, + "episode_records": episode_records, + "eval_protocol": eval_protocol, + } + if train_spec is not None: + payload["train_spec"] = train_spec + report_path.parent.mkdir(parents=True, exist_ok=True) + report_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + return payload + + +def _summarize_smoke(results: Sequence[dict[str, Any]], output_dir: Path) -> dict[str, Any]: + summary = summarize_public_benchmark_package(list(results), allow_partial=True) + output_dir.mkdir(parents=True, exist_ok=True) + json_path = output_dir / "public_benchmark_package_summary.json" + md_path = output_dir / "public_benchmark_package_summary.md" + json_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8") + lines = [ + "# ManiSkill PickClutter Smoke Summary", + "", + f"- available_tracks: {summary['available_tracks']}", + f"- target_macro_average_delta: {summary['target_macro_average_delta']:.3f}", + f"- headline_pass: {summary['headline_pass']}", + f"- sign_of_life_pass: {summary['sign_of_life_pass']}", + "", + ] + for track_id, payload in summary["tracks"].items(): + lines.append(f"## {track_id}") + lines.append(f"- delta_active_vs_trunk: {payload.get('delta_active_vs_trunk', 0.0):.3f}") + lines.append(f"- delta_noop_vs_trunk: {payload.get('delta_noop_vs_trunk', 0.0):.3f}") + lines.append(f"- signs_of_life: 
{payload.get('signs_of_life', False)}") + for mode, mode_payload in payload["modes"].items(): + lines.append(f"- {mode}: mean_success={mode_payload['mean_success']:.3f}") + lines.append("") + md_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + return summary + + +def _default_paths() -> SmokePaths: + return SmokePaths() + + +def _dataset_artifact_path(data_dir: Path, basename: str, *, dataset_seed: int) -> Path: + if int(dataset_seed) == DEFAULT_SEED: + return data_dir / basename + artifact = Path(basename) + return data_dir / f"{artifact.stem}_seed{int(dataset_seed)}{artifact.suffix}" + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Minimum-sign-of-life ManiSkill PickClutter smoke run.") + parser.add_argument("--stage", choices=("collect", "train", "eval", "all"), default="all") + parser.add_argument("--data-dir", type=Path, default=_default_paths().data_dir) + parser.add_argument("--output-dir", type=Path, default=_default_paths().output_dir) + parser.add_argument("--report-dir", type=Path, default=_default_paths().report_dir) + parser.add_argument("--seed", type=int, default=None, help="Deprecated alias for train/eval dataset seed.") + parser.add_argument("--train-seed", type=int, default=None) + parser.add_argument("--dataset-seed", type=int, default=None) + parser.add_argument("--eval-split", choices=("val", "eval"), default="eval") + parser.add_argument("--adapter-confidence-threshold", type=float, default=None) + parser.add_argument("--retrieve-access-threshold", type=float, default=None) + parser.add_argument("--retrieve-persistence-threshold", type=float, default=None) + parser.add_argument("--retrieve-support-threshold", type=float, default=None) + parser.add_argument("--retrieve-reocclusion-threshold", type=float, default=None) + parser.add_argument("--planner-mode-preference-bonus", type=float, default=None) + parser.add_argument("--planner-premature-retrieve-penalty", type=float, 
default=None) + parser.add_argument("--planner-premature-insert-penalty", type=float, default=None) + parser.add_argument("--planner-premature-occlusion-sweep-penalty", type=float, default=None) + parser.add_argument("--planner-premature-maintain-penalty", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-access-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-reveal-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-persistence-threshold", type=float, default=None) + parser.add_argument("--planner-retrieve-stage-support-threshold", type=float, default=None) + parser.add_argument("--planner-insert-stage-access-threshold", type=float, default=None) + parser.add_argument("--planner-insert-stage-visibility-threshold", type=float, default=None) + parser.add_argument("--planner-insert-stage-support-threshold", type=float, default=None) + parser.add_argument("--planner-occlusion-maintain-gap-min-access", type=float, default=None) + parser.add_argument("--planner-occlusion-maintain-gap-min-visibility", type=float, default=None) + return parser.parse_args() + + +def _planner_overrides_from_args(args: argparse.Namespace) -> dict[str, float]: + override_pairs = ( + ("adapter_confidence_threshold", args.adapter_confidence_threshold), + ("retrieve_access_threshold", args.retrieve_access_threshold), + ("retrieve_persistence_threshold", args.retrieve_persistence_threshold), + ("retrieve_support_threshold", args.retrieve_support_threshold), + ("retrieve_reocclusion_threshold", args.retrieve_reocclusion_threshold), + ("mode_preference_bonus", args.planner_mode_preference_bonus), + ("premature_retrieve_penalty", args.planner_premature_retrieve_penalty), + ("premature_insert_penalty", args.planner_premature_insert_penalty), + ("premature_occlusion_sweep_penalty", args.planner_premature_occlusion_sweep_penalty), + ("premature_maintain_penalty", args.planner_premature_maintain_penalty), + 
("retrieve_stage_access_threshold", args.planner_retrieve_stage_access_threshold), + ("retrieve_stage_reveal_threshold", args.planner_retrieve_stage_reveal_threshold), + ("retrieve_stage_persistence_threshold", args.planner_retrieve_stage_persistence_threshold), + ("retrieve_stage_support_threshold", args.planner_retrieve_stage_support_threshold), + ("insert_stage_access_threshold", args.planner_insert_stage_access_threshold), + ("insert_stage_visibility_threshold", args.planner_insert_stage_visibility_threshold), + ("insert_stage_support_threshold", args.planner_insert_stage_support_threshold), + ("occlusion_maintain_gap_min_access", args.planner_occlusion_maintain_gap_min_access), + ("occlusion_maintain_gap_min_visibility", args.planner_occlusion_maintain_gap_min_visibility), + ) + return {key: value for key, value in override_pairs if value is not None} + + +def main() -> None: + args = _parse_args() + planner_overrides = _planner_overrides_from_args(args) + base_seed = DEFAULT_SEED if args.seed is None else int(args.seed) + train_seed = int(args.train_seed) if args.train_seed is not None else base_seed + dataset_seed = int(args.dataset_seed) if args.dataset_seed is not None else base_seed + spec = SmokeSpec(dataset_seed=dataset_seed, train_seed=train_seed) + splits = _build_episode_splits(spec) + split_path = _dataset_artifact_path(args.data_dir, "episode_splits.json", dataset_seed=spec.dataset_seed) + train_path = _dataset_artifact_path(args.data_dir, "train.pt", dataset_seed=spec.dataset_seed) + val_path = _dataset_artifact_path(args.data_dir, "val.pt", dataset_seed=spec.dataset_seed) + + if args.stage in {"collect", "all"}: + _save_episode_splits(split_path, splits) + if not train_path.exists(): + print(json.dumps({"phase": "collect_train_start", "episodes": len(splits["train"])}), flush=True) + _collect_split(split_name="train", seeds=splits["train"], spec=spec, output_path=train_path) + if not val_path.exists(): + print(json.dumps({"phase": 
"collect_val_start", "episodes": len(splits["val"])}), flush=True) + _collect_split(split_name="val", seeds=splits["val"], spec=spec, output_path=val_path) + + if args.stage == "collect": + return + + train_bundle = _load_split(train_path) + val_bundle = _load_split(val_path) + train_samples = train_bundle["samples"] + val_samples = val_bundle["samples"] + _print_split_supervision_summary("train", train_samples) + _print_split_supervision_summary("val", val_samples) + + trunk_checkpoint = args.output_dir / f"trunk_only_ft_seed{spec.train_seed}" / "checkpoint_best.pt" + adapter_checkpoint = args.output_dir / f"adapter_active_ft_seed{spec.train_seed}" / "checkpoint_best.pt" + trunk_train_spec: dict[str, Any] | None = None + adapter_train_spec: dict[str, Any] | None = None + + if args.stage in {"train", "all"}: + if not trunk_checkpoint.exists(): + print(json.dumps({"phase": "train_variant_start", "variant": "trunk_only_ft"}), flush=True) + trunk_checkpoint, trunk_train_spec = _train_variant( + variant="trunk_only_ft", + train_samples=train_samples, + val_samples=val_samples, + spec=spec, + output_dir=args.output_dir / f"trunk_only_ft_seed{spec.train_seed}", + ) + else: + trunk_payload = torch.load(trunk_checkpoint, map_location="cpu", weights_only=False) + trunk_train_spec = trunk_payload.get("train_spec") + if not adapter_checkpoint.exists(): + print(json.dumps({"phase": "train_variant_start", "variant": "adapter_active_ft"}), flush=True) + adapter_checkpoint, adapter_train_spec = _train_variant( + variant="adapter_active_ft", + train_samples=train_samples, + val_samples=val_samples, + spec=spec, + output_dir=args.output_dir / f"adapter_active_ft_seed{spec.train_seed}", + ) + else: + adapter_payload = torch.load(adapter_checkpoint, map_location="cpu", weights_only=False) + adapter_train_spec = adapter_payload.get("train_spec") + + if args.stage == "train": + return + + if trunk_train_spec is None and trunk_checkpoint.exists(): + trunk_payload = 
torch.load(trunk_checkpoint, map_location="cpu", weights_only=False) + trunk_train_spec = trunk_payload.get("train_spec") + if adapter_train_spec is None and adapter_checkpoint.exists(): + adapter_payload = torch.load(adapter_checkpoint, map_location="cpu", weights_only=False) + adapter_train_spec = adapter_payload.get("train_spec") + + eval_seeds = splits[args.eval_split] + print(json.dumps({"phase": "eval_start", "episodes": len(eval_seeds)}), flush=True) + trunk_result = _evaluate_checkpoint( + checkpoint_path=trunk_checkpoint, + adapter_mode="trunk_only", + result_mode_name="trunk_only_ft", + seeds=eval_seeds, + report_path=args.report_dir / f"trunk_only_ft_seed{spec.train_seed}.json", + train_spec=trunk_train_spec, + dataset_seed=spec.dataset_seed, + planner_overrides=planner_overrides, + ) + noop_result = _evaluate_checkpoint( + checkpoint_path=adapter_checkpoint, + adapter_mode="adapter_noop", + result_mode_name="adapter_noop", + seeds=eval_seeds, + report_path=args.report_dir / f"adapter_noop_seed{spec.train_seed}.json", + train_spec=adapter_train_spec, + dataset_seed=spec.dataset_seed, + planner_overrides=planner_overrides, + ) + active_result = _evaluate_checkpoint( + checkpoint_path=adapter_checkpoint, + adapter_mode="adapter_active", + result_mode_name="adapter_active_ft", + seeds=eval_seeds, + report_path=args.report_dir / f"adapter_active_ft_seed{spec.train_seed}.json", + train_spec=adapter_train_spec, + dataset_seed=spec.dataset_seed, + planner_overrides=planner_overrides, + ) + summary = _summarize_smoke([trunk_result, noop_result, active_result], args.report_dir) + print(json.dumps({"phase": "complete", "summary": summary}, indent=2), flush=True) + + +if __name__ == "__main__": + main() diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_public_benchmark_package.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_public_benchmark_package.py new file mode 100644 index 
0000000000000000000000000000000000000000..ba1c1ce4f9f0b1dcf976ee8b8e7f415f37ab1e3f --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_public_benchmark_package.py @@ -0,0 +1,369 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any + +import numpy as np + +from eval.public_benchmark_package import ( + ANCHOR_ROLE, + DEFAULT_ANCHOR_TOLERANCE, + DEFAULT_SIGN_OF_LIFE_GAIN, + DEFAULT_SIGN_OF_LIFE_INTERVENTION, + DEFAULT_SIGN_OF_LIFE_NON_BASE, + TARGET_ROLE, + build_public_eval_protocol, + build_target_training_spec, + default_public_benchmark_manifest, + expected_eval_modes, + public_benchmark_tracks, + public_protocol_identity_signature, + public_track_by_id, + training_fairness_signature, + write_default_public_benchmark_manifest, +) + + +def _load_json(path: str | Path) -> dict[str, Any]: + with Path(path).open("r", encoding="utf-8") as handle: + payload = json.load(handle) + if not isinstance(payload, dict): + raise TypeError(f"Expected a JSON object in {path!s}, got {type(payload)!r}.") + return payload + + +def _normalize_success_samples(payload: dict[str, Any]) -> np.ndarray: + if "successes" in payload: + raw = np.asarray(payload["successes"], dtype=np.float32).reshape(-1) + return raw + if "success_rate" in payload: + return np.asarray([float(payload["success_rate"])], dtype=np.float32) + raise KeyError("Each result payload must include either `successes` or `success_rate`.") + + +def _mean_optional(records: list[dict[str, Any]], key: str) -> float | None: + values = [float(record[key]) for record in records if key in record] + if not values: + return None + return float(np.mean(values)) + + +def _bootstrap_delta_ci( + lhs: np.ndarray, + rhs: np.ndarray, + *, + bootstrap_samples: int, + bootstrap_seed: int, +) -> tuple[float, float]: + if lhs.size == 0 or rhs.size == 0: + return 0.0, 0.0 + rng = np.random.default_rng(int(bootstrap_seed)) + deltas = 
np.empty(int(bootstrap_samples), dtype=np.float32) + for index in range(int(bootstrap_samples)): + lhs_sample = lhs[rng.integers(0, lhs.shape[0], size=lhs.shape[0])] + rhs_sample = rhs[rng.integers(0, rhs.shape[0], size=rhs.shape[0])] + deltas[index] = float(lhs_sample.mean() - rhs_sample.mean()) + low, high = np.percentile(deltas, [2.5, 97.5]) + return float(low), float(high) + + +def _normalize_record(payload: dict[str, Any]) -> dict[str, Any]: + if "track_id" not in payload: + raise KeyError("Missing required field `track_id`.") + if "adapter_mode" not in payload: + raise KeyError("Missing required field `adapter_mode`.") + track = public_track_by_id(str(payload["track_id"])) + success_samples = _normalize_success_samples(payload) + success_rate = float(payload.get("success_rate", float(success_samples.mean()))) + episodes = int(payload.get("episodes", success_samples.shape[0])) + record = dict(payload) + record["track_id"] = track.track_id + record["suite"] = payload.get("suite", track.suite) + record["benchmark_task"] = payload.get("benchmark_task", track.benchmark_task) + record["role"] = payload.get("role", track.role) + record["adapter_mode"] = str(payload["adapter_mode"]) + record["successes"] = success_samples.tolist() + record["success_rate"] = success_rate + record["episodes"] = episodes + return record + + +def _validate_protocols(records: list[dict[str, Any]]) -> None: + by_track: dict[str, list[dict[str, Any]]] = {} + for record in records: + by_track.setdefault(record["track_id"], []).append(record) + for track_id, grouped in by_track.items(): + signatures = [] + for record in grouped: + protocol = record.get("eval_protocol") + if protocol is None: + raise ValueError( + f"Missing eval_protocol for track {track_id!r}, mode {record['adapter_mode']!r}." 
+ ) + signatures.append(public_protocol_identity_signature(protocol)) + if any(signature != signatures[0] for signature in signatures[1:]): + raise ValueError(f"Protocol identity mismatch detected for track {track_id!r}.") + + +def _validate_training_fairness(records: list[dict[str, Any]]) -> None: + grouped: dict[tuple[str, str], list[dict[str, Any]]] = {} + for record in records: + grouped.setdefault((record["track_id"], record["adapter_mode"]), []).append(record) + for track in public_benchmark_tracks(TARGET_ROLE): + trunk_records = grouped.get((track.track_id, "trunk_only_ft"), []) + active_records = grouped.get((track.track_id, "adapter_active_ft"), []) + if not trunk_records or not active_records: + continue + if len(trunk_records) != len(active_records): + raise ValueError( + f"Training fairness mismatch for {track.track_id!r}: different run counts " + f"between trunk_only_ft ({len(trunk_records)}) and adapter_active_ft ({len(active_records)})." + ) + if any(record.get("train_spec") is None for record in trunk_records + active_records): + raise ValueError( + f"Training fairness mismatch for {track.track_id!r}: missing train_spec on a target-track result." + ) + trunk_by_seed = { + int(record["train_spec"]["seed"]): training_fairness_signature(record["train_spec"]) + for record in trunk_records + } + active_by_seed = { + int(record["train_spec"]["seed"]): training_fairness_signature(record["train_spec"]) + for record in active_records + } + if set(trunk_by_seed) != set(active_by_seed): + raise ValueError(f"Training fairness mismatch for {track.track_id!r}: seed sets differ.") + for seed, trunk_signature in trunk_by_seed.items(): + if trunk_signature != active_by_seed[seed]: + raise ValueError( + f"Training fairness mismatch for {track.track_id!r} at seed {seed}: " + "trunk_only_ft and adapter_active_ft do not share the same data/init signature." 
+ ) + + +def _aggregate_mode(records: list[dict[str, Any]]) -> dict[str, Any]: + success_rates = np.asarray([float(record["success_rate"]) for record in records], dtype=np.float32) + success_samples = np.concatenate( + [np.asarray(record["successes"], dtype=np.float32).reshape(-1) for record in records], + axis=0, + ) + payload: dict[str, Any] = { + "num_runs": len(records), + "mean_success": float(success_rates.mean()) if success_rates.size else 0.0, + "success_samples": success_samples.tolist(), + } + for key in ( + "intervention_rate", + "non_base_selection_rate", + "steps_to_first_reveal_or_access", + "steps_to_retrieve", + "disturbance_proxy", + ): + mean_value = _mean_optional(records, key) + if mean_value is not None: + payload[key] = mean_value + return payload + + +def summarize_public_benchmark_package( + result_payloads: list[dict[str, Any]], + *, + bootstrap_samples: int = 2000, + bootstrap_seed: int = 0, + allow_partial: bool = False, +) -> dict[str, Any]: + records = [_normalize_record(payload) for payload in result_payloads] + _validate_protocols(records) + _validate_training_fairness(records) + + grouped: dict[tuple[str, str], list[dict[str, Any]]] = {} + for record in records: + grouped.setdefault((record["track_id"], record["adapter_mode"]), []).append(record) + + track_summaries: dict[str, Any] = {} + target_deltas: list[float] = [] + anchor_pass = True + sign_of_life_tracks: list[str] = [] + ci_above_zero_tracks: list[str] = [] + available_tracks: list[str] = [] + + for track in public_benchmark_tracks(): + track_modes = expected_eval_modes(track.track_id) + mode_payloads: dict[str, Any] = {} + missing_modes: list[str] = [] + for mode in track_modes: + mode_records = grouped.get((track.track_id, mode), []) + if not mode_records: + missing_modes.append(mode) + continue + mode_payloads[mode] = _aggregate_mode(mode_records) + if missing_modes: + if allow_partial: + continue + raise ValueError(f"Missing results for track {track.track_id!r}, mode(s) 
{missing_modes!r}.") + available_tracks.append(track.track_id) + + track_summary: dict[str, Any] = { + "suite": track.suite, + "benchmark_task": track.benchmark_task, + "role": track.role, + "task_family": track.task_family, + "target_behavior": track.target_behavior, + "public_source": track.public_source, + "notes": track.notes, + "modes": { + mode: { + key: value + for key, value in payload.items() + if key != "success_samples" + } + for mode, payload in mode_payloads.items() + }, + } + + if track.role == TARGET_ROLE: + trunk = mode_payloads["trunk_only_ft"] + active = mode_payloads["adapter_active_ft"] + noop = mode_payloads["adapter_noop"] + delta_active = float(active["mean_success"] - trunk["mean_success"]) + delta_noop = float(noop["mean_success"] - trunk["mean_success"]) + target_deltas.append(delta_active) + ci_low, ci_high = _bootstrap_delta_ci( + np.asarray(active["success_samples"], dtype=np.float32), + np.asarray(trunk["success_samples"], dtype=np.float32), + bootstrap_samples=bootstrap_samples, + bootstrap_seed=bootstrap_seed + len(target_deltas), + ) + sign_of_life = bool( + float(active.get("intervention_rate", 0.0)) >= DEFAULT_SIGN_OF_LIFE_INTERVENTION + and float(active.get("non_base_selection_rate", 0.0)) >= DEFAULT_SIGN_OF_LIFE_NON_BASE + and delta_active >= DEFAULT_SIGN_OF_LIFE_GAIN + ) + if sign_of_life: + sign_of_life_tracks.append(track.track_id) + if ci_low > 0.0: + ci_above_zero_tracks.append(track.track_id) + track_summary.update( + { + "delta_active_vs_trunk": delta_active, + "delta_noop_vs_trunk": delta_noop, + "delta_active_vs_trunk_ci95": [ci_low, ci_high], + "signs_of_life": sign_of_life, + } + ) + else: + trunk = mode_payloads["trunk_only"] + active = mode_payloads["adapter_active"] + noop = mode_payloads["adapter_noop"] + active_delta = float(active["mean_success"] - trunk["mean_success"]) + noop_delta = float(noop["mean_success"] - trunk["mean_success"]) + within_tolerance = bool( + abs(active_delta) <= DEFAULT_ANCHOR_TOLERANCE + 
and abs(noop_delta) <= DEFAULT_ANCHOR_TOLERANCE + ) + anchor_pass = anchor_pass and within_tolerance + track_summary.update( + { + "delta_active_vs_trunk": active_delta, + "delta_noop_vs_trunk": noop_delta, + "anchor_within_tolerance": within_tolerance, + } + ) + + track_summaries[track.track_id] = track_summary + + headline_pass = bool( + target_deltas + and all(delta > 0.0 for delta in target_deltas) + and len(ci_above_zero_tracks) >= 1 + ) + sign_of_life_pass = len(sign_of_life_tracks) >= 2 + + return { + "package_name": default_public_benchmark_manifest()["package_name"], + "tracks": track_summaries, + "available_tracks": available_tracks, + "target_macro_average_delta": float(np.mean(target_deltas)) if target_deltas else 0.0, + "headline_pass": headline_pass, + "sign_of_life_pass": sign_of_life_pass, + "sign_of_life_track_count": len(sign_of_life_tracks), + "sign_of_life_tracks": sign_of_life_tracks, + "ci_above_zero_tracks": ci_above_zero_tracks, + "anchor_pass": anchor_pass, + } + + +def _write_markdown(output_path: Path, summary: dict[str, Any]) -> None: + lines = [ + "# Public Benchmark Package Summary", + "", + f"- package_name: {summary['package_name']}", + f"- headline_pass: {summary['headline_pass']}", + f"- sign_of_life_pass: {summary['sign_of_life_pass']}", + f"- sign_of_life_track_count: {summary['sign_of_life_track_count']}", + f"- anchor_pass: {summary['anchor_pass']}", + f"- target_macro_average_delta: {summary['target_macro_average_delta']:.3f}", + "", + ] + for track_id, payload in summary["tracks"].items(): + lines.append(f"## {track_id}") + lines.append(f"- suite: {payload['suite']}") + lines.append(f"- benchmark_task: {payload['benchmark_task']}") + lines.append(f"- role: {payload['role']}") + for mode, mode_payload in payload["modes"].items(): + lines.append(f"- {mode}: mean_success={mode_payload['mean_success']:.3f}, num_runs={mode_payload['num_runs']}") + if "delta_active_vs_trunk" in payload: + lines.append(f"- delta_active_vs_trunk: 
{payload['delta_active_vs_trunk']:.3f}") + if "delta_noop_vs_trunk" in payload: + lines.append(f"- delta_noop_vs_trunk: {payload['delta_noop_vs_trunk']:.3f}") + if "delta_active_vs_trunk_ci95" in payload: + low, high = payload["delta_active_vs_trunk_ci95"] + lines.append(f"- delta_active_vs_trunk_ci95: [{low:.3f}, {high:.3f}]") + if "signs_of_life" in payload: + lines.append(f"- signs_of_life: {payload['signs_of_life']}") + if "anchor_within_tolerance" in payload: + lines.append(f"- anchor_within_tolerance: {payload['anchor_within_tolerance']}") + lines.append("") + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Validate and summarize the public benchmark package results.") + parser.add_argument("--result", action="append", default=[], help="Path to a normalized benchmark result JSON.") + parser.add_argument("--output-dir", type=Path, default=Path.home() / "workspace" / "reports" / "public_benchmark_package_v1") + parser.add_argument("--bootstrap-samples", type=int, default=2000) + parser.add_argument("--bootstrap-seed", type=int, default=0) + parser.add_argument("--write-default-manifest", type=Path, default=None) + return parser.parse_args() + + +def main() -> None: + args = _parse_args() + if args.write_default_manifest is not None: + path = write_default_public_benchmark_manifest(args.write_default_manifest) + print(json.dumps({"wrote_manifest": str(path)}, indent=2)) + if not args.result: + return + if not args.result: + raise SystemExit("No results provided. 
Pass one or more --result files or use --write-default-manifest.") + + payloads = [_load_json(path) for path in args.result] + summary = summarize_public_benchmark_package( + payloads, + bootstrap_samples=args.bootstrap_samples, + bootstrap_seed=args.bootstrap_seed, + ) + + args.output_dir.mkdir(parents=True, exist_ok=True) + json_path = args.output_dir / "public_benchmark_package_summary.json" + md_path = args.output_dir / "public_benchmark_package_summary.md" + json_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8") + _write_markdown(md_path, summary) + print(json.dumps({"summary_json": str(json_path), "summary_md": str(md_path)}, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d0374331ed8537b6c100bb2280d6bbd73152636 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b9f159210dbfaedda3b9cff88fac6e6976d7b Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..1e2042cdcabc6af45ea3beb4596736bee9494a89 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62ee359f27c7541978af015153ac6734354391ff Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..590f1fdb49ac4176c0e098b7ae622e8ba98718a1 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..505141e4ac75aa4c7bb16bf8b50e808d0b15a9ef Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..03afac5e306a90ab692b4ba03d96058d1162a715 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2da10b51a41c54b4d35a34c28049da60b86e0284 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1ceaaea4bbff4ca15b8f119054a7048c421686d Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72897081a51a82ca22f5fdc9c4527ecc2afc1d4f Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/observation_memory.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-310.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..ddac7b1afac4266f7622a9eab6aff5de87f9cfa2 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b294dae66024c99bb1fdf2f4bfeab14c27123b9 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/planner.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cc1a59a658473e16f18164d23872797337a8db2 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9c1f0d96f5fb36352687857165f6c07991af55d Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/policy.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5369fcabe9ef5e18892e580e6665ed67139e9f85 
Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f76fa1e0a30444f9d2ffcdb1413c593634fbad4e Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b82245f4d617bf57bd41443dd1b6f1fbfbe1bde9 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da6da53cdd3a08283113a7d4d1f8fcc0b3d81cfb Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/rvt_backbone.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2489a9591a3e1300a8c223e0ff5bf00f8e38dca Binary files /dev/null and 
b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44a919c3c416000b13049fdabec04e3341e30be9 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py new file mode 100644 index 0000000000000000000000000000000000000000..d73596940b6f8172b27ef95e09c16b2503b219a4 --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py @@ -0,0 +1,887 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import torch +from torch import Tensor, nn + + +@dataclass +class PlannerConfig: + hidden_dim: int = 512 + num_candidates: int = 8 + action_dim: int = 14 + num_support_modes: int = 3 + utility_margin: float = 0.1 + corridor_weight: float = 1.0 + persistence_weight: float = 0.5 + proposal_weight: float = 0.5 + task_progress_weight: float = 0.75 + disturbance_weight: float = 0.75 + reocclusion_weight: float = 0.5 + visibility_weight: float = 0.25 + num_heads: int = 4 + num_layers: int = 2 + num_phases: int = 5 + num_arm_roles: int = 4 + top_k: int = 4 + belief_gain_weight: float = 1.0 + visibility_gain_weight: float = 0.75 + clearance_weight: float = 0.75 + occluder_contact_weight: float = 0.5 + grasp_affordance_weight: float = 0.75 + support_stability_weight: float = 0.5 + residual_weight: float = 0.5 + retrieve_access_threshold: float = 0.15 + retrieve_persistence_threshold: float = 0.15 + 
retrieve_support_threshold: float = 0.25 + retrieve_reocclusion_threshold: float = 0.6 + adapter_confidence_threshold: float = 0.55 + mode_preference_bonus: float = 3.0 + premature_retrieve_penalty: float = 1.5 + premature_insert_penalty: float = 0.75 + premature_occlusion_sweep_penalty: float = 0.75 + premature_maintain_penalty: float = 0.0 + retrieve_stage_access_threshold: float = 0.45 + retrieve_stage_reveal_threshold: float = 0.40 + retrieve_stage_persistence_threshold: float = 0.20 + retrieve_stage_support_threshold: float = 0.25 + insert_stage_access_threshold: float = 0.40 + insert_stage_visibility_threshold: float = 0.30 + insert_stage_support_threshold: float = 0.25 + occlusion_maintain_gap_min_access: float = 0.0 + occlusion_maintain_gap_min_visibility: float = 0.0 + + +class RevealPlanner(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + self.config = config + summary_dim = ( + config.action_dim * 2 + + 3 + + 3 + + 1 + + 3 + + 1 + ) + self.trunk = nn.Sequential( + nn.LayerNorm(summary_dim), + nn.Linear(summary_dim, config.hidden_dim), + nn.GELU(), + nn.Linear(config.hidden_dim, config.hidden_dim), + nn.GELU(), + ) + self.success_head = nn.Linear(config.hidden_dim, 1) + self.risk_head = nn.Linear(config.hidden_dim, 1) + + def summarize_candidates(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor: + candidate_mean = candidate_chunks.mean(dim=2) + candidate_terminal = candidate_chunks[:, :, -1] + corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=-2) + persistence = rollout_state["persistence_horizon"].mean(dim=-2) + disturbance = rollout_state["disturbance_cost"].mean(dim=-1, keepdim=True) + reocclusion = rollout_state["reocclusion_logit"].sigmoid().mean(dim=-2) + uncertainty = rollout_state["uncertainty"].mean(dim=-1, keepdim=True) + return torch.cat( + [ + candidate_mean, + candidate_terminal, + corridor_prob, + persistence, + disturbance, + reocclusion, + 
uncertainty, + ], + dim=-1, + ) + + def score_rollouts(self, rollout_state: dict[str, Tensor], candidate_chunks: Tensor) -> dict[str, Tensor]: + features = self.summarize_candidates(candidate_chunks, rollout_state) + hidden = self.trunk(features) + success_logits = self.success_head(hidden).squeeze(-1) + risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1) + utility_scores = success_logits.sigmoid() - risk_values + return { + "planner_features": features, + "planner_hidden": hidden, + "success_logits": success_logits, + "risk_values": risk_values, + "utility_scores": utility_scores, + } + + def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> dict[str, Tensor]: + outputs = self.score_rollouts(rollout_state=rollout_state, candidate_chunks=candidate_chunks) + best_idx = outputs["utility_scores"].argmax(dim=-1) + batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device) + return { + **outputs, + "best_indices": best_idx, + "best_chunk": candidate_chunks[batch_indices, best_idx], + } + + +class InteractionPlanner(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + self.config = config + step_dim = ( + config.action_dim + + config.num_phases + + (2 * config.num_arm_roles) + + config.num_support_modes + + 7 + ) + self.step_proj = nn.Sequential( + nn.LayerNorm(step_dim), + nn.Linear(step_dim, config.hidden_dim), + nn.GELU(), + ) + encoder_layer = nn.TransformerEncoderLayer( + d_model=config.hidden_dim, + nhead=config.num_heads, + dim_feedforward=config.hidden_dim * 4, + batch_first=True, + norm_first=True, + ) + self.sequence_encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers) + self.cls_token = nn.Parameter(torch.randn(1, 1, config.hidden_dim) * 0.02) + self.success_head = nn.Linear(config.hidden_dim, 1) + self.risk_head = nn.Linear(config.hidden_dim, 1) + self.score_head = nn.Linear(config.hidden_dim, 1) + + def _mean_field(self, tensor: 
Tensor) -> Tensor: + return tensor.mean(dim=(-1, -2)) + + def summarize_trajectory(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor: + horizon = min(candidate_chunks.shape[2], rollout_state["phase_logits"].shape[2]) + candidate_steps = candidate_chunks[:, :, :horizon] + phase_probs = rollout_state["phase_logits"][:, :, :horizon].softmax(dim=-1) + support_probs = rollout_state["support_mode_logits"][:, :, :horizon].softmax(dim=-1) + arm_role_probs = rollout_state["arm_role_logits"][:, :, :horizon].softmax(dim=-1).flatten(start_dim=-2) + target_mean = self._mean_field(rollout_state["target_field"][:, :, :horizon].sigmoid()) + feasibility_mean = self._mean_field(rollout_state["actor_feasibility_field"][:, :, :horizon].sigmoid()) + persistence_mean = self._mean_field(rollout_state["persistence_field"][:, :, :horizon]) + risk_mean = self._mean_field(rollout_state["risk_field"][:, :, :horizon]) + uncertainty_mean = self._mean_field(rollout_state["uncertainty_field"][:, :, :horizon]) + role_gap = ( + rollout_state["arm_role_logits"][:, :, :horizon, 0].softmax(dim=-1) + - rollout_state["arm_role_logits"][:, :, :horizon, 1].softmax(dim=-1) + ).abs().mean(dim=-1, keepdim=True) + return torch.cat( + [ + candidate_steps, + phase_probs, + arm_role_probs, + support_probs, + target_mean, + feasibility_mean, + persistence_mean, + risk_mean, + uncertainty_mean, + role_gap, + ], + dim=-1, + ) + + def score_rollouts( + self, + rollout_state: dict[str, Tensor], + candidate_chunks: Tensor, + proposal_logits: Tensor | None = None, + ) -> dict[str, Tensor]: + features = self.summarize_trajectory(candidate_chunks, rollout_state) + batch_size, num_candidates, horizon, _ = features.shape + flat_features = features.view(batch_size * num_candidates, horizon, -1) + hidden_steps = self.step_proj(flat_features) + cls = self.cls_token.expand(batch_size * num_candidates, -1, -1) + encoded = self.sequence_encoder(torch.cat([cls, hidden_steps], dim=1)) + pooled = 
encoded[:, 0] + success_logits = self.success_head(pooled).view(batch_size, num_candidates).squeeze(-1) + risk_values = torch.sigmoid(self.risk_head(pooled)).view(batch_size, num_candidates).squeeze(-1) + utility_scores = self.score_head(pooled).view(batch_size, num_candidates).squeeze(-1) + utility_scores = utility_scores + success_logits.sigmoid() - risk_values + if proposal_logits is not None and proposal_logits.shape == utility_scores.shape: + utility_scores = utility_scores + self.config.proposal_weight * proposal_logits.sigmoid() + return { + "planner_features": features.mean(dim=2), + "planner_hidden": pooled.view(batch_size, num_candidates, -1), + "success_logits": success_logits, + "risk_values": risk_values, + "utility_scores": utility_scores, + } + + def select_best( + self, + candidate_chunks: Tensor, + rollout_state: dict[str, Tensor], + proposal_logits: Tensor | None = None, + ) -> dict[str, Tensor]: + outputs = self.score_rollouts( + rollout_state=rollout_state, + candidate_chunks=candidate_chunks, + proposal_logits=proposal_logits, + ) + best_idx = outputs["utility_scores"].argmax(dim=-1) + batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device) + return { + **outputs, + "best_indices": best_idx, + "best_chunk": candidate_chunks[batch_indices, best_idx], + } + + +class StructuredElasticUtility(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + self.config = config + + def _field_mean(self, tensor: Tensor) -> Tensor: + if tensor.ndim == 6: + return tensor.mean(dim=(-1, -2, -3)) + if tensor.ndim == 5: + return tensor.mean(dim=(-1, -2)) + if tensor.ndim == 4: + return tensor.mean(dim=(-1, -2)) + return tensor + + def _initial_scalar(self, state: dict[str, Tensor], key: str) -> Tensor: + value = state[key] + if value.ndim >= 4: + return value.mean(dim=tuple(range(1, value.ndim))) + if value.ndim == 3: + return value.mean(dim=(-1, -2)) + if value.ndim == 2: + return value.mean(dim=-1) + 
return value + + def forward( + self, + initial_state: dict[str, Tensor], + rollout_state: dict[str, Tensor], + candidate_chunks: Tensor, + ) -> dict[str, Tensor]: + initial_belief = self._initial_scalar(initial_state, "target_belief_field").unsqueeze(1) + initial_visibility = self._initial_scalar(initial_state, "visibility_field").unsqueeze(1) + belief_future = self._field_mean(rollout_state["target_belief_field"]).mean(dim=-1) + visibility_future = self._field_mean(rollout_state["visibility_field"]).mean(dim=-1) + clearance = self._field_mean(rollout_state["clearance_field"]).mean(dim=-1) + occluder_contact = self._field_mean(rollout_state["occluder_contact_field"]).mean(dim=-1) + grasp_affordance = self._field_mean(rollout_state["grasp_affordance_field"]).mean(dim=-1) + support_stability = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).mean(dim=-1) + persistence_traj = self._field_mean(rollout_state["persistence_field"]) + reocclusion_traj = self._field_mean(rollout_state["reocclusion_field"]) + disturbance_traj = self._field_mean(rollout_state["disturbance_field"]) + access_traj = torch.sigmoid(self._field_mean(rollout_state["access_field"])) + persistence = persistence_traj.mean(dim=-1) + reocclusion = reocclusion_traj.mean(dim=-1) + disturbance = disturbance_traj.mean(dim=-1) + access_quality = access_traj.mean(dim=-1) + access_floor = access_traj.amin(dim=-1) + persistence_floor = persistence_traj.amin(dim=-1) + support_floor = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).amin(dim=-1) + reocclusion_worst = reocclusion_traj.amax(dim=-1) + retrieve_progress = torch.sigmoid(candidate_chunks[:, :, :, -1]).mean(dim=-1) + utility = ( + self.config.belief_gain_weight * (belief_future - initial_belief) + + self.config.visibility_gain_weight * (visibility_future - initial_visibility) + + self.config.clearance_weight * clearance + + self.config.occluder_contact_weight * occluder_contact + + 
self.config.grasp_affordance_weight * grasp_affordance + + self.config.persistence_weight * persistence + + self.config.support_stability_weight * support_stability + + self.config.corridor_weight * access_quality + + self.config.task_progress_weight * retrieve_progress + - self.config.reocclusion_weight * reocclusion + - self.config.disturbance_weight * disturbance + - self.config.visibility_weight * (1.0 - visibility_future) + ) + return { + "belief_gain": belief_future - initial_belief, + "visibility_gain": visibility_future - initial_visibility, + "clearance": clearance, + "occluder_contact_quality": occluder_contact, + "grasp_affordance": grasp_affordance, + "persistence": persistence, + "support_stability": support_stability, + "reocclusion_penalty": reocclusion, + "reocclusion_worst": reocclusion_worst, + "disturbance_penalty": disturbance, + "access_quality": access_quality, + "access_floor": access_floor, + "persistence_floor": persistence_floor, + "support_floor": support_floor, + "task_progress": retrieve_progress, + "utility_structured": utility, + } + + +class ResidualPlannerScorer(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + feature_dim = (config.action_dim * 2) + 11 + self.trunk = nn.Sequential( + nn.LayerNorm(feature_dim), + nn.Linear(feature_dim, config.hidden_dim), + nn.GELU(), + nn.Linear(config.hidden_dim, config.hidden_dim), + nn.GELU(), + ) + self.success_head = nn.Linear(config.hidden_dim, 1) + self.risk_head = nn.Linear(config.hidden_dim, 1) + self.residual_head = nn.Linear(config.hidden_dim, 1) + + def forward( + self, + candidate_chunks: Tensor, + structured: dict[str, Tensor], + proposal_logits: Tensor | None = None, + ) -> dict[str, Tensor]: + candidate_mean = candidate_chunks.mean(dim=2) + candidate_terminal = candidate_chunks[:, :, -1] + components = torch.stack( + [ + structured["belief_gain"], + structured["visibility_gain"], + structured["clearance"], + 
structured["occluder_contact_quality"], + structured["grasp_affordance"], + structured["persistence"], + structured["support_stability"], + structured["reocclusion_penalty"], + structured["disturbance_penalty"], + structured["access_quality"], + structured["task_progress"], + ], + dim=-1, + ) + features = torch.cat([candidate_mean, candidate_terminal, components], dim=-1) + hidden = self.trunk(features) + success_logits = self.success_head(hidden).squeeze(-1) + risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1) + residual = self.residual_head(hidden).squeeze(-1) + if proposal_logits is not None and proposal_logits.shape == residual.shape: + residual = residual + 0.25 * proposal_logits.sigmoid() + return { + "planner_hidden": hidden, + "success_logits": success_logits, + "risk_values": risk_values, + "utility_residual": residual, + } + + +class CascadePlanner(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + self.config = config + self.structured = StructuredElasticUtility(config) + self.residual = ResidualPlannerScorer(config) + + def shortlist( + self, + proposal_logits: Tensor | None, + candidate_chunks: Tensor, + proposal_mode_assignments: Tensor | None = None, + ) -> Tensor: + batch_size, num_candidates = candidate_chunks.shape[:2] + top_k = min(max(1, self.config.top_k), num_candidates) + if proposal_logits is None: + cheap_scores = -candidate_chunks.square().mean(dim=(-1, -2)) + else: + cheap_scores = proposal_logits + if proposal_mode_assignments is None: + return cheap_scores.topk(top_k, dim=-1).indices + if proposal_mode_assignments.ndim == 1: + proposal_mode_assignments = proposal_mode_assignments.unsqueeze(0).expand(batch_size, -1) + + shortlisted = [] + for batch_idx in range(batch_size): + scores = cheap_scores[batch_idx] + mode_ids = proposal_mode_assignments[batch_idx] + mode_best: list[tuple[float, int]] = [] + for mode_id in torch.unique(mode_ids): + mode_indices = torch.nonzero(mode_ids == 
mode_id, as_tuple=False).squeeze(-1) + best_local = mode_indices[scores[mode_indices].argmax()] + mode_best.append((float(scores[best_local].detach()), int(best_local))) + mode_best.sort(key=lambda item: item[0], reverse=True) + chosen = [index for _, index in mode_best[:top_k]] + if len(chosen) < top_k: + for candidate_idx in scores.argsort(descending=True).tolist(): + if candidate_idx not in chosen: + chosen.append(candidate_idx) + if len(chosen) >= top_k: + break + shortlisted.append(torch.as_tensor(chosen[:top_k], device=candidate_chunks.device, dtype=torch.long)) + return torch.stack(shortlisted, dim=0) + + def select_best( + self, + initial_state: dict[str, Tensor], + candidate_chunks: Tensor, + rollout_state: dict[str, Tensor], + proposal_logits: Tensor | None = None, + candidate_indices: Tensor | None = None, + proposal_mode_names: list[list[str]] | None = None, + ) -> dict[str, Tensor]: + structured = self.structured( + initial_state=initial_state, + rollout_state=rollout_state, + candidate_chunks=candidate_chunks, + ) + residual = self.residual( + candidate_chunks=candidate_chunks, + structured=structured, + proposal_logits=proposal_logits, + ) + utility_total = structured["utility_structured"] + self.config.residual_weight * residual["utility_residual"] + utility_total = utility_total + residual["success_logits"].sigmoid() - residual["risk_values"] + feasibility_penalty = torch.zeros_like(utility_total) + if proposal_mode_names is not None: + retrieve_like = torch.zeros_like(utility_total, dtype=torch.bool) + for batch_idx, names in enumerate(proposal_mode_names): + for candidate_idx, name in enumerate(names[: utility_total.shape[1]]): + retrieve_like[batch_idx, candidate_idx] = any( + token in name for token in ("retrieve", "insert_actor", "probe_inside") + ) + blocked = ( + (structured["access_floor"] < 0.15) + | (structured["persistence_floor"] < 0.15) + | (structured["support_floor"] < 0.25) + | (structured["reocclusion_worst"] > 0.6) + ) + 
feasibility_penalty = retrieve_like.to(dtype=utility_total.dtype) * blocked.to(dtype=utility_total.dtype) * 2.0 + utility_total = utility_total - feasibility_penalty + best_local = utility_total.argmax(dim=-1) + batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device) + if candidate_indices is None: + best_indices = best_local + else: + best_indices = candidate_indices[batch_indices, best_local] + return { + **structured, + **residual, + "utility_total": utility_total, + "utility_scores": utility_total, + "feasibility_penalty": feasibility_penalty, + "best_indices": best_indices, + "best_chunk": candidate_chunks[batch_indices, best_local], + "ranking_diagnostics": { + "topk_indices": candidate_indices if candidate_indices is not None else best_local.unsqueeze(-1), + "best_local_indices": best_local, + }, + } + + +def _summary_scalar(state: dict[str, Tensor], key: str, fallback_keys: tuple[str, ...] = ()) -> Tensor: + for candidate in (key, *fallback_keys): + value = state.get(candidate) + if value is None: + continue + if value.ndim >= 5: + return value.mean(dim=tuple(range(value.ndim - 2, value.ndim))).mean(dim=-1) + if value.ndim == 4: + return value.mean(dim=(-1, -2)) + if value.ndim == 3: + return value + if value.ndim == 2: + return value + return value.unsqueeze(-1) + raise KeyError(f"Missing summary key {key} and fallbacks {fallback_keys}.") + + +def _optional_summary_scalar( + state: dict[str, Tensor], + key: str, + *, + reference: Tensor, + fallback_keys: tuple[str, ...] 
def _optional_summary_scalar(
    state: dict[str, Tensor],
    key: str,
    *,
    reference: Tensor,
    fallback_keys: tuple[str, ...] = (),
) -> Tensor:
    """Like `_summary_scalar`, but return zeros shaped like `reference` when the key is absent."""
    try:
        return _summary_scalar(state, key, fallback_keys)
    except KeyError:
        return torch.zeros_like(reference)


class ElasticFeasibilityGate(nn.Module):
    """Hard gate that vetoes retrieve-like candidates with infeasible rollout summaries."""

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        self.config = config

    def forward(
        self,
        *,
        rollout_state: dict[str, Tensor],
        proposal_mode_names: list[list[str]],
    ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]:
        # Per-candidate summaries; assumed (batch, candidates, horizon) — TODO confirm.
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Worst case over the horizon: min for good channels, max for bad ones.
        access_floor = access.amin(dim=-1)
        persistence_floor = persistence.amin(dim=-1)
        support_floor = support.amin(dim=-1)
        reocclusion_worst = reocclusion.amax(dim=-1)
        disturbance_worst = disturbance.amax(dim=-1)

        cfg = self.config
        blocked = (
            (access_floor < cfg.retrieve_access_threshold)
            | (persistence_floor < cfg.retrieve_persistence_threshold)
            | (support_floor < cfg.retrieve_support_threshold)
            | (reocclusion_worst > cfg.retrieve_reocclusion_threshold)
        )
        penalties = 2.0 * blocked.to(dtype=access.dtype)
        allowed_mask = torch.ones_like(access_floor, dtype=torch.bool)
        reject_diagnostics: list[list[dict[str, float | bool | str]]] = []
        retrieve_tokens = ("retrieve", "insert_actor", "probe_inside")
        for sample_idx, names in enumerate(proposal_mode_names):
            records: list[dict[str, float | bool | str]] = []
            for cand_idx, name in enumerate(names[: access_floor.shape[1]]):
                retrieve_like = any(token in name for token in retrieve_tokens)
                candidate_blocked = bool(retrieve_like and blocked[sample_idx, cand_idx])
                if candidate_blocked:
                    # Only retrieve-like candidates are hard-masked; others just pay penalties.
                    allowed_mask[sample_idx, cand_idx] = False
                records.append(
                    {
                        "mode_name": name,
                        "retrieve_like": retrieve_like,
                        "blocked": candidate_blocked,
                        "access_floor": float(access_floor[sample_idx, cand_idx].detach()),
                        "persistence_floor": float(persistence_floor[sample_idx, cand_idx].detach()),
                        "support_floor": float(support_floor[sample_idx, cand_idx].detach()),
                        "reocclusion_worst": float(reocclusion_worst[sample_idx, cand_idx].detach()),
                        "disturbance_worst": float(disturbance_worst[sample_idx, cand_idx].detach()),
                    }
                )
            reject_diagnostics.append(records)

        # Heuristic confidence from horizon-averaged summaries.
        confidence = torch.sigmoid(
            2.0 * access.mean(dim=-1)
            + 1.5 * persistence.mean(dim=-1)
            + 1.5 * support.mean(dim=-1)
            - 1.5 * reocclusion.mean(dim=-1)
            - disturbance.mean(dim=-1)
        )
        return {
            "allowed_mask": allowed_mask,
            "penalties": penalties,
            "blocked_mask": blocked,
            "adapter_confidence": confidence,
            "gate_access_floor": access_floor,
            "gate_persistence_floor": persistence_floor,
            "gate_support_floor": support_floor,
            "gate_reocclusion_worst": reocclusion_worst,
            "reject_diagnostics": reject_diagnostics,
        }


class ResidualActionReranker(nn.Module):
    """Learned residual scorer over candidate chunks plus scalar rollout summaries."""

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        # Mean + terminal action features plus eight scalar rollout summaries.
        feature_dim = (config.action_dim * 2) + 8
        self.network = nn.Sequential(
            nn.LayerNorm(feature_dim),
            nn.Linear(feature_dim, config.hidden_dim),
            nn.GELU(),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.GELU(),
        )
        self.score_head = nn.Linear(config.hidden_dim, 1)
        self.success_head = nn.Linear(config.hidden_dim, 1)
        self.risk_head = nn.Linear(config.hidden_dim, 1)

    def forward(
        self,
        *,
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_logits: Tensor | None,
    ) -> dict[str, Tensor]:
        chunk_mean = candidate_chunks.mean(dim=2)
        chunk_terminal = candidate_chunks[:, :, -1]
        visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",))
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Optional task-family summaries default to zeros when absent.
        fold_preservation = _optional_summary_scalar(
            rollout_state,
            "fold_preservation_summary",
            reference=visibility,
            fallback_keys=("fold_preservation",),
        )
        lift_risk = _optional_summary_scalar(
            rollout_state,
            "lift_too_much_risk_summary",
            reference=visibility,
            fallback_keys=("lift_too_much_risk",),
        )
        # Column order is part of the learned feature layout — do not reorder.
        summary_columns = [
            summary.mean(dim=-1, keepdim=True)
            for summary in (
                visibility,
                access,
                persistence,
                support,
                reocclusion,
                disturbance,
                fold_preservation,
                lift_risk,
            )
        ]
        features = torch.cat([chunk_mean, chunk_terminal, *summary_columns], dim=-1)
        hidden = self.network(features)
        residual = self.score_head(hidden).squeeze(-1)
        success = self.success_head(hidden).squeeze(-1)
        risk = torch.sigmoid(self.risk_head(hidden).squeeze(-1))
        # Nudge the residual with calibrated proposal confidence when shapes match.
        if proposal_logits is not None and proposal_logits.shape == residual.shape:
            residual = residual + 0.25 * proposal_logits.sigmoid()
        return {
            "residual_scores": residual,
            "planner_success_logits": success,
            "planner_risk_values": risk,
        }
("visibility_gain",)) + access = _summary_scalar(rollout_state, "access_summary", ("access_quality",)) + persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon")) + support = _summary_scalar(rollout_state, "support_summary", ("support_stability",)) + reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",)) + disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",)) + fold_preservation = _optional_summary_scalar( + rollout_state, + "fold_preservation_summary", + reference=visibility, + fallback_keys=("fold_preservation",), + ) + lift_risk = _optional_summary_scalar( + rollout_state, + "lift_too_much_risk_summary", + reference=visibility, + fallback_keys=("lift_too_much_risk",), + ) + features = torch.cat( + [ + candidate_mean, + candidate_terminal, + visibility.mean(dim=-1, keepdim=True), + access.mean(dim=-1, keepdim=True), + persistence.mean(dim=-1, keepdim=True), + support.mean(dim=-1, keepdim=True), + reocclusion.mean(dim=-1, keepdim=True), + disturbance.mean(dim=-1, keepdim=True), + fold_preservation.mean(dim=-1, keepdim=True), + lift_risk.mean(dim=-1, keepdim=True), + ], + dim=-1, + ) + hidden = self.network(features) + residual = self.score_head(hidden).squeeze(-1) + success = self.success_head(hidden).squeeze(-1) + risk = torch.sigmoid(self.risk_head(hidden).squeeze(-1)) + if proposal_logits is not None and proposal_logits.shape == residual.shape: + residual = residual + 0.25 * proposal_logits.sigmoid() + return { + "residual_scores": residual, + "planner_success_logits": success, + "planner_risk_values": risk, + } + + +class AdapterPlanner(nn.Module): + def __init__(self, config: PlannerConfig) -> None: + super().__init__() + self.config = config + self.gate = ElasticFeasibilityGate(config) + self.reranker = ResidualActionReranker(config) + + def select_best( + self, + *, + candidate_chunks: Tensor, + rollout_state: dict[str, Tensor], + 
proposal_mode_names: list[list[str]], + proposal_logits: Tensor | None = None, + planning_mode: str = "adapter_active", + ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]: + batch_size = candidate_chunks.shape[0] + batch_indices = torch.arange(batch_size, device=candidate_chunks.device) + if planning_mode in {"identity", "trunk_only", "adapter_noop"}: + zero_scores = candidate_chunks.new_zeros((batch_size, candidate_chunks.shape[1])) + return { + "best_indices": torch.zeros(batch_size, dtype=torch.long, device=candidate_chunks.device), + "best_chunk": candidate_chunks[:, 0], + "utility_scores": zero_scores, + "utility_total": zero_scores, + "planner_success_logits": zero_scores, + "planner_risk_values": zero_scores, + "adapter_confidence": candidate_chunks.new_ones((batch_size, candidate_chunks.shape[1])), + "reject_diagnostics": [[] for _ in range(batch_size)], + "planning_mode": planning_mode, + } + + gate_outputs = self.gate(rollout_state=rollout_state, proposal_mode_names=proposal_mode_names) + reranker = self.reranker( + candidate_chunks=candidate_chunks, + rollout_state=rollout_state, + proposal_logits=proposal_logits, + ) + utility = reranker["residual_scores"] + reranker["planner_success_logits"].sigmoid() - reranker["planner_risk_values"] + visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",)).mean(dim=-1) + access = _summary_scalar(rollout_state, "access_summary", ("access_quality",)).mean(dim=-1) + persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon")).mean(dim=-1) + support = _summary_scalar(rollout_state, "support_summary", ("support_stability",)).mean(dim=-1) + reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",)).mean(dim=-1) + disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",)).mean(dim=-1) + fold_preservation = _optional_summary_scalar( + rollout_state, + 
"fold_preservation_summary", + reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)), + fallback_keys=("fold_preservation",), + ).mean(dim=-1) + mouth_aperture = _optional_summary_scalar( + rollout_state, + "mouth_aperture_summary", + reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)), + fallback_keys=("mouth_aperture",), + ).mean(dim=-1) + layer_separation = _optional_summary_scalar( + rollout_state, + "layer_separation_summary", + reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)), + fallback_keys=("layer_separation_quality",), + ).mean(dim=-1) + lift_risk = _optional_summary_scalar( + rollout_state, + "lift_too_much_risk_summary", + reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)), + fallback_keys=("lift_too_much_risk",), + ).mean(dim=-1) + mode_bias = utility.new_zeros(utility.shape) + stage_penalty = utility.new_zeros(utility.shape) + unresolved_reveal = (1.0 - visibility) + (1.0 - access) + stabilized_reveal = 0.5 * (access + persistence + support) + # Use optimistic scene readiness summaries for stage switching. + # Candidate-level safety is still enforced by the retrieve gate below, so + # we should not let one poor candidate keep the entire scene stuck in + # "reveal forever" mode when another candidate already makes retrieve feasible. 
+ batch_visibility = visibility.amax(dim=1) + batch_access = access.amax(dim=1) + batch_persistence = persistence.amax(dim=1) + batch_support = support.amax(dim=1) + batch_reocclusion = reocclusion.amin(dim=1) + batch_disturbance = disturbance.amin(dim=1) + batch_fold = fold_preservation.amax(dim=1) + batch_mouth = mouth_aperture.amax(dim=1) + batch_layer = layer_separation.amax(dim=1) + batch_lift = lift_risk.amin(dim=1) + batch_reveal_readiness = torch.maximum(batch_visibility, batch_access) + for batch_idx, names in enumerate(proposal_mode_names): + is_bag = any(any(token in name for token in ("mouth", "rim", "probe_inside")) for name in names) + is_cloth = any(any(token in name for token in ("fold", "lift", "layer")) for name in names) + can_retrieve = ( + batch_access[batch_idx] >= self.config.retrieve_stage_access_threshold + and batch_reveal_readiness[batch_idx] >= self.config.retrieve_stage_reveal_threshold + and batch_persistence[batch_idx] >= self.config.retrieve_stage_persistence_threshold + and batch_support[batch_idx] >= self.config.retrieve_stage_support_threshold + and batch_reocclusion[batch_idx] <= self.config.retrieve_reocclusion_threshold + ) + if is_bag: + can_retrieve = bool( + can_retrieve + and batch_mouth[batch_idx] >= 0.30 + and batch_persistence[batch_idx] >= 0.55 + ) + elif is_cloth: + can_retrieve = bool( + can_retrieve + and batch_layer[batch_idx] >= 0.18 + and batch_fold[batch_idx] >= 0.60 + and batch_lift[batch_idx] <= 0.30 + and batch_support[batch_idx] >= 0.70 + ) + can_insert = ( + batch_access[batch_idx] >= self.config.insert_stage_access_threshold + and batch_visibility[batch_idx] >= self.config.insert_stage_visibility_threshold + and batch_support[batch_idx] >= self.config.insert_stage_support_threshold + and batch_reocclusion[batch_idx] <= 0.65 + ) + maintain_ready = ( + batch_access[batch_idx] >= self.config.occlusion_maintain_gap_min_access + and batch_visibility[batch_idx] >= self.config.occlusion_maintain_gap_min_visibility 
+ ) + if can_retrieve: + preferred_tokens = ("retrieve",) + elif can_insert: + preferred_tokens = ("probe_inside", "insert_actor") if is_bag else ("insert_actor",) + elif is_bag: + if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20: + preferred_tokens = ("widen_mouth", "maintain_mouth") + else: + preferred_tokens = ("maintain_mouth", "widen_mouth") + elif is_cloth: + if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20: + preferred_tokens = ("lift_edge", "separate_layer") + elif batch_lift[batch_idx] > 0.15 or batch_disturbance[batch_idx] > 0.25: + preferred_tokens = ("stabilize_fold", "maintain_lift") + else: + preferred_tokens = ("maintain_lift", "stabilize_fold") + else: + if not maintain_ready: + preferred_tokens = ("widen_gap", "pin_canopy", "sweep_left", "sweep_right") + elif batch_visibility[batch_idx] < 0.20 or batch_access[batch_idx] < 0.25: + preferred_tokens = ("widen_gap", "pin_canopy") + elif batch_disturbance[batch_idx] > 0.25 or batch_reocclusion[batch_idx] > 0.40: + preferred_tokens = ("maintain_gap", "pin_canopy") + else: + preferred_tokens = ("pin_canopy", "widen_gap") + for candidate_idx, name in enumerate(names[: utility.shape[1]]): + if name == "base_action": + continue + if any(token in name for token in ("retrieve",)): + bonus = ( + 0.85 * visibility[batch_idx, candidate_idx] + + 0.85 * access[batch_idx, candidate_idx] + + 0.65 * persistence[batch_idx, candidate_idx] + + 0.50 * support[batch_idx, candidate_idx] + - 0.60 * reocclusion[batch_idx, candidate_idx] + - 0.25 * disturbance[batch_idx, candidate_idx] + ) + elif any(token in name for token in ("insert_actor", "probe_inside")): + bonus = ( + 0.70 * visibility[batch_idx, candidate_idx] + + 0.70 * access[batch_idx, candidate_idx] + + 0.35 * persistence[batch_idx, candidate_idx] + - 0.35 * reocclusion[batch_idx, candidate_idx] + - 0.15 * disturbance[batch_idx, candidate_idx] + ) + elif any(token in name for token in ("maintain", "stabilize", 
"pin_canopy")): + bonus = ( + 0.85 * stabilized_reveal[batch_idx, candidate_idx] + + 0.25 * visibility[batch_idx, candidate_idx] + - 0.20 * reocclusion[batch_idx, candidate_idx] + - 0.10 * disturbance[batch_idx, candidate_idx] + ) + else: + bonus = ( + 0.95 * unresolved_reveal[batch_idx, candidate_idx] + + 0.20 * (1.0 - persistence[batch_idx, candidate_idx]) + - 0.10 * disturbance[batch_idx, candidate_idx] + ) + if any(token in name for token in ("fold", "lift", "layer")): + bonus = bonus + 0.35 * fold_preservation[batch_idx, candidate_idx] - 0.35 * lift_risk[batch_idx, candidate_idx] + if any(token in name for token in preferred_tokens): + bonus = bonus + self.config.mode_preference_bonus + elif "retrieve" in name and not can_retrieve: + bonus = bonus - self.config.premature_retrieve_penalty + stage_penalty[batch_idx, candidate_idx] = ( + stage_penalty[batch_idx, candidate_idx] + self.config.premature_retrieve_penalty + ) + elif is_cloth and any(token in name for token in ("stabilize", "maintain")) and any( + token in preferred_tokens for token in ("lift_edge", "separate_layer") + ): + bonus = bonus - 1.0 + stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.0 + elif (not is_bag and not is_cloth) and any(token in name for token in ("sweep_left", "sweep_right")) and any( + token in preferred_tokens for token in ("pin_canopy", "widen_gap", "maintain_gap") + ): + bonus = bonus - self.config.premature_occlusion_sweep_penalty + elif any(token in name for token in ("probe_inside", "insert_actor", "retrieve")) and not can_insert: + bonus = bonus - self.config.premature_insert_penalty + stage_penalty[batch_idx, candidate_idx] = ( + stage_penalty[batch_idx, candidate_idx] + self.config.premature_insert_penalty + ) + if ( + (not is_bag and not is_cloth) + and "maintain_gap" in name + and not maintain_ready + and self.config.premature_maintain_penalty > 0.0 + ): + bonus = bonus - self.config.premature_maintain_penalty + 
stage_penalty[batch_idx, candidate_idx] = ( + stage_penalty[batch_idx, candidate_idx] + self.config.premature_maintain_penalty + ) + if is_bag and (batch_mouth[batch_idx] < 0.18 or batch_access[batch_idx] < 0.15) and "widen_mouth" in name: + stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5 + if is_cloth and (batch_layer[batch_idx] < 0.12 or batch_visibility[batch_idx] < 0.05) and any( + token in name for token in ("lift_edge", "separate_layer") + ): + stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5 + mode_bias[batch_idx, candidate_idx] = bonus + utility = utility + mode_bias + utility = utility + 0.5 * fold_preservation - 0.5 * lift_risk + utility = utility - stage_penalty + utility = utility - gate_outputs["penalties"] + allowed_mask = gate_outputs["allowed_mask"] + assert isinstance(allowed_mask, Tensor) + utility = utility.masked_fill(~allowed_mask, -1e6) + best_indices = utility.argmax(dim=-1) + best_chunk = candidate_chunks[batch_indices, best_indices] + return { + "best_indices": best_indices, + "best_chunk": best_chunk, + "utility_scores": utility, + "utility_total": utility, + "planner_success_logits": reranker["planner_success_logits"], + "planner_risk_values": reranker["planner_risk_values"], + "adapter_confidence": gate_outputs["adapter_confidence"], + "allowed_mask": gate_outputs["allowed_mask"], + "reject_diagnostics": gate_outputs["reject_diagnostics"], + "planning_mode": planning_mode, + } diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09bc4ec43fb4c4c1a89778b44eedfc3e9a7aa380 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-310.pyc differ diff 
--git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab697d93d33857caf35bb00a9e20ee3c331cb310 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/__init__.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f088e4cff84d5cc7693164c7036061c7391982d Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..584de08b083a566253bb9f190e9103a6a917a849 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pytorch3d/__pycache__/transforms.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38c29d85f991b0468431e3ad690a015856208f0f Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-311.pyc differ diff --git 
a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19d7bda65195405efe0d2127cde7a7e90491c3cb Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf4cd6c33b8f2a354f3add8dec7c865a65ccd22c Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..988ef585750128f9c0cf0716890f9214c4af1c29 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b2041e1c36002841da5bb50848a92ec14dbaf46c Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4b157531afdca0384f53c9b89af101ca5c5afde Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/__init__.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/build_aligned_proposal_dataset.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/build_aligned_proposal_dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7955f87340724e9fde6095426d19c70951e327c2 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/build_aligned_proposal_dataset.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb723229faaa64bb23018e168fb0a1e610f44247 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..299f630646ac6a9979ea22c7a56bc0df199dc84e Binary files /dev/null and 
b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/checkpoint_compat.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/dataset_build_utils.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/dataset_build_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..174d4806a662cf9924ddb794fa892c8b451664bd Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/dataset_build_utils.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcf475d8e1ef1ba892c64cccd5c176d63c6e4294 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fe6dd29fd50e9bf84f35fc37a4719e4e875118f Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/losses.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/run_experiment.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/run_experiment.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f0e7261d141c0cbe47fd38e0c3763d115e13f06 Binary files /dev/null and 
b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/run_experiment.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-310.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e9fb61f10290ea8c37cccb7f6f85def744d6477 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-310.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-311.pyc b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..007cccdda81bb43c1f31eaedbc43dcb1a39467eb Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/__pycache__/trainer.cpython-311.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py new file mode 100644 index 0000000000000000000000000000000000000000..35118cf5328e53d19c272be3e61c7dfc7e6d3d01 --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py @@ -0,0 +1,855 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import torch +import torch.nn.functional as F +from torch import Tensor + +from models.reveal_head import TASK_METRIC_NAMES, task_metric_valid_mask + + +@dataclass +class LossWeights: + action: float = 1.0 + phase: float = 0.05 + arm_role: float = 0.2 + support_mode: float = 0.1 + corridor: float = 0.1 + persistence: float = 0.05 + disturbance: float = 0.05 + world_model: float = 0.1 + belief: float = 0.05 + visibility: float = 0.05 + clearance: float = 
0.05 + support_stability: float = 0.05 + reocclusion: float = 0.05 + occluder_contact: float = 0.05 + grasp_affordance: float = 0.05 + planner_success: float = 0.1 + planner_risk: float = 0.05 + planner_ranking: float = 0.05 + proposal_reconstruction: float = 0.1 + proposal_success: float = 0.05 + proposal_ranking: float = 0.05 + proposal_mode: float = 0.05 + proposal_mode_cloth_only: bool = False + proposal_mode_task_filter: list[str] | None = None + proposal_diversity: float = 0.05 + role_swap_consistency: float = 0.05 + task_metrics: float = 0.05 + transition: float = 0.0 + gate: float = 0.0 + distillation: float = 0.0 + calibration: float = 0.0 + + +def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | None = None) -> Tensor: + loss = F.smooth_l1_loss(pred_actions, target_actions, reduction="none") + if mask is not None: + loss = loss * mask.unsqueeze(-1) + return loss.sum() / mask.sum().clamp_min(1.0) + return loss.mean() + + +def _command_probability(command: Tensor) -> Tensor: + return (torch.tanh(command) + 1.0) * 0.5 + + +def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor: + open_cmd = action_chunk[..., 0] + actor_reach = _command_probability(action_chunk[..., 8]) + retrieve_cmd = _command_probability(action_chunk[..., 13]) + + retrieve = retrieve_cmd >= 0.55 + recover = open_cmd <= -0.10 + reveal = open_cmd > 0.35 + hold = (~retrieve) & (~recover) & (~reveal) & (actor_reach >= 0.55) + + phase_target = torch.zeros_like(open_cmd, dtype=torch.long) + phase_target = torch.where(reveal, torch.ones_like(phase_target), phase_target) + phase_target = torch.where(hold, torch.full_like(phase_target, 2), phase_target) + phase_target = torch.where(retrieve, torch.full_like(phase_target, 3), phase_target) + phase_target = torch.where(recover, torch.full_like(phase_target, 4), phase_target) + return phase_target + + +def _role_targets_like(arm_role_logits: Tensor) -> Tensor: + role_target = torch.as_tensor([1, 2], 
device=arm_role_logits.device, dtype=torch.long) + expand_shape = [1] * (arm_role_logits.ndim - 2) + [2] + return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1]) + + +def swap_arm_actions(action_chunk: Tensor) -> Tensor: + midpoint = action_chunk.shape[-1] // 2 + return torch.cat([action_chunk[..., midpoint:], action_chunk[..., :midpoint]], dim=-1) + + +def permutation_invariant_role_loss(arm_role_logits: Tensor) -> Tensor: + role_target = _role_targets_like(arm_role_logits) + swapped_target = role_target.flip(-1) + flat_logits = arm_role_logits.reshape(-1, arm_role_logits.shape[-1]) + loss_a = F.cross_entropy(flat_logits, role_target.reshape(-1), reduction="none").view(*role_target.shape) + loss_b = F.cross_entropy(flat_logits, swapped_target.reshape(-1), reduction="none").view(*role_target.shape) + return torch.minimum(loss_a.sum(dim=-1), loss_b.sum(dim=-1)).mean() + + +def role_swap_consistency_loss(pred: Tensor, target: Tensor) -> Tensor: + return F.smooth_l1_loss(pred, target) + + +def proposal_diversity_loss(proposal_candidates: Tensor, minimum_distance: float = 0.05) -> Tensor: + if proposal_candidates.ndim != 4 or proposal_candidates.shape[1] <= 1: + return proposal_candidates.new_tensor(0.0) + flat = proposal_candidates.flatten(start_dim=2) + distances = torch.cdist(flat, flat, p=1) + eye = torch.eye(distances.shape[-1], device=distances.device, dtype=torch.bool).unsqueeze(0) + valid = (~eye).expand(distances.shape[0], -1, -1) + if not valid.any(): + return proposal_candidates.new_tensor(0.0) + return torch.relu(minimum_distance - distances[valid]).mean() + + +def proposal_set_reconstruction_loss(proposal_candidates: Tensor, target_candidates: Tensor) -> Tensor: + if proposal_candidates.ndim != 4 or target_candidates.ndim != 4: + return proposal_candidates.new_tensor(0.0) + if proposal_candidates.shape[1] == 0 or target_candidates.shape[1] == 0: + return proposal_candidates.new_tensor(0.0) + flat_proposals = 
proposal_candidates.flatten(start_dim=2) + flat_targets = target_candidates.flatten(start_dim=2).to(dtype=flat_proposals.dtype) + distances = torch.cdist(flat_proposals, flat_targets, p=1) / float(max(1, flat_proposals.shape[-1])) + return 0.5 * (distances.min(dim=-1).values.mean() + distances.min(dim=-2).values.mean()) + + +def _proposal_target_batch(batch: dict[str, Tensor]) -> tuple[Tensor | None, Tensor | None, Tensor | None, Tensor | None]: + proposal_chunks = batch.get("proposal_target_action_chunks") + if proposal_chunks is None: + proposal_chunks = batch.get("candidate_action_chunks") + proposal_success = batch.get("proposal_target_retrieval_success") + if proposal_success is None: + proposal_success = batch.get("candidate_retrieval_success") + proposal_risk = batch.get("proposal_target_risk") + if proposal_risk is None: + proposal_risk = batch.get("candidate_risk") + proposal_utility = batch.get("proposal_target_utility") + if proposal_utility is None: + proposal_utility = batch.get("candidate_utility") + return proposal_chunks, proposal_success, proposal_risk, proposal_utility + + +def _proposal_mode_targets( + proposal_mode_assignments: Tensor, + proposal_success: Tensor, + proposal_utility: Tensor, + num_modes: int, +) -> tuple[Tensor, Tensor]: + batch_size, candidate_count = proposal_success.shape + mode_assignments = proposal_mode_assignments.view(-1)[:candidate_count].long().to(device=proposal_success.device) + mode_success = torch.zeros(batch_size, num_modes, dtype=proposal_success.dtype, device=proposal_success.device) + mode_utility = torch.full( + (batch_size, num_modes), + fill_value=-1e6, + dtype=proposal_utility.dtype, + device=proposal_utility.device, + ) + valid_assignment_mask = mode_assignments >= 0 + for mode_idx in range(num_modes): + mask = mode_assignments == mode_idx + if not torch.any(mask): + continue + mode_success[:, mode_idx] = proposal_success[:, mask].amax(dim=1) + mode_utility[:, mode_idx] = proposal_utility[:, 
mask].amax(dim=1) + no_mode = torch.logical_or( + ~valid_assignment_mask.any(), + torch.isclose(mode_success.sum(dim=1), mode_success.new_zeros(batch_size)), + ) + if torch.any(no_mode): + mode_utility[no_mode] = 0.0 + return mode_success, mode_utility + + +def _proposal_reconstruction_targets( + batch: dict[str, Tensor], + proposal_count: int, + fallback_targets: Tensor | None, +) -> Tensor | None: + task_name = batch.get("task_name") + if isinstance(task_name, str) and task_name == "bag" and fallback_targets is not None: + return fallback_targets + teacher_candidates = batch.get("candidate_action_chunks") + teacher_utility = batch.get("candidate_utility") + if teacher_candidates is None: + return fallback_targets + if teacher_utility is None or teacher_candidates.shape[1] <= 1: + return teacher_candidates + top_k = min(teacher_candidates.shape[1], max(1, proposal_count // 2)) + top_indices = teacher_utility.topk(k=top_k, dim=1).indices + gather_index = top_indices[..., None, None].expand( + -1, + -1, + teacher_candidates.shape[2], + teacher_candidates.shape[3], + ) + return teacher_candidates.gather(1, gather_index) + + +def _task_name_mask(batch: dict[str, Tensor | list[str] | tuple[str, ...] | str], task_name: str, batch_size: int, device: torch.device) -> Tensor | None: + return _task_name_mask_for_values(batch, [task_name], batch_size=batch_size, device=device) + + +def _task_name_mask_for_values( + batch: dict[str, Tensor | list[str] | tuple[str, ...] 
| str], + task_names: list[str] | tuple[str, ...], + batch_size: int, + device: torch.device, +) -> Tensor | None: + target_names = {str(name) for name in task_names} + task_names = batch.get("task_name") + if isinstance(task_names, str): + return torch.full((batch_size,), task_names in target_names, dtype=torch.bool, device=device) + if isinstance(task_names, (list, tuple)): + if len(task_names) < batch_size: + return None + values = [str(task_names[idx]) in target_names for idx in range(batch_size)] + return torch.as_tensor(values, dtype=torch.bool, device=device) + return None + + +def _resize_like(target: Tensor, prediction: Tensor) -> Tensor: + if target.shape == prediction.shape: + return target + if target.ndim == prediction.ndim == 4: + return F.interpolate(target.float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False) + if target.ndim == 3 and prediction.ndim == 4: + return F.interpolate(target.unsqueeze(1).float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False) + return target + + +def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]: + losses = {} + if "phase_logits" in pred: + if "phase" in target: + phase_target = target["phase"].long() + else: + action_chunk = target.get("action_chunk") + if action_chunk is not None: + phase_target = infer_phase_targets_from_actions(action_chunk[:, 0]) + else: + phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long) + phase_target = phase_map[target["support_mode"].long()] + losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target) + else: + losses["phase"] = pred["support_mode_logits"].new_tensor(0.0) + if "arm_role_logits" in pred: + role_ce = permutation_invariant_role_loss(pred["arm_role_logits"]) + role_probs = pred["arm_role_logits"].softmax(dim=-1) + role_gap = torch.mean(torch.abs(role_probs[:, 0] - role_probs[:, 1]), dim=-1) + role_separation = 
torch.relu(0.25 - role_gap).mean() + losses["arm_role"] = role_ce + 0.5 * role_separation + else: + losses["arm_role"] = pred["support_mode_logits"].new_tensor(0.0) + support_target = target["support_mode"].long() + losses["support_mode"] = F.cross_entropy(pred["support_mode_logits"], support_target) + losses["corridor"] = F.binary_cross_entropy_with_logits( + pred["corridor_logits"], + target["corridor_feasible"].float(), + ) + losses["persistence"] = F.mse_loss(pred["persistence_horizon"], target["persistence_horizon"].float()) + losses["disturbance"] = F.mse_loss(pred["disturbance_cost"], target["disturbance_cost"].float()) + if "belief_map" in pred and "belief_map" in target: + losses["belief"] = F.binary_cross_entropy_with_logits(pred["belief_map"], _resize_like(target["belief_map"].float(), pred["belief_map"])) + else: + losses["belief"] = pred["support_mode_logits"].new_tensor(0.0) + if "visibility_field" in pred and "visibility_map" in target: + losses["visibility"] = F.binary_cross_entropy_with_logits( + pred["visibility_field"], + _resize_like(target["visibility_map"].float(), pred["visibility_field"]), + ) + else: + losses["visibility"] = pred["support_mode_logits"].new_tensor(0.0) + if "clearance_field" in pred and "clearance_map" in target: + losses["clearance"] = F.binary_cross_entropy_with_logits( + pred["clearance_field"], + _resize_like(target["clearance_map"].float(), pred["clearance_field"]), + ) + else: + losses["clearance"] = pred["support_mode_logits"].new_tensor(0.0) + if "support_stability_field" in pred and "support_stability_map" in target: + losses["support_stability"] = F.binary_cross_entropy_with_logits( + pred["support_stability_field"], + _resize_like(target["support_stability_map"].float(), pred["support_stability_field"]), + ) + else: + losses["support_stability"] = pred["support_mode_logits"].new_tensor(0.0) + if "occluder_contact_field" in pred and "occluder_contact_map" in target: + losses["occluder_contact"] = 
F.binary_cross_entropy_with_logits( + pred["occluder_contact_field"], + _resize_like(target["occluder_contact_map"].float(), pred["occluder_contact_field"]), + ) + else: + losses["occluder_contact"] = pred["support_mode_logits"].new_tensor(0.0) + if "grasp_affordance_field" in pred and "grasp_affordance_map" in target: + losses["grasp_affordance"] = F.binary_cross_entropy_with_logits( + pred["grasp_affordance_field"], + _resize_like(target["grasp_affordance_map"].float(), pred["grasp_affordance_field"]), + ) + else: + losses["grasp_affordance"] = pred["support_mode_logits"].new_tensor(0.0) + if "reocclusion_logit" in pred and "corridor_feasible" in target: + target_reocclusion = target.get("reocclusion_target") + if target_reocclusion is None: + target_reocclusion = 1.0 - target["corridor_feasible"].float().amax(dim=-1) + if target_reocclusion.ndim < pred["reocclusion_logit"].ndim: + target_reocclusion = target_reocclusion.unsqueeze(-1).expand_as(pred["reocclusion_logit"]) + losses["reocclusion"] = F.binary_cross_entropy_with_logits(pred["reocclusion_logit"], target_reocclusion) + else: + losses["reocclusion"] = pred["support_mode_logits"].new_tensor(0.0) + if "persistence_uncertainty" in pred: + losses["uncertainty"] = pred["persistence_uncertainty"].mean() + else: + losses["uncertainty"] = pred["support_mode_logits"].new_tensor(0.0) + task_metric_pairs = tuple(TASK_METRIC_NAMES) + metric_mask = target.get("task_metric_mask") + if metric_mask is None: + target_task_names = target.get("task_name") + if isinstance(target_task_names, (list, tuple, str)): + metric_mask = task_metric_valid_mask( + [str(name) for name in target_task_names] if not isinstance(target_task_names, str) else [target_task_names] * pred["support_mode_logits"].shape[0], + device=pred["support_mode_logits"].device, + batch_size=pred["support_mode_logits"].shape[0], + ) + task_losses = [] + for metric_idx, key in enumerate(task_metric_pairs): + if key not in pred or key not in target: + continue + 
if metric_mask is None: + task_losses.append(F.mse_loss(pred[key].float(), target[key].float())) + continue + per_sample = F.mse_loss(pred[key].float(), target[key].float(), reduction="none") + while per_sample.ndim > 1: + per_sample = per_sample.mean(dim=-1) + valid = metric_mask[:, metric_idx].to(dtype=per_sample.dtype) + if valid.sum() <= 0: + continue + task_losses.append((per_sample * valid).sum() / valid.sum().clamp_min(1.0)) + losses["task_metrics"] = ( + torch.stack(task_losses).mean() + if task_losses + else pred["support_mode_logits"].new_tensor(0.0) + ) + if "state_confidence_logit" in pred and "state_confidence_target" in target: + losses["calibration"] = F.binary_cross_entropy_with_logits( + pred["state_confidence_logit"], + target["state_confidence_target"].float(), + ) + else: + losses["calibration"] = pred["support_mode_logits"].new_tensor(0.0) + return losses + + +def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target_rollout: dict[str, Tensor]) -> Tensor: + has_candidates = pred_rollout["support_mode_logits"].ndim == 4 + candidate_dim = pred_rollout["support_mode_logits"].shape[1] if has_candidates else 1 + + def _expand_target(value: Tensor) -> Tensor: + if not has_candidates: + return value + if value.ndim >= 2 and value.shape[1] == candidate_dim: + return value + return value.unsqueeze(1).expand(-1, candidate_dim, *value.shape[1:]) + + def _resize_rollout_target_like(target_value: Tensor, pred_value: Tensor) -> Tensor: + if target_value.shape == pred_value.shape: + return target_value + if pred_value.ndim == 6: + flat_target = target_value.reshape(-1, target_value.shape[-3], target_value.shape[-2], target_value.shape[-1]) + flat_pred = pred_value.reshape(-1, pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1]) + resized = _resize_like(flat_target.float(), flat_pred) + return resized.reshape(*pred_value.shape[:-3], pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1]) + if pred_value.ndim == 5: 
+ flat_target = target_value.reshape(-1, target_value.shape[-2], target_value.shape[-1]) + if flat_target.shape[-2:] != pred_value.shape[-2:]: + flat_target = F.interpolate( + flat_target.unsqueeze(1).float(), + size=pred_value.shape[-2:], + mode="bilinear", + align_corners=False, + ).squeeze(1) + return flat_target.reshape(*pred_value.shape[:-2], pred_value.shape[-2], pred_value.shape[-1]) + return target_value + + horizon = min( + pred_rollout["support_mode_logits"].shape[-2], + target_rollout["support_mode"].shape[-1], + ) + pred_rollout = { + "support_mode_logits": pred_rollout["support_mode_logits"][..., :horizon, :], + "corridor_logits": pred_rollout["corridor_logits"][..., :horizon, :, :], + "persistence_horizon": pred_rollout["persistence_horizon"][..., :horizon, :], + "disturbance_cost": pred_rollout["disturbance_cost"][..., :horizon], + } + target_rollout = { + "support_mode": _expand_target(target_rollout["support_mode"][..., :horizon]), + "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]), + "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]), + "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]), + "action_chunk": _expand_target(target_rollout["action_chunk"][..., :horizon, :]), + } + if "phase" in target_rollout: + target_rollout["phase"] = _expand_target(target_rollout["phase"][..., :horizon]) + corridor_target = _resize_rollout_target_like( + target_rollout["corridor_feasible"], + pred_rollout["corridor_logits"], + ) + loss = ( + F.cross_entropy( + pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]), + target_rollout["support_mode"].reshape(-1).long(), + ) + + F.binary_cross_entropy_with_logits( + pred_rollout["corridor_logits"], + corridor_target.float(), + ) + + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float()) + + 
F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float()) + ) + if "phase_logits" in pred_rollout: + phase_target = target_rollout.get("phase") + if phase_target is None: + phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"]) + loss = loss + 0.5 * F.cross_entropy( + pred_rollout["phase_logits"].reshape(-1, pred_rollout["phase_logits"].shape[-1]), + phase_target.reshape(-1), + ) + if "arm_role_logits" in pred_rollout: + loss = loss + 0.25 * permutation_invariant_role_loss(pred_rollout["arm_role_logits"]) + optional_pairs = ( + ("target_belief_field", "belief_map", "rollout_belief_map"), + ("visibility_field", "visibility_map", "rollout_visibility_map"), + ("clearance_field", "clearance_map", "rollout_clearance_map"), + ("support_stability_field", "support_stability_map", "rollout_support_stability"), + ("occluder_contact_field", "occluder_contact_map", "rollout_occluder_contact_map"), + ("grasp_affordance_field", "grasp_affordance_map", "rollout_grasp_affordance_map"), + ("reocclusion_field", "reocclusion_map", "rollout_reocclusion_target"), + ) + for pred_key, _, target_key in optional_pairs: + if pred_key not in pred_rollout or target_key not in target_rollout: + continue + target_value = _expand_target(target_rollout[target_key][..., :horizon, ...]) + pred_value = pred_rollout[pred_key][..., :horizon, :, :, :] if pred_rollout[pred_key].ndim >= 6 else pred_rollout[pred_key][..., :horizon, :, :] + while target_value.ndim < pred_value.ndim: + target_value = target_value.unsqueeze(-1) + if pred_value.ndim >= 5: + target_value = _resize_rollout_target_like(target_value, pred_value) + loss = loss + 0.1 * F.binary_cross_entropy_with_logits(pred_value, target_value.float()) + return loss + + +def compute_total_loss( + model_output: dict[str, Tensor], + batch: dict[str, Tensor], + weights: LossWeights | None = None, +) -> dict[str, Tensor]: + weights = weights or LossWeights() + losses = { + "action": 
chunk_bc_loss( + model_output["action_mean"], + batch["action_chunk"], + mask=batch.get("action_mask"), + ), + } + total = weights.action * losses["action"] + + state_output = model_output.get("interaction_state") + if state_output is None: + state_output = model_output.get("reveal_state") + + if state_output is not None and "support_mode" in batch: + reveal_losses = reveal_state_loss(state_output, batch, weights) + losses.update(reveal_losses) + total = ( + total + + weights.phase * reveal_losses["phase"] + + weights.arm_role * reveal_losses["arm_role"] + + weights.support_mode * reveal_losses["support_mode"] + + weights.corridor * reveal_losses["corridor"] + + weights.persistence * reveal_losses["persistence"] + + weights.disturbance * reveal_losses["disturbance"] + + weights.belief * reveal_losses["belief"] + + weights.visibility * reveal_losses["visibility"] + + weights.clearance * reveal_losses["clearance"] + + weights.support_stability * reveal_losses["support_stability"] + + weights.occluder_contact * reveal_losses["occluder_contact"] + + weights.grasp_affordance * reveal_losses["grasp_affordance"] + + weights.reocclusion * reveal_losses["reocclusion"] + + weights.task_metrics * reveal_losses["task_metrics"] + + weights.calibration * reveal_losses["calibration"] + + 0.01 * reveal_losses["uncertainty"] + ) + + if model_output.get("planned_rollout") and model_output.get("rollout_source", "learned") in {"learned", "lightweight"} and ( + "proposal_target_rollout_support_mode" in batch + or "candidate_rollout_support_mode" in batch + or "rollout_support_mode" in batch + ): + if "proposal_target_rollout_support_mode" in batch: + rollout_target = { + "support_mode": batch["proposal_target_rollout_support_mode"], + "corridor_feasible": batch["proposal_target_rollout_corridor_feasible"], + "persistence_horizon": batch["proposal_target_rollout_persistence_horizon"], + "disturbance_cost": batch["proposal_target_rollout_disturbance_cost"], + "action_chunk": 
batch["proposal_target_action_chunks"], + } + if "proposal_target_rollout_phase" in batch: + rollout_target["phase"] = batch["proposal_target_rollout_phase"] + for optional_key in ( + "proposal_target_rollout_belief_map", + "proposal_target_rollout_visibility_map", + "proposal_target_rollout_clearance_map", + "proposal_target_rollout_support_stability", + "proposal_target_rollout_reocclusion_target", + "proposal_target_rollout_occluder_contact_map", + "proposal_target_rollout_grasp_affordance_map", + ): + if optional_key in batch: + rollout_target[optional_key.replace("proposal_target_", "")] = batch[optional_key] + elif "candidate_rollout_support_mode" in batch: + rollout_target = { + "support_mode": batch["candidate_rollout_support_mode"], + "corridor_feasible": batch["candidate_rollout_corridor_feasible"], + "persistence_horizon": batch["candidate_rollout_persistence_horizon"], + "disturbance_cost": batch["candidate_rollout_disturbance_cost"], + "action_chunk": batch["candidate_action_chunks"], + } + if "candidate_rollout_phase" in batch: + rollout_target["phase"] = batch["candidate_rollout_phase"] + for optional_key in ( + "candidate_rollout_belief_map", + "candidate_rollout_visibility_map", + "candidate_rollout_clearance_map", + "candidate_rollout_support_stability", + "candidate_rollout_reocclusion_target", + "candidate_rollout_occluder_contact_map", + "candidate_rollout_grasp_affordance_map", + ): + if optional_key in batch: + rollout_target[optional_key.replace("candidate_", "")] = batch[optional_key] + planner_indices = model_output.get("planner_topk_indices") + if planner_indices is not None: + for key, value in list(rollout_target.items()): + if isinstance(value, Tensor) and value.ndim >= 2 and value.shape[1] >= planner_indices.shape[1]: + expand_indices = planner_indices + while expand_indices.ndim < value.ndim: + expand_indices = expand_indices.unsqueeze(-1) + rollout_target[key] = value.gather( + 1, + expand_indices.expand(-1, -1, *value.shape[2:]), + 
) + else: + rollout_target = { + "support_mode": batch["rollout_support_mode"], + "corridor_feasible": batch["rollout_corridor_feasible"], + "persistence_horizon": batch["rollout_persistence_horizon"], + "disturbance_cost": batch["rollout_disturbance_cost"], + "action_chunk": batch["action_chunk"], + } + if "rollout_phase" in batch: + rollout_target["phase"] = batch["rollout_phase"] + for optional_key in ( + "rollout_belief_map", + "rollout_visibility_map", + "rollout_clearance_map", + "rollout_support_stability", + "rollout_reocclusion_target", + "rollout_occluder_contact_map", + "rollout_grasp_affordance_map", + ): + if optional_key in batch: + rollout_target[optional_key] = batch[optional_key] + world_model_loss = world_model_rollout_consistency_loss( + model_output["planned_rollout"], + rollout_target, + ) + if model_output.get("rollout_source", "learned") == "lightweight": + losses["transition"] = world_model_loss + losses["world_model"] = model_output["action_mean"].new_tensor(0.0) + total = total + weights.transition * world_model_loss + else: + losses["world_model"] = world_model_loss + losses["transition"] = model_output["action_mean"].new_tensor(0.0) + total = total + weights.world_model * world_model_loss + else: + losses["world_model"] = model_output["action_mean"].new_tensor(0.0) + losses["transition"] = model_output["action_mean"].new_tensor(0.0) + + if "planner_success_logits" in model_output and "candidate_retrieval_success" in batch: + success_target = batch["candidate_retrieval_success"].float() + risk_target = batch.get("candidate_risk") + if risk_target is None: + risk_target = torch.clamp( + batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(), + 0.0, + 1.0, + ) + utility_target = batch.get("candidate_utility") + planner_indices = model_output.get("planner_topk_indices") + if planner_indices is not None and success_target.shape[1] != model_output["planner_success_logits"].shape[1]: + success_target = 
success_target.gather(1, planner_indices) + risk_target = risk_target.gather(1, planner_indices) + if utility_target is not None: + utility_target = utility_target.gather(1, planner_indices) + if utility_target is None: + utility_target = success_target - risk_target + elif "utility_structured" in model_output: + utility_target = 0.5 * utility_target + 0.5 * model_output["utility_structured"].detach() + planner_scores = model_output["planner_scores"].float().clamp(-20.0, 20.0) + success_loss = F.binary_cross_entropy_with_logits(model_output["planner_success_logits"], success_target) + risk_loss = F.mse_loss(model_output["planner_risk_values"], risk_target.float()) + pred_diff = planner_scores.unsqueeze(-1) - planner_scores.unsqueeze(-2) + target_diff = utility_target.float().unsqueeze(-1) - utility_target.float().unsqueeze(-2) + ranking_mask = target_diff.abs() > 1e-4 + if ranking_mask.any(): + ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean() + else: + ranking_loss = planner_scores.new_tensor(0.0) + oracle_target = utility_target.argmax(dim=-1) + oracle_loss = F.cross_entropy(planner_scores, oracle_target) + ranking_loss = ranking_loss + 0.5 * oracle_loss + losses["planner_success"] = success_loss + losses["planner_risk"] = risk_loss + losses["planner_ranking"] = ranking_loss + total = ( + total + + weights.planner_success * success_loss + + weights.planner_risk * risk_loss + + weights.planner_ranking * ranking_loss + ) + else: + losses["planner_success"] = model_output["action_mean"].new_tensor(0.0) + losses["planner_risk"] = model_output["action_mean"].new_tensor(0.0) + losses["planner_ranking"] = model_output["action_mean"].new_tensor(0.0) + + if "adapter_confidence" in model_output and "state_confidence_target" in batch: + confidence = model_output["adapter_confidence"] + if confidence.ndim > 1: + confidence = confidence.max(dim=-1).values + with torch.autocast(device_type=confidence.device.type, enabled=False): + 
gate_loss = F.binary_cross_entropy( + confidence.float().clamp(1e-4, 1.0 - 1e-4), + batch["state_confidence_target"].float(), + ) + losses["gate"] = gate_loss + total = total + weights.gate * gate_loss + else: + losses["gate"] = model_output["action_mean"].new_tensor(0.0) + + if "trunk_action_mean" in model_output: + distill_loss = chunk_bc_loss( + model_output["action_mean"], + model_output["trunk_action_mean"].detach(), + ) + losses["distillation"] = distill_loss + total = total + weights.distillation * distill_loss + else: + losses["distillation"] = model_output["action_mean"].new_tensor(0.0) + + proposal_target_chunks, proposal_success_values, proposal_risk_values, proposal_utility_values = _proposal_target_batch(batch) + + if "proposal_candidates" in model_output: + reconstruction_losses = [] + batch_size = model_output["proposal_candidates"].shape[0] + task_names = batch.get("task_name") + for sample_idx in range(batch_size): + sample_batch: dict[str, Tensor | str] = {} + for key in ("candidate_action_chunks", "candidate_utility"): + value = batch.get(key) + if value is not None: + sample_batch[key] = value[sample_idx : sample_idx + 1] + if isinstance(task_names, list): + sample_batch["task_name"] = str(task_names[sample_idx]) + elif isinstance(task_names, tuple): + sample_batch["task_name"] = str(task_names[sample_idx]) + elif isinstance(task_names, str): + sample_batch["task_name"] = task_names + fallback_sample = ( + proposal_target_chunks[sample_idx : sample_idx + 1] + if proposal_target_chunks is not None + else None + ) + sample_targets = _proposal_reconstruction_targets( + sample_batch, # type: ignore[arg-type] + proposal_count=model_output["proposal_candidates"].shape[1], + fallback_targets=fallback_sample, + ) + if sample_targets is None: + continue + reconstruction_losses.append( + proposal_set_reconstruction_loss( + model_output["proposal_candidates"][sample_idx : sample_idx + 1], + sample_targets, + ) + ) + if reconstruction_losses: + 
proposal_reconstruction = torch.stack(reconstruction_losses).mean() + else: + proposal_reconstruction = model_output["action_mean"].new_tensor(0.0) + losses["proposal_reconstruction"] = proposal_reconstruction + total = total + weights.proposal_reconstruction * proposal_reconstruction + else: + losses["proposal_reconstruction"] = model_output["action_mean"].new_tensor(0.0) + + if "proposal_logits" in model_output and proposal_success_values is not None: + candidate_count = min( + model_output["proposal_logits"].shape[1], + proposal_success_values.shape[1], + ) + proposal_logits = model_output["proposal_logits"][:, :candidate_count] + proposal_success_target = proposal_success_values[:, :candidate_count].float() + proposal_utility = proposal_utility_values + if proposal_utility is None: + proposal_risk = proposal_risk_values + if proposal_risk is None: + proposal_risk = torch.clamp( + batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(), + 0.0, + 1.0, + ) + proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count] + else: + proposal_utility = proposal_utility[:, :candidate_count] + proposal_success_loss = F.binary_cross_entropy_with_logits( + proposal_logits, + proposal_success_target, + ) + proposal_pred_diff = proposal_logits.unsqueeze(-1) - proposal_logits.unsqueeze(-2) + proposal_target_diff = proposal_utility.float().unsqueeze(-1) - proposal_utility.float().unsqueeze(-2) + proposal_mask = proposal_target_diff.abs() > 1e-4 + if proposal_mask.any(): + proposal_ranking_loss = torch.relu(0.1 - torch.sign(proposal_target_diff) * proposal_pred_diff)[ + proposal_mask + ].mean() + else: + proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0) + proposal_oracle_target = proposal_utility.argmax(dim=-1) + proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target) + proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss + losses["proposal_success"] = 
proposal_success_loss + losses["proposal_ranking"] = proposal_ranking_loss + total = ( + total + + weights.proposal_success * proposal_success_loss + + weights.proposal_ranking * proposal_ranking_loss + ) + else: + losses["proposal_success"] = model_output["action_mean"].new_tensor(0.0) + losses["proposal_ranking"] = model_output["action_mean"].new_tensor(0.0) + + if ( + "proposal_mode_logits" in model_output + and "proposal_mode_assignments" in model_output + and proposal_success_values is not None + ): + candidate_count = min( + proposal_success_values.shape[1], + proposal_utility_values.shape[1] if proposal_utility_values is not None else proposal_success_values.shape[1], + model_output["proposal_mode_assignments"].numel(), + ) + proposal_success_target = proposal_success_values[:, :candidate_count].float() + proposal_utility = proposal_utility_values + if proposal_utility is None: + proposal_risk = proposal_risk_values + if proposal_risk is None: + proposal_risk = torch.clamp( + batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(), + 0.0, + 1.0, + ) + proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count] + else: + proposal_utility = proposal_utility[:, :candidate_count].float() + mode_success_target, mode_utility_target = _proposal_mode_targets( + model_output["proposal_mode_assignments"], + proposal_success=proposal_success_target, + proposal_utility=proposal_utility, + num_modes=model_output["proposal_mode_logits"].shape[1], + ) + proposal_mode_logits = model_output["proposal_mode_logits"] + proposal_mode_success_loss = F.binary_cross_entropy_with_logits( + proposal_mode_logits, + mode_success_target, + reduction="none", + ) + proposal_mode_success_loss = proposal_mode_success_loss.mean(dim=-1) + proposal_mode_pred_diff = proposal_mode_logits.unsqueeze(-1) - proposal_mode_logits.unsqueeze(-2) + proposal_mode_target_diff = mode_utility_target.unsqueeze(-1) - mode_utility_target.unsqueeze(-2) 
+ proposal_mode_mask = proposal_mode_target_diff.abs() > 1e-4 + proposal_mode_ranking_terms = torch.relu( + 0.1 - torch.sign(proposal_mode_target_diff) * proposal_mode_pred_diff + ) + proposal_mode_ranking_den = proposal_mode_mask.sum(dim=(-1, -2)).clamp_min(1) + proposal_mode_ranking_loss = (proposal_mode_ranking_terms * proposal_mode_mask).sum(dim=(-1, -2)) / proposal_mode_ranking_den + proposal_mode_ranking_loss = torch.where( + proposal_mode_mask.any(dim=(-1, -2)), + proposal_mode_ranking_loss, + proposal_mode_logits.new_zeros(proposal_mode_ranking_loss.shape), + ) + proposal_mode_oracle_target = mode_utility_target.argmax(dim=-1) + proposal_mode_oracle_loss = F.cross_entropy( + proposal_mode_logits, + proposal_mode_oracle_target, + reduction="none", + ) + proposal_mode_loss_per_sample = ( + proposal_mode_success_loss + + proposal_mode_ranking_loss + + 0.5 * proposal_mode_oracle_loss + ) + task_filter = weights.proposal_mode_task_filter + if task_filter: + filtered_mask = _task_name_mask_for_values( + batch, + task_names=list(task_filter), + batch_size=proposal_mode_loss_per_sample.shape[0], + device=proposal_mode_loss_per_sample.device, + ) + if filtered_mask is not None and filtered_mask.any(): + proposal_mode_loss = proposal_mode_loss_per_sample[filtered_mask].mean() + else: + proposal_mode_loss = proposal_mode_logits.new_tensor(0.0) + elif weights.proposal_mode_cloth_only: + cloth_mask = _task_name_mask( + batch, + task_name="cloth", + batch_size=proposal_mode_loss_per_sample.shape[0], + device=proposal_mode_loss_per_sample.device, + ) + if cloth_mask is not None and cloth_mask.any(): + proposal_mode_loss = proposal_mode_loss_per_sample[cloth_mask].mean() + else: + proposal_mode_loss = proposal_mode_logits.new_tensor(0.0) + else: + proposal_mode_loss = proposal_mode_loss_per_sample.mean() + losses["proposal_mode"] = proposal_mode_loss + total = total + weights.proposal_mode * proposal_mode_loss + else: + losses["proposal_mode"] = 
model_output["action_mean"].new_tensor(0.0) + + if "proposal_candidates" in model_output: + diversity_loss = proposal_diversity_loss(model_output["proposal_candidates"]) + losses["proposal_diversity"] = diversity_loss + total = total + weights.proposal_diversity * diversity_loss + else: + losses["proposal_diversity"] = model_output["action_mean"].new_tensor(0.0) + + if "equivariance_probe_action_mean" in model_output and "equivariance_target_action_mean" in model_output: + swap_loss = role_swap_consistency_loss( + model_output["equivariance_probe_action_mean"], + model_output["equivariance_target_action_mean"].detach(), + ) + losses["role_swap_consistency"] = swap_loss + total = total + weights.role_swap_consistency * swap_loss + else: + losses["role_swap_consistency"] = model_output["action_mean"].new_tensor(0.0) + + losses["total"] = total + return losses diff --git a/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..c249599cd2dcfe2e0eb48512e23ccfdd59805c83 --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Sequence + +import torch +from torch import Tensor, nn + +from models.policy import ( + AdapterWrappedPolicy, + BackboneOnlyPolicy, + ElasticRevealBimanualPolicy, + FoundationTrunkPolicy, + InteractionBimanualPolicy, + PolicyConfig, + RevealBimanualPolicy, +) +from train.losses import LossWeights, compute_total_loss + + +@dataclass +class TrainerConfig: + policy_type: str = "backbone_only" + training_regime: str = "adapter_train_frozen_trunk" + eval_mode: str = "adapter_active" + use_bf16: bool = True + grad_clip_norm: float = 1.0 + freeze_backbone: bool = True + gradient_checkpointing: bool = True + plan_during_train: bool = 
True + plan_during_eval: bool = True + support_mode_conditioning: bool = True + planner_mode: str = "trainable" + use_depth: bool = True + use_world_model: bool = True + use_role_tokens: bool = True + history_steps_override: int | None = None + compute_equivariance_probe: bool = True + trainable_parameter_prefixes: tuple[str, ...] = () + adapter_mode: str = "adapter_active" + adapter_use_transition_model: bool = True + adapter_use_task_conditioning: bool = True + adapter_action_supervision_source: str = "selected" + + +def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Module: + config.backbone.freeze_backbone = trainer_config.freeze_backbone + config.backbone.gradient_checkpointing = trainer_config.gradient_checkpointing + if trainer_config.policy_type == "adapter_wrapped": + return AdapterWrappedPolicy(config) + if trainer_config.policy_type == "foundation_trunk": + return FoundationTrunkPolicy(config) + if trainer_config.policy_type == "elastic_reveal": + return ElasticRevealBimanualPolicy(config) + if trainer_config.policy_type == "reveal_state": + return RevealBimanualPolicy(config) + if trainer_config.policy_type == "interaction_state": + return InteractionBimanualPolicy(config) + return BackboneOnlyPolicy(config) + + +def policy_supports_planning(policy_type: str) -> bool: + return policy_type in {"reveal_state", "interaction_state", "elastic_reveal"} + + +def planner_enabled(trainer_config: TrainerConfig, during_eval: bool) -> bool: + if not policy_supports_planning(trainer_config.policy_type): + return False + if trainer_config.planner_mode == "off": + return False + if during_eval: + return trainer_config.plan_during_eval + return trainer_config.plan_during_train + + +def apply_planner_mode(model: nn.Module, trainer_config: TrainerConfig) -> list[str]: + if trainer_config.planner_mode != "proxy_pretrained": + return [] + frozen_modules = [] + for module_name in ("interaction_head", "world_model", "planner"): + module = 
getattr(model, module_name, None) + if module is None: + continue + frozen_modules.append(module_name) + for parameter in module.parameters(): + parameter.requires_grad = False + return frozen_modules + + +def apply_trainable_parameter_prefixes(model: nn.Module, trainer_config: TrainerConfig) -> list[str]: + prefixes = tuple(str(prefix) for prefix in trainer_config.trainable_parameter_prefixes) + if not prefixes: + return [] + matched = [] + for name, parameter in model.named_parameters(): + trainable = any(name.startswith(prefix) for prefix in prefixes) + parameter.requires_grad = trainable + if trainable: + matched.append(name) + return matched + + +class BimanualTrainer: + def __init__(self, model: nn.Module, optimizer: torch.optim.Optimizer, config: TrainerConfig) -> None: + self.model = model + self.optimizer = optimizer + self.config = config + + def _autocast_context(self) -> torch.autocast: + if self.config.use_bf16 and torch.cuda.is_available(): + return torch.autocast(device_type="cuda", dtype=torch.bfloat16) + return torch.autocast(device_type="cpu", enabled=False) + + def training_step(self, batch: dict[str, Tensor | Sequence[str]], loss_weights: LossWeights | None = None) -> dict[str, Tensor]: + self.optimizer.zero_grad(set_to_none=True) + images = batch["images"] + proprio = batch["proprio"] + texts = batch.get("texts") + language_tokens = batch.get("language_tokens") + with self._autocast_context(): + task_names = batch.get("task_name") + texts_value = texts if isinstance(texts, Sequence) and not isinstance(texts, str) else None + task_name_value = task_names if isinstance(task_names, Sequence) and not isinstance(task_names, str) else None + forward_kwargs = { + "images": images, + "proprio": proprio, + "texts": texts_value, + "task_names": task_name_value, + "task_ids": batch.get("task_id"), + "language_tokens": language_tokens if isinstance(language_tokens, dict) else None, + "history_images": batch.get("history_images"), + "history_proprio": 
batch.get("history_proprio"), + "history_actions": batch.get("history_actions"), + "depths": batch.get("depths"), + "depth_valid": batch.get("depth_valid"), + "camera_intrinsics": batch.get("camera_intrinsics"), + "camera_extrinsics": batch.get("camera_extrinsics"), + "history_depths": batch.get("history_depths"), + "history_depth_valid": batch.get("history_depth_valid"), + "history_camera_intrinsics": batch.get("history_camera_intrinsics"), + "history_camera_extrinsics": batch.get("history_camera_extrinsics"), + } + if policy_supports_planning(self.config.policy_type): + forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False) + forward_kwargs["support_mode_conditioning"] = self.config.support_mode_conditioning + if "candidate_action_chunks" in batch: + forward_kwargs["candidate_chunks_override"] = batch["candidate_action_chunks"] + if self.config.policy_type == "adapter_wrapped": + forward_kwargs["adapter_mode"] = self.config.adapter_mode + forward_kwargs["use_transition_model"] = self.config.adapter_use_transition_model + forward_kwargs["use_task_conditioning"] = self.config.adapter_use_task_conditioning + if self.config.policy_type == "elastic_reveal": + forward_kwargs["depths"] = batch.get("depths") + forward_kwargs["depth_valid"] = batch.get("depth_valid") + forward_kwargs["camera_intrinsics"] = batch.get("camera_intrinsics") + forward_kwargs["camera_extrinsics"] = batch.get("camera_extrinsics") + forward_kwargs["history_depths"] = batch.get("history_depths") + forward_kwargs["history_depth_valid"] = batch.get("history_depth_valid") + forward_kwargs["history_camera_intrinsics"] = batch.get("history_camera_intrinsics") + forward_kwargs["history_camera_extrinsics"] = batch.get("history_camera_extrinsics") + forward_kwargs["history_camera_valid_mask"] = batch.get("history_camera_valid_mask") + forward_kwargs["use_depth"] = self.config.use_depth + forward_kwargs["use_world_model"] = self.config.use_world_model + forward_kwargs["use_planner"] = 
planner_enabled(self.config, during_eval=False) + forward_kwargs["use_role_tokens"] = self.config.use_role_tokens + forward_kwargs["history_steps_override"] = self.config.history_steps_override + forward_kwargs["compute_equivariance_probe"] = self.config.compute_equivariance_probe + elif self.config.policy_type == "interaction_state": + forward_kwargs["use_role_tokens"] = self.config.use_role_tokens + forward_kwargs["history_steps_override"] = self.config.history_steps_override + model_output = self.model(**forward_kwargs) + if ( + self.config.policy_type == "adapter_wrapped" + and self.config.adapter_action_supervision_source == "trunk" + and "trunk_action_mean" in model_output + ): + model_output = dict(model_output) + model_output["selected_action_mean"] = model_output["action_mean"] + model_output["action_mean"] = model_output["trunk_action_mean"] + losses = compute_total_loss(model_output, batch, weights=loss_weights) + losses["total"].backward() + torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.grad_clip_norm) + self.optimizer.step() + return losses diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-310-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1050f0715966db2b84d8925b165780e8b73497c3 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-310-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02d4fe5172a8568df1aefd95ddde47e022cfbd33 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/conftest.cpython-311-pytest-9.0.2.pyc differ diff --git 
a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_eval_toggle_paths_work.cpython-310-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_eval_toggle_paths_work.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa864989ca0d9a1785c994295d54db2d3e2d875a Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_eval_toggle_paths_work.cpython-310-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_general_eval_protocol_is_identical.cpython-310-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_general_eval_protocol_is_identical.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75e1fc6ea59b766efd4e4332e2b78902c446dfa5 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_general_eval_protocol_is_identical.cpython-310-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-310-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30871f3b9d10868c14bcdee2196121a77b2cf9b4 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-310-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-311-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-311-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24f1391dde62b97998296ced3aaad89b2fb15c87 Binary files /dev/null and 
b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_summary.cpython-311-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-310-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4af3872c45927696ada83ad1525e0c2db9c0c623 Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-310-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-311-pytest-9.0.2.pyc b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-311-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80a70687a6760c66883857ccbee435d98dd6aa8e Binary files /dev/null and b/code/VLAarchtests2_code/VLAarchtests/tests/__pycache__/test_public_benchmark_package_tracks.cpython-311-pytest-9.0.2.pyc differ diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py b/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py new file mode 100644 index 0000000000000000000000000000000000000000..53cb9f0530102574dcd3ebd1bf3802dc4c0a63aa --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py @@ -0,0 +1,91 @@ +import pytest + +from eval.public_benchmark_package import build_public_eval_protocol, build_target_training_spec +from eval.run_public_benchmark_package import summarize_public_benchmark_package + + +def _successes(num_success: int, total: int = 100) -> list[int]: + return [1] * num_success + [0] * (total - num_success) + + +def _target_record(track_id: str, adapter_mode: str, seed: int, num_success: 
int, *, intervention: float, non_base: float) -> dict: + successes = _successes(num_success) + record = { + "track_id": track_id, + "adapter_mode": adapter_mode, + "successes": successes, + "success_rate": sum(successes) / len(successes), + "episodes": len(successes), + "seed": seed, + "eval_protocol": build_public_eval_protocol(track_id=track_id, eval_mode=adapter_mode, seed=seed, episodes=len(successes)), + "intervention_rate": intervention, + "non_base_selection_rate": non_base, + "steps_to_first_reveal_or_access": 8.0, + "steps_to_retrieve": 22.0, + "disturbance_proxy": 0.3, + } + if adapter_mode != "adapter_noop": + record["train_spec"] = build_target_training_spec(track_id=track_id, model_variant=adapter_mode, seed=seed) + else: + record["train_spec"] = build_target_training_spec(track_id=track_id, model_variant="adapter_active_ft", seed=seed) + return record + + +def _anchor_record(adapter_mode: str, seed: int, num_success: int) -> dict: + successes = _successes(num_success) + return { + "track_id": "anchor_track", + "adapter_mode": adapter_mode, + "successes": successes, + "success_rate": sum(successes) / len(successes), + "episodes": len(successes), + "seed": seed, + "eval_protocol": build_public_eval_protocol(track_id="anchor_track", eval_mode=adapter_mode, seed=seed, episodes=len(successes)), + } + + +def test_public_benchmark_package_summary_passes_with_clear_gain(): + payloads = [ + _target_record("bag_track", "trunk_only_ft", 17, 35, intervention=0.0, non_base=0.0), + _target_record("bag_track", "adapter_noop", 17, 35, intervention=0.0, non_base=0.0), + _target_record("bag_track", "adapter_active_ft", 17, 75, intervention=0.30, non_base=0.40), + _target_record("occlusion_track", "trunk_only_ft", 17, 30, intervention=0.0, non_base=0.0), + _target_record("occlusion_track", "adapter_noop", 17, 30, intervention=0.0, non_base=0.0), + _target_record("occlusion_track", "adapter_active_ft", 17, 68, intervention=0.24, non_base=0.22), + 
_target_record("cloth_track", "trunk_only_ft", 17, 28, intervention=0.0, non_base=0.0), + _target_record("cloth_track", "adapter_noop", 17, 28, intervention=0.0, non_base=0.0), + _target_record("cloth_track", "adapter_active_ft", 17, 60, intervention=0.18, non_base=0.20), + _anchor_record("trunk_only", 17, 96), + _anchor_record("adapter_noop", 17, 96), + _anchor_record("adapter_active", 17, 95), + ] + + summary = summarize_public_benchmark_package(payloads, bootstrap_samples=200, bootstrap_seed=0) + + assert summary["headline_pass"] + assert summary["sign_of_life_pass"] + assert summary["anchor_pass"] + assert summary["sign_of_life_track_count"] == 3 + assert summary["tracks"]["bag_track"]["delta_active_vs_trunk"] > 0.0 + assert summary["tracks"]["anchor_track"]["anchor_within_tolerance"] + + +def test_public_benchmark_package_detects_training_mismatch(): + payloads = [ + _target_record("bag_track", "trunk_only_ft", 17, 35, intervention=0.0, non_base=0.0), + _target_record("bag_track", "adapter_noop", 17, 35, intervention=0.0, non_base=0.0), + _target_record("bag_track", "adapter_active_ft", 17, 75, intervention=0.30, non_base=0.40), + _target_record("occlusion_track", "trunk_only_ft", 17, 30, intervention=0.0, non_base=0.0), + _target_record("occlusion_track", "adapter_noop", 17, 30, intervention=0.0, non_base=0.0), + _target_record("occlusion_track", "adapter_active_ft", 17, 68, intervention=0.24, non_base=0.22), + _target_record("cloth_track", "trunk_only_ft", 17, 28, intervention=0.0, non_base=0.0), + _target_record("cloth_track", "adapter_noop", 17, 28, intervention=0.0, non_base=0.0), + _target_record("cloth_track", "adapter_active_ft", 17, 60, intervention=0.18, non_base=0.20), + _anchor_record("trunk_only", 17, 96), + _anchor_record("adapter_noop", 17, 96), + _anchor_record("adapter_active", 17, 95), + ] + payloads[8]["train_spec"]["batch_size"] = 64 + + with pytest.raises(ValueError, match="Training fairness mismatch"): + 
summarize_public_benchmark_package(payloads, bootstrap_samples=50, bootstrap_seed=0) diff --git a/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py b/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py new file mode 100644 index 0000000000000000000000000000000000000000..ef09a1990bff2005af75180e8b23ac436537245e --- /dev/null +++ b/code/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py @@ -0,0 +1,60 @@ +from eval.public_benchmark_package import ( + ANCHOR_ROLE, + TARGET_ROLE, + build_public_eval_protocol, + build_target_training_spec, + default_public_benchmark_manifest, + expected_eval_modes, + public_benchmark_tracks, + public_protocol_identity_signature, + training_fairness_signature, +) + + +def test_public_benchmark_package_contains_expected_tracks(): + manifest = default_public_benchmark_manifest() + + assert manifest["target_track_ids"] == ["bag_track", "occlusion_track", "cloth_track"] + assert manifest["anchor_track_ids"] == ["anchor_track"] + assert manifest["thresholds"]["anchor_tolerance"] == 0.02 + + +def test_public_target_protocol_identity_is_mode_invariant(): + protocol_signatures = { + public_protocol_identity_signature( + build_public_eval_protocol(track_id="bag_track", eval_mode=mode, seed=17) + ) + for mode in expected_eval_modes("bag_track") + } + + assert len(protocol_signatures) == 1 + + +def test_public_anchor_protocol_identity_is_mode_invariant(): + protocol_signatures = { + public_protocol_identity_signature( + build_public_eval_protocol(track_id="anchor_track", eval_mode=mode, seed=17) + ) + for mode in expected_eval_modes("anchor_track") + } + + assert len(protocol_signatures) == 1 + + +def test_training_fairness_signature_matches_for_trunk_and_adapter(): + trunk = build_target_training_spec(track_id="cloth_track", model_variant="trunk_only_ft", seed=17) + active = build_target_training_spec(track_id="cloth_track", model_variant="adapter_active_ft", 
seed=17) + + assert training_fairness_signature(trunk) == training_fairness_signature(active) + + +def test_public_track_roles_are_partitioned(): + target_roles = {track.track_id: track.role for track in public_benchmark_tracks(TARGET_ROLE)} + anchor_roles = {track.track_id: track.role for track in public_benchmark_tracks(ANCHOR_ROLE)} + + assert target_roles == { + "bag_track": TARGET_ROLE, + "occlusion_track": TARGET_ROLE, + "cloth_track": TARGET_ROLE, + } + assert anchor_roles == {"anchor_track": ANCHOR_ROLE} diff --git a/history/VLAarchtests2_previous_README.md b/history/VLAarchtests2_previous_README.md new file mode 100644 index 0000000000000000000000000000000000000000..dd3a488fdca20f3c16610ed6f7d2696734eeeca0 --- /dev/null +++ b/history/VLAarchtests2_previous_README.md @@ -0,0 +1,301 @@ +# VLAarchtests2 + +Bundle staged from `/workspace` on `2026-03-31 UTC`. + +This repo is the follow-on organization repo to `lsnu/VLAarchtests`. It includes: + +- current code under `VLAarchtests/` +- current third-party baseline code under `third_party/` +- current baseline runs, replay artifacts, demo roots, and released checkpoint material under `baselines/` +- current training outputs and checkpoints under `outputs/` +- current logs under `reports/` +- environment recreation files under `environment/` +- raw results and change/test logs at the repo root +- the previous repo README under `history/VLAarchtests_previous_README.md` +- the active handoff file under `handoff/instructions4.md` + +## Top-Level Contents + +- `VLAarchtests/` + - code, tests, configs, generated configs, reports, checkpoints, and proxy datasets from the current runpod workspace +- `third_party/AnyBimanual/` + - local AnyBimanual checkout used for the official overlap baseline branch, including local compatibility patches +- `baselines/` + - released AnyBimanual checkpoint material + - overlap replay artifacts + - HF export packaging note: `baselines/AnyBimanual_overlap_replay/multi/` is sharded into 
subdirectories to satisfy the Hub `10000 files per directory` limit + - overlap run directories + - local subset3 demo roots used by the overlap branch +- `outputs/` + - RLBench training outputs and checkpoints used by the current anchor, RVT, dual-push, and elastic-controller branches +- `reports/` + - training and evaluation logs copied from `/workspace/reports` +- `environment/` + - machine snapshot, package lists, and setup helpers +- `history/` + - copied previous-repo README +- `handoff/` + - active sprint instruction file +- `RESULTS_RAW.md` + - raw result tables and final official overlap eval outputs +- `CHANGE_AND_TEST_LOG.md` + - file-level change log and executed test commands +- `MODEL_AND_ARTIFACT_INDEX.md` + - staged directory map with main artifact roots + +## Previous Repo Coverage + +The earlier `lsnu/VLAarchtests` repo covered the `2026-03-25/26` work. Its README is copied verbatim at: + +- `history/VLAarchtests_previous_README.md` + +Previous-repo items explicitly referenced there include: + +- compact, spatial, compact-phase, and spatial-phase proxy branches +- earlier RLBench direct-policy and kNN runs +- environment recreation files +- prior raw result tables + +## Current Session Additions + +Current-session folders added or expanded in this repo include: + +- `VLAarchtests/artifacts/reports/sprint_v7_summary/` +- `VLAarchtests/artifacts/reports/sprint_v7_followup/` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/` +- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/` +- `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/` +- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/` +- `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/` +- 
`VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/` +- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/` +- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/` +- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/` +- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/` + +## Raw Results Snapshot + +### Proxy sprint v7 + +Source: + +- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json` + +Raw values: + +- base model mean success: `0.28` +- base per-task: foliage `0.39`, bag `0.31`, cloth `0.14` +- random mean success: `0.43333333333333335` +- candidate0 mean success: `0.2` +- oracle mean success: `0.4066666666666667` +- scripted mean success: `1.0` + +### Eval-time ablations + +Source: + +- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json` + +Raw values: + +- `no_planner`: `0.2` +- `no_memory`: `0.3233333333333333` +- `no_task_conditioning`: `0.28` +- `no_geometry`: `0.27` +- `no_camera_pose`: `0.29333333333333333` + +### Selector checkpoints + +Sources: + +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/default/reveal_benchmark.json` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/full_fixed_default/reveal_benchmark.json` +- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json` +- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md` + +Raw values: + +- `iter6` mean success: `0.4566666666666667` + - foliage `0.46`, bag `0.4`, cloth `0.51` +- `iter7` mean success: `0.4666666666666666` + - foliage `0.4`, bag `0.41`, cloth `0.59` +- `iter8` bag-only fixed slice: `0.41` +- routed controller mean success: `0.48666666666666664` + - routing rule: `foliage -> iter6`, `bag -> iter8`, `cloth -> iter8` + - per-task: foliage `0.46`, bag `0.41`, cloth `0.59` + +### Real baseline compare on proxy suite + +Source: + +- 
`VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.json` + +Raw values: + +- `baseline_rgbd_stage3` mean success: `0.31` + - foliage `0.21`, bag `0.15`, cloth `0.57` +- `iter5_selector` mean success: `0.45` + - foliage `0.44`, bag `0.4`, cloth `0.51` + +### RLBench recovered push-box comparator + +Sources: + +- `reports/rlbench_general_debug/rlbench_push_box_fair_step1_final_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json` +- `reports/rlbench_general_debug/rlbench_push_box_historical_step1_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json` + +Raw values: + +- current fair-step1 final mean success: `0.7` +- current fair-step1 final successes: + - `[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]` +- historical push-box control mean success: `0.4` +- historical push-box control successes: + - `[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]` + +### Official AnyBimanual overlap branch + +Sources: + +- `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log` +- `reports/anybimanual_subset3_overlap_resume1000_eval.log` + +Raw train milestones: + +- global step `300`: loss `40.91718` +- global step `400`: loss `33.26684` +- global step `500`: loss `36.07054` +- global step `600`: loss `35.32345` +- global step `700`: loss `28.50959` +- global step `800`: loss `23.60169` +- global step `900`: loss `15.28901` +- run reached `weights/1000` and the train exited cleanly + +Raw eval outputs: + +- source log: `reports/anybimanual_subset3_overlap_resume1000_eval.log` +- summary files: + - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md` + - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json` +- local last complete step: `1000` +- local mean success: `0.16` +- local per-task success: + - `coordinated_push_box`: `0.0` + - 
`coordinated_lift_ball`: `0.0` + - `dual_push_buttons`: `0.48` +- local per-task return: + - `coordinated_push_box`: `0.0` + - `coordinated_lift_ball`: `0.0` + - `dual_push_buttons`: `12.0` +- public best overlap step in the local summary: `60000` +- public best mean success in the local summary: `0.6933333333333334` + +### Validated general-task anchor: `dual_push_buttons` + +Sources: + +- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json` +- `baselines/AnyBimanual_release_eval_anchor/perlf_release_dual_push_buttons_ep25/PERACT_BC/seed0/eval_data.csv` + +Raw values: + +- public AnyBimanual release, step `60000`: success `0.96`, return `24.0`, length `21.56` +- local official single-task eval, step `60000`, `25` episodes: success `0.96`, return `24.0`, length `21.84` +- local clip backbone-only result on same task: success `0.0`, return `0.0` +- local elastic reveal proxy iter6 result on same task: success `0.0`, return `0.0` +- local RVT frozen fixed-bounds result on same task: success `0.0`, return `0.0` + +### RVT overlap branch + +Sources: + +- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md` +- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md` + +Raw values: + +- frozen RVT stage1 train summary: + - `outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/summary.json` + - final train total `0.043179353826920445` + - final val total `0.039591669984665984` +- frozen RVT overlap eval: mean success `0.0` +- frozen fixed-bounds RVT overlap eval: mean success `0.0` +- both branch gates: + - local AnyBimanual overlap floor `0.16` + - stage2 run `false` + +### Dual-push non-privileged retarget branch + +Sources: + +- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md` + +Raw values: + +- demo replay through `absolute_action_from_delta`: + - `reports/dual_push_nonzero_branch_20260330/demo_replay/replay_summary.json` + 
- mean success `0.8` + - mean return `0.8` +- retargeted demo with checkpoint backbone retrieval and vision-only button localization: + - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep1/summary.json` + - mean success `1.0` + - mean return `1.0` +- retargeted demo with checkpoint backbone retrieval and vision-only button localization: + - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep5/summary.json` + - mean success `1.0` + - mean return `1.0` + +### Dual-push full-architecture hybrid branch + +Sources: + +- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md` +- `reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json` +- `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json` + +Raw values: + +- elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization: + - `1` episode + - mean success `1.0` + - mean return `1.0` + - steps `94` + - retrieved episode index `11` + - retrieval similarity `0.9998629689216614` +- full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint: + - `1` episode + - mean success `1.0` + - mean return `1.0` + - steps `116` + - path recoveries `0` + - noop fallbacks `0` + - first selected mode `residual::maintain_opening` + - last selected mode `residual::base_action` + +## Environment Recreation + +Environment files are under `environment/`, including: + +- `environment/setup_same_hardware.sh` +- `environment/runtime_env_vars.sh` +- `environment/reconstruct_anybimanual_overlap_replay.sh` +- `environment/hardware_snapshot.txt` +- `environment/env_list.txt` +- `environment/base_python.txt` +- `environment/base_pip_freeze.txt` +- `environment/rlbench_python.txt` +- `environment/rlbench_pip_freeze.txt` + +## Notes On Result Presentation + +This repo-level README and the new root docs intentionally keep result text raw: + +- file paths +- exact commands +- exact 
numeric outputs +- exact partial status for in-flight runs + +Interpretive material already present inside older staged artifacts remains preserved as part of the historical workspace contents. diff --git a/history/VLAarchtests3_previous_README.md b/history/VLAarchtests3_previous_README.md new file mode 100644 index 0000000000000000000000000000000000000000..8fc7c834104f30129f99099210b76e127409013a --- /dev/null +++ b/history/VLAarchtests3_previous_README.md @@ -0,0 +1,240 @@ +# VLAarchtests3 + +`VLAarchtests3` is the organized export of the elastic-occlusion bimanual VLA handoff completed on a 1x L40S RunPod machine. + +It is a successor snapshot to the earlier `VLAarchtests` and `VLAarchtests2` work: + +- `VLAarchtests`: earlier architecture-search and benchmark-debugging work. +- `VLAarchtests2`: larger exploratory branch with frequent model changes, mixed benchmark artifacts, and several legacy results that needed manual reinterpretation. +- `VLAarchtests3`: cleaned export focused on the final handoff state, the adapter refactor, the validated tests, the current checkpoints, and the reports needed to continue from here. + +## What Was Done + +The main engineering outcome was a refactor from a monolithic elastic policy into a cleaner `trunk + structured adapter + no-op fallback` stack. + +The final exported code contains: + +- a clean wrapped-policy interface with `trunk_only`, `adapter_noop`, and `adapter_active` modes, +- a structured elastic-occlusion adapter with: + - reveal-state prediction, + - task-routed reveal/retrieve proposal families, + - retrieve-feasibility gating, + - a lightweight reveal-state transition model, +- explicit tests that protect: + - no-op equivalence, + - generic-task fallback, + - benchmark protocol identity, + - unsafe retrieve blocking, + - cloth-specific selection behavior. + +The most important debugging pass was in the planner/gating logic. The original active path could reveal forever or retrieve too early. 
The final planner fixes made it: + +- summarize scene readiness at the scene level rather than worst-candidate level, +- hard-mask unsafe retrieve candidates, +- switch from reveal to retrieve once feasibility is met, +- use task-specific bag and cloth readiness criteria, +- prefer reveal macros early and retrieve later. + +## What Was Actually Evaluated + +Two different kinds of evidence are included. + +### 1. Trusted General-Task Anchor + +This was kept narrow on purpose because only `dual_push_buttons` was trusted on this setup. + +Trusted anchor evidence: + +- official AnyBimanual local anchor summary on `dual_push_buttons`: + - `25` episodes + - success `0.96` +- live rerun on this RunPod: + - `5` episodes + - scores `[0, 100, 100, 0, 0]` + - mean score `40.0` + +Interpretation: + +- the official trunk path is real and non-trivial on the one stable anchor task, +- this does **not** mean the local custom CLIP trunk was competitive broadly, +- this does **not** validate the other unstable RLBench target-like tasks. + +### 2. Reveal/Retrieve Proxy Benchmark + +This benchmark is useful for mechanism debugging, but it is **not** a real robot/physics benchmark. + +The final reported held-out smoke benchmark used: + +- `12` foliage episodes, +- `12` bag episodes, +- `12` cloth episodes, +- `36` total episodes, +- separate held-out procedural seeds from the adapter train/val splits. + +Results: + +- non-intervention / matched no-op: + - mean success `0.000` + - foliage `0.000` + - bag `0.000` + - cloth `0.000` + - visibility integral `2.275` + - corridor availability `0.0312` + - disturbance cost `0.7433` + +- intervention / adapter active: + - mean success `0.6667` + - foliage `0.6667` + - bag `0.7500` + - cloth `0.5833` + - visibility integral `19.9503` + - corridor availability `0.7974` + - disturbance cost `0.2835` + - reocclusion rate `0.00278` + - planner regret `0.1586` + +The active policy did really intervene on these tasks. 
It did not just fall back silently to the trunk: + +- all recorded selections on the final held-out smoke run were non-base candidates, +- typical successful pattern: + - foliage: reveal (`pin_canopy`) then `retrieve`, + - bag: reveal (`widen_mouth`) then `retrieve`, + - cloth: reveal (`separate_layer`) then `retrieve`. + +## Important Limitation + +The reveal/retrieve proxy is a procedural synthetic environment, not a contact-rich robot simulator. + +It has: + +- synthetic RGB-D renders, +- internal latent state, +- hand-coded transition rules, +- scripted teacher/oracle supervision. + +It does **not** have: + +- rigid-body or deformable physics, +- actual robot kinematics, +- true contact/grasp simulation, +- a fair end-to-end manipulation distribution for a pretrained trunk. + +Therefore: + +- the proxy result is useful to validate adapter logic, +- the proxy result is **not** sufficient evidence that the trunk or the full system would outperform real baselines on RLBench or on the future custom benchmark. + +## What Was Learned + +The work supports the following conclusions: + +- the structured adapter idea is still alive, +- the explicit reveal-state variables are worth keeping, +- task-routed reveal macros matter, +- retrieve-feasibility gating matters, +- the no-op fallback path for general tasks is sound, +- the old heavy memory/world-model story is not where the strongest evidence lives. + +The work does **not** yet justify: + +- a claim of broad general-task superiority, +- a claim that the current proxy benchmark is a fair end-to-end benchmark, +- a claim that the architecture is validated on realistic target-like sim tasks. + +## Was The Adapter Trained? + +Yes. + +The final proxy adapter checkpoint was trained with: + +- frozen trunk, +- adapter-only updates, +- trained components: + - reveal/state head, + - proposal prior, + - transition model, + - planner/reranker. 
+ +Proxy training data: + +- train: `128` episodes per proxy family, +- val: `32` episodes per proxy family, +- proxy families: + - foliage, + - bag, + - cloth. + +The final headline smoke benchmark was not run on those train/val episodes. It used separate held-out seeds. + +## Was This A Perfect Fairness Story? + +No. + +What is fair in the current export: + +- matched active vs no-op comparisons on the same wrapped checkpoint, +- held-out procedural seeds for the final proxy benchmark, +- exact no-op and generic-task fallback tests. + +What is still missing for a stronger paper-quality comparison: + +1. same-initialization `trunk_only` fine-tuned on the same proxy data, +2. same-initialization `trunk + adapter` fine-tuned on the same proxy data, +3. comparison on held-out proxy seeds, +4. comparison on stable real-sim tasks. + +## What Is Left To Do + +The main remaining work is on real sim benchmarks, not more abstract proxy optimization. + +Priority list: + +1. Train a fair control: + - same initialization, + - `trunk_only` fine-tuned on the same reveal/retrieve proxy data, + - compare against `trunk + adapter`. + +2. Attach the adapter directly to a strong public trunk: + - official AnyBimanual, + - official PerAct2 / RVT, + - or 3D FlowMatch Actor if practical. + +3. Validate on stable real-sim tasks: + - do not trust unstable RLBench tasks with infeasible waypoints, + - rebuild a trustworthy target-like evaluation subset, + - keep `dual_push_buttons` as a regression anchor only. + +4. Add a deformable / garment benchmark: + - this is the most relevant public step toward the future suitcase/clothes benchmark. + +5. Only after that: + - revisit larger RLBench sweeps, + - or collect custom teleop data. 
+ +## Repository Layout + +- `code/` + - cleaned code snapshot used for the handoff +- `artifacts/outputs/` + - current adapter checkpoints and training outputs +- `artifacts/reports/` + - evaluation and debugging reports +- `artifacts/data/reveal_proxy/` + - proxy train/val datasets used by this stage +- `legacy/` + - exact older checkpoints and summaries that the current work depends on +- `docs/` + - audit, iteration, and completion reports from this handoff +- `setup/` + - same-machine environment notes and helper scripts + +## Recommended Use Of This Repo + +Use this repo as: + +- the archival handoff state, +- the codebase to continue adapter work from, +- the source of the current checkpoints and benchmark reports, +- the baseline package before moving to real sim validation. + +Do **not** use it as evidence that the architecture is already validated on realistic manipulation benchmarks. That validation is what should happen next. diff --git a/history/VLAarchtests_previous_README.md b/history/VLAarchtests_previous_README.md new file mode 100644 index 0000000000000000000000000000000000000000..3f8b2f526fd2306bf9548aa1b4b699d9d074f256 --- /dev/null +++ b/history/VLAarchtests_previous_README.md @@ -0,0 +1,172 @@ +--- +tags: + - robotics + - vision-language-action + - bimanual-manipulation + - rlbench + - rgbd +--- + +# VLAarchtests + +Bundle uploaded from `/workspace` runpod sessions dated `2026-03-25 UTC` and `2026-03-26 UTC`. 
+ +## Top-Level Contents + +- `code/reveal_vla_bimanual/` + - project code used for the proxy and RLBench runs in this bundle +- `artifacts/data/reveal_proxy/` + - proxy dataset bundles used by the handoff runs +- `artifacts/outputs/r3d/` + - previously uploaded R3D proxy outputs already present in the bundle +- `artifacts/outputs/r3d_handoff/` + - handoff proxy checkpoints +- `artifacts/outputs/r3d_handoff_phase/` + - phase-supervised handoff proxy checkpoints +- `artifacts/outputs/rlbench_current/` + - RLBench checkpoints from the current session +- `artifacts/reports/` + - proxy and RLBench result files copied from `/workspace/reports` +- `environment/` + - same-machine setup files and validation helpers +- `tests/` + - local test suite +- `handoff/instructions.md` + - instruction file used for the handoff work +- `MODEL_INDEX.md` + - checkpoint and result index +- `results/session_results_20260326.md` + - raw result tables for the `2026-03-25/26` work + +## Code Added Or Updated + +### Core model, memory, planner, and dataset paths + +- `code/reveal_vla_bimanual/models/backbones.py` +- `code/reveal_vla_bimanual/models/multiview_fusion.py` +- `code/reveal_vla_bimanual/models/observation_memory.py` +- `code/reveal_vla_bimanual/models/reveal_head.py` +- `code/reveal_vla_bimanual/models/world_model.py` +- `code/reveal_vla_bimanual/models/action_decoder.py` +- `code/reveal_vla_bimanual/models/planner.py` +- `code/reveal_vla_bimanual/models/policy.py` +- `code/reveal_vla_bimanual/train/losses.py` +- `code/reveal_vla_bimanual/sim_reveal/dataset.py` +- `code/reveal_vla_bimanual/sim_reveal/procedural_envs.py` +- `code/reveal_vla_bimanual/sim_rlbench/dataset.py` + +### Training and evaluation paths + +- `code/reveal_vla_bimanual/train/run_rlbench_experiment.py` +- `code/reveal_vla_bimanual/eval/run_reveal_benchmark.py` +- `code/reveal_vla_bimanual/eval/run_ablations.py` +- `code/reveal_vla_bimanual/eval/run_teacher_audit.py` +- 
`code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py` +- `code/reveal_vla_bimanual/eval/run_rlbench_knn_eval.py` + +### Added or updated training configs + +- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact.yaml` +- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial.yaml` +- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase.yaml` +- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase.yaml` +- `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_valid9.yaml` +- `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_common23.yaml` +- `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_current_wide.yaml` +- `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_step1.yaml` +- `code/reveal_vla_bimanual/train/configs/rlbench_push_box_backbone_only_clip_step1.yaml` + +### Test files + +The staged `tests/` directory contains `32` test modules plus `conftest.py`, including: + +- geometry and camera rotation coverage +- phase-label and candidate-ranking coverage +- planner gradient-flow and reocclusion gating coverage +- world-model null-rollout, field-consistency, and task-adapter coverage +- proxy scripted benchmark and teacher-audit coverage + +## Verification + +- local test command: + - `PYTHONPATH=/workspace/VLAarchtests_work/code/reveal_vla_bimanual python -m pytest -q /workspace/VLAarchtests_work/tests` +- result: + - `33 passed` + +## Raw Result Files + +### Proxy and handoff results + +- `artifacts/reports/reveal_smoke_mod/reveal_benchmark.json` +- `artifacts/reports/reveal_smoke_nogeom/reveal_benchmark.json` +- `artifacts/reports/reveal_smoke_noplanner/reveal_benchmark.json` +- `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` +- 
`artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json` +- `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` +- `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` +- `artifacts/reports/reveal_phase_ablations_compact/ablations.json` +- `artifacts/reports/reveal_teacher_audit_serious/teacher_audit.json` + +### RLBench result files + +- `artifacts/reports/rlbench_dual_buttons_baseline_len100_ep1_ik_rescale/rollout_eval.json` +- `artifacts/reports/rlbench_dual_buttons_common23_len100_ep1_ik_rescale/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_common23_len100_ep1_ik_rescale/rollout_eval.json` +- `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` +- `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` + +## Raw Result Tables + +### Proxy serious runs + +| Artifact | File | Raw values | +| --- | --- | --- | +| spatial handoff vs released baseline | `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.2167` | +| spatial-trained checkpoint with compact world model vs released baseline | `artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.5200` | +| compact-phase vs released baseline | `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, compact-phase mean success `0.5133` | +| spatial-phase with compact world model vs released baseline | 
`artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` | baseline mean success `0.5833`, spatial-phase compact-world-model mean success `0.4933` | + +### Proxy ablations + +| Artifact | File | Raw values | +| --- | --- | --- | +| compact-phase ablations | `artifacts/reports/reveal_phase_ablations_compact/ablations.json` | full `0.5133`, `no_geometry` `0.5133`, `no_spatial_memory` `0.4967`, `compact_world_model` `0.5133`, `no_planner` `0.4333`, `gaussian_candidates_only` `0.4667`, `no_task_head` `0.5133`, `no_support_mode_conditioning` `0.5133` | + +### RLBench direct-policy runs + +| Artifact | File | Raw values | +| --- | --- | --- | +| lift-ball wide checkpoint, one-step replanning | `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[148]`, noop fallbacks `[11]` | +| push-box step-1 checkpoint, one-step replanning | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[177]`, noop fallbacks `[0]` | +| push-box step-1 checkpoint, one-step replanning, `delta_scale=0.05` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[180]`, noop fallbacks `[0]` | + +### RLBench retrieval runs + +| Artifact | File | Raw values | +| --- | --- | --- | +| push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=1` | `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` | mean success `1.0`, mean return `1.0`, bank size `2815` | +| push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` | successes `[0.0, 1.0, 0.0, 0.0, 0.0]`, mean success `0.2`, bank size `2815` | +| push-box kNN, `bank_stride=1`, `top_k=1`, `time_window=4`, `episodes=5` | 
`artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` | successes `[0.0, 0.0, 1.0, 1.0, 0.0]`, mean success `0.4`, bank size `11259` | + +## Environment Recreation Files + +- `environment/setup_same_machine.sh` +- `environment/validate_same_machine.sh` +- `environment/run_peract2_13_rollouts.sh` +- `environment/runtime_env_vars.sh` +- `environment/hardware_snapshot.txt` +- `environment/glxinfo_B.txt` +- `environment/upstream_revisions.txt` +- `environment/system_packages_same_machine.txt` +- `environment/rlbench_env_export.yaml` +- `environment/rlbench_env_explicit.txt` +- `environment/rlbench_pip_freeze.txt` +- `environment/reveal_env_export.yaml` +- `environment/reveal_env_explicit.txt` +- `environment/reveal_pip_freeze.txt` + +Detailed raw tables for the `2026-03-25/26` work are in `results/session_results_20260326.md`. diff --git a/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.md b/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.md new file mode 100644 index 0000000000000000000000000000000000000000..d3dd498a2d88ac7704e094105b724417170a9ae7 --- /dev/null +++ b/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.md @@ -0,0 +1,14 @@ +# RLBench Rollout Eval + +- Checkpoint: `/workspace/workspace/VLAarchtests2/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt` +- Plan requested: `False` +- Plan applied: `False` +- Support-mode conditioning: `True` +- Task conditioning: `True` +- Geometry enabled: `True` +- World-model mode: `checkpoint_default` +- Mean success: `0.000` + +## Per-task + +- `bimanual_dual_push_buttons`: mean_success=0.000, returns=[0.0] diff --git a/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.partial.json b/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.partial.json new file mode 100644 index 0000000000000000000000000000000000000000..61ba34f05a66de514618314239df6aad027722ae --- /dev/null +++ 
b/reports/anchor_dual_push_smoke_ep1/original_trunk/rollout_eval.partial.json @@ -0,0 +1,280 @@ +{ + "checkpoint": "/workspace/workspace/VLAarchtests2/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt", + "plan_requested": false, + "plan_applied": false, + "planner_mode": "trainable", + "support_mode_conditioning": true, + "task_conditioning": true, + "geometry_enabled": true, + "world_model_mode": "checkpoint_default", + "episodes_per_task": 1, + "episode_length": 25, + "resolution": 256, + "reset_retries": 20, + "arm_mode": "planning", + "delta_scale": 1.0, + "cameras": [ + "front", + "wrist_left", + "wrist_right" + ], + "tasks": { + "bimanual_dual_push_buttons": { + "task_class": "BimanualDualPushButtons", + "successes": [ + 0.0 + ], + "returns": [ + 0.0 + ], + "path_recoveries": [ + 0 + ], + "noop_fallbacks": [ + 0 + ], + "reset_retries": [ + 0 + ], + "episode_traces": [ + { + "language_goal": "push the olive and the orange buttons", + "steps": [ + { + "timestep": 0, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 1, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 2, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 3, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 4, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 
+ }, + { + "timestep": 5, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 6, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 7, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 8, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 9, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 10, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 11, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 12, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 13, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 14, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 
15, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 16, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 17, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 18, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 19, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 20, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 21, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 22, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 23, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + }, + { + "timestep": 24, + "chosen_macro_mode": null, + "planner_scores": null, + "predicted_reocclusion": null, + "support_mode_conditioning": true, + "path_recoveries": 0, + "noop_fallbacks": 0 + } + ], + "success": 0.0, + "return": 
0.0, + "path_recoveries": 0, + "noop_fallbacks": 0 + } + ], + "mean_success": 0.0, + "mean_return": 0.0 + } + }, + "mean_success": 0.0 +} \ No newline at end of file diff --git a/reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json b/reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..536dd0207913a810a8a2cbc19ec782dcb538d00d --- /dev/null +++ b/reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json @@ -0,0 +1,12 @@ +{ + "delta_active_vs_trunk": 0.07999999999999996, + "delta_ci_run_bootstrap": [ + 0.0, + 0.15999999999999998 + ], + "means": { + "adapter_active_ft": 0.48, + "adapter_noop": 0.04, + "trunk_only_ft": 0.4 + } +} \ No newline at end of file diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_active_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_active_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..c753ef7f5e9446486b6033f2f4612b678bd136d3 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_active_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_active_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.17255997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + 
}, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_active_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "adapter_active_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_noop_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_noop_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..1cec4a225fd226d13344b7c17b2d81e6453214b3 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/adapter_noop_seed23.json @@ -0,0 +1,110 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_noop", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.17255997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + 
"steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_noop", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table." 
+} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..19962ea897793eed009cd720c55bd5a42880dcb4 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.json @@ -0,0 +1,60 @@ +{ + "anchor_pass": true, + "available_tracks": [ + "cloth_track" + ], + "ci_above_zero_tracks": [], + "headline_pass": false, + "package_name": "public_reveal_retrieve_package_v1", + "sign_of_life_pass": false, + "sign_of_life_track_count": 0, + "sign_of_life_tracks": [], + "target_macro_average_delta": -0.125, + "tracks": { + "cloth_track": { + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "delta_active_vs_trunk": -0.125, + "delta_active_vs_trunk_ci95": [ + -0.5, + 0.25 + ], + "delta_noop_vs_trunk": -0.125, + "modes": { + "adapter_active_ft": { + "disturbance_proxy": 0.17255997391683714, + "intervention_rate": 0.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "adapter_noop": { + "disturbance_proxy": 0.17255997391683714, + "intervention_rate": 0.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "trunk_only_ft": { + "disturbance_proxy": 0.24077834880777765, + "intervention_rate": 0.0, + "mean_success": 0.25, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + } + }, + "notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "public_source": "ManiSkill public scene proxy: PutSpoonOnTableClothInScene-v1", + "role": "target", + "signs_of_life": false, + "suite": "maniskill3", + "target_behavior": "reveal the spoon from under the cloth and retrieve it to the open area", + "task_family": "cloth_retrieval_proxy" + } + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.md b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.md new file mode 100644 index 0000000000000000000000000000000000000000..4d26c84eee398fc2433fcb89270d273ade7e8ba8 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/public_benchmark_package_summary.md @@ -0,0 +1,15 @@ +# ManiSkill Cloth Retrieval Smoke Summary + +- benchmark_task: PutSpoonUnderClothRetrievalProxy-v1 +- target_macro_average_delta: -0.125 +- headline_pass: False +- sign_of_life_pass: False + +## cloth_track +- delta_active_vs_trunk: -0.125 +- delta_noop_vs_trunk: -0.125 +- signs_of_life: False +- delta_active_vs_trunk_ci95: [-0.500, 0.250] +- trunk_only_ft: mean_success=0.250 +- adapter_noop: mean_success=0.125 +- adapter_active_ft: mean_success=0.125 diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/default/trunk_only_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/trunk_only_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..f32314435d421b794654efa954e909577ea9bd8f --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/default/trunk_only_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "trunk_only_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.25, + 
"intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.24077834880777765, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2957469991275242 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "trunk_only_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "trunk_only_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_active_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_active_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..45f6571b5d97844cf07c51a9495bbc9ef376ef4d --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_active_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_active_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.2, + "non_base_selection_rate": 0.06666666666666667, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.14130997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + 
"episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_active_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "adapter_active_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_noop_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_noop_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..1cec4a225fd226d13344b7c17b2d81e6453214b3 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/adapter_noop_seed23.json @@ -0,0 +1,110 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_noop", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.17255997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, 
+ "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_noop", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table." 
+} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..aea3709c4df6bea36b42cabf74de9366fb5de937 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.json @@ -0,0 +1,60 @@ +{ + "anchor_pass": true, + "available_tracks": [ + "cloth_track" + ], + "ci_above_zero_tracks": [], + "headline_pass": false, + "package_name": "public_reveal_retrieve_package_v1", + "sign_of_life_pass": false, + "sign_of_life_track_count": 0, + "sign_of_life_tracks": [], + "target_macro_average_delta": -0.125, + "tracks": { + "cloth_track": { + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "delta_active_vs_trunk": -0.125, + "delta_active_vs_trunk_ci95": [ + -0.5, + 0.25 + ], + "delta_noop_vs_trunk": -0.125, + "modes": { + "adapter_active_ft": { + "disturbance_proxy": 0.14130997391683714, + "intervention_rate": 0.2, + "mean_success": 0.125, + "non_base_selection_rate": 0.06666666666666667, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "adapter_noop": { + "disturbance_proxy": 0.17255997391683714, + "intervention_rate": 0.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "trunk_only_ft": { + "disturbance_proxy": 0.24077834880777765, + "intervention_rate": 0.0, + "mean_success": 0.25, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + } + }, + "notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "public_source": "ManiSkill public scene proxy: PutSpoonOnTableClothInScene-v1", + "role": "target", + "signs_of_life": false, + "suite": "maniskill3", + "target_behavior": "reveal the spoon from under the cloth and retrieve it to the open area", + "task_family": "cloth_retrieval_proxy" + } + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.md b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.md new file mode 100644 index 0000000000000000000000000000000000000000..4d26c84eee398fc2433fcb89270d273ade7e8ba8 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/public_benchmark_package_summary.md @@ -0,0 +1,15 @@ +# ManiSkill Cloth Retrieval Smoke Summary + +- benchmark_task: PutSpoonUnderClothRetrievalProxy-v1 +- target_macro_average_delta: -0.125 +- headline_pass: False +- sign_of_life_pass: False + +## cloth_track +- delta_active_vs_trunk: -0.125 +- delta_noop_vs_trunk: -0.125 +- signs_of_life: False +- delta_active_vs_trunk_ci95: [-0.500, 0.250] +- trunk_only_ft: mean_success=0.250 +- adapter_noop: mean_success=0.125 +- adapter_active_ft: mean_success=0.125 diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/trunk_only_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/trunk_only_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..f32314435d421b794654efa954e909577ea9bd8f --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh/trunk_only_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "trunk_only_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0 + ], + 
"success_rate": 0.25, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.24077834880777765, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2957469991275242 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "trunk_only_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom 
retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "trunk_only_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_active_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_active_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..45f6571b5d97844cf07c51a9495bbc9ef376ef4d --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_active_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_active_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.2, + "non_base_selection_rate": 0.06666666666666667, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.14130997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + 
"episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_active_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "adapter_active_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_noop_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_noop_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..1cec4a225fd226d13344b7c17b2d81e6453214b3 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/adapter_noop_seed23.json @@ -0,0 +1,110 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_noop", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.17255997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + 
"episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_noop", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table." 
+} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..aea3709c4df6bea36b42cabf74de9366fb5de937 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.json @@ -0,0 +1,60 @@ +{ + "anchor_pass": true, + "available_tracks": [ + "cloth_track" + ], + "ci_above_zero_tracks": [], + "headline_pass": false, + "package_name": "public_reveal_retrieve_package_v1", + "sign_of_life_pass": false, + "sign_of_life_track_count": 0, + "sign_of_life_tracks": [], + "target_macro_average_delta": -0.125, + "tracks": { + "cloth_track": { + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "delta_active_vs_trunk": -0.125, + "delta_active_vs_trunk_ci95": [ + -0.5, + 0.25 + ], + "delta_noop_vs_trunk": -0.125, + "modes": { + "adapter_active_ft": { + "disturbance_proxy": 0.14130997391683714, + "intervention_rate": 0.2, + "mean_success": 0.125, + "non_base_selection_rate": 0.06666666666666667, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "adapter_noop": { + "disturbance_proxy": 0.17255997391683714, + "intervention_rate": 0.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "trunk_only_ft": { + "disturbance_proxy": 0.24077834880777765, + "intervention_rate": 0.0, + "mean_success": 0.25, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + } + }, + "notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "public_source": "ManiSkill public scene proxy: PutSpoonOnTableClothInScene-v1", + "role": "target", + "signs_of_life": false, + "suite": "maniskill3", + "target_behavior": "reveal the spoon from under the cloth and retrieve it to the open area", + "task_family": "cloth_retrieval_proxy" + } + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.md b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.md new file mode 100644 index 0000000000000000000000000000000000000000..4d26c84eee398fc2433fcb89270d273ade7e8ba8 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/public_benchmark_package_summary.md @@ -0,0 +1,15 @@ +# ManiSkill Cloth Retrieval Smoke Summary + +- benchmark_task: PutSpoonUnderClothRetrievalProxy-v1 +- target_macro_average_delta: -0.125 +- headline_pass: False +- sign_of_life_pass: False + +## cloth_track +- delta_active_vs_trunk: -0.125 +- delta_noop_vs_trunk: -0.125 +- signs_of_life: False +- delta_active_vs_trunk_ci95: [-0.500, 0.250] +- trunk_only_ft: mean_success=0.250 +- adapter_noop: mean_success=0.125 +- adapter_active_ft: mean_success=0.125 diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/trunk_only_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/trunk_only_ft_seed23.json new file mode 100644 index 0000000000000000000000000000000000000000..f32314435d421b794654efa954e909577ea9bd8f --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/low_thresh_less_bonus/trunk_only_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "trunk_only_ft", + "episodes": 8, + 
"successes": [ + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.25, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.24077834880777765, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2957469991275242 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + } + ], + "eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "trunk_only_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + 
"proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "trunk_only_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f04a988c01c524eb7c0ad3a60c3fbd0ebdebe46e --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json @@ -0,0 +1,105 @@ +{ + "default": { + "active": 0.125, + "args": [], + "delta": -0.125, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "noop": 0.125, + "signs_of_life": false, + "trunk": 0.25 + }, + "low_thresh": { + "active": 0.125, + "args": [ + "--adapter-confidence-threshold", + "0.45", + "--retrieve-access-threshold", + "0.30", + "--retrieve-persistence-threshold", + "0.30", + "--retrieve-support-threshold", + "0.20", + "--planner-retrieve-stage-access-threshold", + "0.30", + "--planner-retrieve-stage-reveal-threshold", + "0.20", + "--planner-retrieve-stage-persistence-threshold", + "0.25", + "--planner-retrieve-stage-support-threshold", + "0.15" + ], + "delta": 
-0.125, + "intervention_rate": 0.2, + "non_base_selection_rate": 0.06666666666666667, + "noop": 0.125, + "signs_of_life": false, + "trunk": 0.25 + }, + "low_thresh_less_bonus": { + "active": 0.125, + "args": [ + "--adapter-confidence-threshold", + "0.45", + "--retrieve-access-threshold", + "0.30", + "--retrieve-persistence-threshold", + "0.30", + "--retrieve-support-threshold", + "0.20", + "--planner-retrieve-stage-access-threshold", + "0.30", + "--planner-retrieve-stage-reveal-threshold", + "0.20", + "--planner-retrieve-stage-persistence-threshold", + "0.25", + "--planner-retrieve-stage-support-threshold", + "0.15", + "--planner-mode-preference-bonus", + "1.5", + "--planner-premature-retrieve-penalty", + "2.0", + "--planner-premature-insert-penalty", + "1.5" + ], + "delta": -0.125, + "intervention_rate": 0.2, + "non_base_selection_rate": 0.06666666666666667, + "noop": 0.125, + "signs_of_life": false, + "trunk": 0.25 + }, + "very_low_thresh_less_bonus": { + "active": 0.125, + "args": [ + "--adapter-confidence-threshold", + "0.30", + "--retrieve-access-threshold", + "0.20", + "--retrieve-persistence-threshold", + "0.20", + "--retrieve-support-threshold", + "0.10", + "--planner-retrieve-stage-access-threshold", + "0.20", + "--planner-retrieve-stage-reveal-threshold", + "0.10", + "--planner-retrieve-stage-persistence-threshold", + "0.15", + "--planner-retrieve-stage-support-threshold", + "0.10", + "--planner-mode-preference-bonus", + "1.5", + "--planner-premature-retrieve-penalty", + "2.0", + "--planner-premature-insert-penalty", + "1.5" + ], + "delta": -0.125, + "intervention_rate": 1.0, + "non_base_selection_rate": 0.5333333333333333, + "noop": 0.125, + "signs_of_life": false, + "trunk": 0.25 + } +} \ No newline at end of file diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/adapter_active_ft_seed23.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/adapter_active_ft_seed23.json new file mode 
100644 index 0000000000000000000000000000000000000000..f41cebef0f0f2c98a7b5eb31152aea7406b36122 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/adapter_active_ft_seed23.json @@ -0,0 +1,131 @@ +{ + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "adapter_mode": "adapter_active_ft", + "episodes": 8, + "successes": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.125, + "intervention_rate": 1.0, + "non_base_selection_rate": 0.5333333333333333, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.14130997391683714, + "episode_records": [ + { + "episode_seed": 170089, + "success": true, + "steps": 2, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.38047979133469717 + }, + { + "episode_seed": 170091, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170092, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170095, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170099, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + }, + { + "episode_seed": 170100, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170102, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25 + }, + { + "episode_seed": 170103, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0 + } + ], + 
"eval_protocol": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "role": "target", + "eval_mode": "adapter_active_ft", + "seed": 17, + "episodes": 8, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgb_triplicate_zero_depth", + "action_horizon": 8, + "action_space": "widowx_delta_pose", + "same_test_episodes": true + }, + "proxy_notes": "Public ManiSkill bridge scene with custom retrieval initialization. The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "train_spec": { + "track_id": "cloth_track", + "suite": "maniskill3", + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "model_variant": "adapter_active_ft", + "seed": 23, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 174, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "cloth_bridge_smoke_v1_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.json b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..d75e350104dd20ae75631438ba8124ca930e55fc --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.json @@ -0,0 +1,60 @@ +{ + "anchor_pass": true, + "available_tracks": [ + "cloth_track" + ], + 
"ci_above_zero_tracks": [], + "headline_pass": false, + "package_name": "public_reveal_retrieve_package_v1", + "sign_of_life_pass": false, + "sign_of_life_track_count": 0, + "sign_of_life_tracks": [], + "target_macro_average_delta": -0.125, + "tracks": { + "cloth_track": { + "benchmark_task": "PutSpoonUnderClothRetrievalProxy-v1", + "delta_active_vs_trunk": -0.125, + "delta_active_vs_trunk_ci95": [ + -0.5, + 0.25 + ], + "delta_noop_vs_trunk": -0.125, + "modes": { + "adapter_active_ft": { + "disturbance_proxy": 0.14130997391683714, + "intervention_rate": 1.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.5333333333333333, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "adapter_noop": { + "disturbance_proxy": 0.17255997391683714, + "intervention_rate": 0.0, + "mean_success": 0.125, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "trunk_only_ft": { + "disturbance_proxy": 0.24077834880777765, + "intervention_rate": 0.0, + "mean_success": 0.25, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + } + }, + "notes": "Public ManiSkill bridge scene with custom retrieval initialization. 
The spoon is placed under the cloth region and must be revealed and extracted to the open side of the table.", + "public_source": "ManiSkill public scene proxy: PutSpoonOnTableClothInScene-v1", + "role": "target", + "signs_of_life": false, + "suite": "maniskill3", + "target_behavior": "reveal the spoon from under the cloth and retrieve it to the open area", + "task_family": "cloth_retrieval_proxy" + } + } +} diff --git a/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.md b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.md new file mode 100644 index 0000000000000000000000000000000000000000..4d26c84eee398fc2433fcb89270d273ade7e8ba8 --- /dev/null +++ b/reports/maniskill_cloth_bridge_val_sweep_seed23/very_low_thresh_less_bonus/public_benchmark_package_summary.md @@ -0,0 +1,15 @@ +# ManiSkill Cloth Retrieval Smoke Summary + +- benchmark_task: PutSpoonUnderClothRetrievalProxy-v1 +- target_macro_average_delta: -0.125 +- headline_pass: False +- sign_of_life_pass: False + +## cloth_track +- delta_active_vs_trunk: -0.125 +- delta_noop_vs_trunk: -0.125 +- signs_of_life: False +- delta_active_vs_trunk_ci95: [-0.500, 0.250] +- trunk_only_ft: mean_success=0.250 +- adapter_noop: mean_success=0.125 +- adapter_active_ft: mean_success=0.125 diff --git a/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json b/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..69a7341e98dbd5721e61f1d5647a2c48fad1d2ea --- /dev/null +++ b/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json @@ -0,0 +1,64 @@ +{ + "anchor_pass": true, + "available_tracks": [ + "occlusion_track" + ], + "ci_above_zero_tracks": [ + "occlusion_track" + ], + "headline_pass": true, + "package_name": 
"public_reveal_retrieve_package_v1", + "sign_of_life_pass": false, + "sign_of_life_track_count": 1, + "sign_of_life_tracks": [ + "occlusion_track" + ], + "target_macro_average_delta": 0.5600000061094761, + "tracks": { + "occlusion_track": { + "benchmark_task": "PickClutterYCB-v1", + "delta_active_vs_trunk": 0.5600000061094761, + "delta_active_vs_trunk_ci95": [ + 0.40000003576278687, + 0.7000000476837158 + ], + "delta_noop_vs_trunk": 0.0, + "modes": { + "adapter_active_ft": { + "disturbance_proxy": 0.269595600056264, + "intervention_rate": 1.0, + "mean_success": 0.6200000047683716, + "non_base_selection_rate": 1.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 4.0, + "steps_to_retrieve": 1.0 + }, + "adapter_noop": { + "disturbance_proxy": 0.3563998530939129, + "intervention_rate": 0.0, + "mean_success": 0.05999999865889549, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + }, + "trunk_only_ft": { + "disturbance_proxy": 0.35624525584544925, + "intervention_rate": 0.0, + "mean_success": 0.05999999865889549, + "non_base_selection_rate": 0.0, + "num_runs": 1, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0 + } + }, + "notes": "Closest maintained public occluded retrieval task. 
Treat as the canopy / dense occlusion proxy.", + "public_source": "https://maniskill.readthedocs.io/en/latest/tasks/table_top_gripper/index.html", + "role": "target", + "signs_of_life": true, + "suite": "maniskill3", + "target_behavior": "retrieve a target object from dense occluding clutter", + "task_family": "dense_occluded_retrieval" + } + } +} diff --git a/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/trunk_only_ft_seed17.json b/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/trunk_only_ft_seed17.json new file mode 100644 index 0000000000000000000000000000000000000000..c3fa0549259059e5e13353dfaede4472fc723342 --- /dev/null +++ b/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/trunk_only_ft_seed17.json @@ -0,0 +1,508 @@ +{ + "track_id": "occlusion_track", + "suite": "maniskill3", + "benchmark_task": "PickClutterYCB-v1", + "role": "target", + "adapter_mode": "trunk_only_ft", + "episodes": 50, + "successes": [ + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "success_rate": 0.06, + "intervention_rate": 0.0, + "non_base_selection_rate": 0.0, + "steps_to_first_reveal_or_access": 1.0, + "steps_to_retrieve": 4.0, + "disturbance_proxy": 0.35624525584544925, + "episode_records": [ + { + "episode_seed": 172000, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.28559969730049295 + }, + { + "episode_seed": 172001, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.29001158314992637 + }, + { + "episode_seed": 172002, + "success": true, + "steps": 1, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.06032368095836205 + }, + { + "episode_seed": 172003, + "success": false, + 
"steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.23861962295340078 + }, + { + "episode_seed": 172004, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.03519570753780732 + }, + { + "episode_seed": 172005, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.3575204389510607 + }, + { + "episode_seed": 172006, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1659683484007246 + }, + { + "episode_seed": 172007, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.4146749790165716 + }, + { + "episode_seed": 172008, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.06392001453389996 + }, + { + "episode_seed": 172009, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.6996338509148421 + }, + { + "episode_seed": 172010, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.10689976753049545 + }, + { + "episode_seed": 172011, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.0016430780521566508 + }, + { + "episode_seed": 172012, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1448332825461624 + }, + { + "episode_seed": 172013, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.14780844103161989 + }, + { + "episode_seed": 172014, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.06201380724719842 + }, + 
{ + "episode_seed": 172015, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.5304453840174878 + }, + { + "episode_seed": 172016, + "success": true, + "steps": 1, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 1.0 + }, + { + "episode_seed": 172017, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.10272666234643069 + }, + { + "episode_seed": 172018, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1404018832372511 + }, + { + "episode_seed": 172019, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.8198742053409427 + }, + { + "episode_seed": 172020, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1396820557128413 + }, + { + "episode_seed": 172021, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2780825978326007 + }, + { + "episode_seed": 172022, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2770370769421504 + }, + { + "episode_seed": 172023, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.6787033615992542 + }, + { + "episode_seed": 172024, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.8743992599497308 + }, + { + "episode_seed": 172025, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.5640193824369 + }, + { + "episode_seed": 172026, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 
0.14783363982945374 + }, + { + "episode_seed": 172027, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.5881037823615909 + }, + { + "episode_seed": 172028, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.06078995146632238 + }, + { + "episode_seed": 172029, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 2.7627698386822882e-05 + }, + { + "episode_seed": 172030, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.571195980759569 + }, + { + "episode_seed": 172031, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.3971142250075399 + }, + { + "episode_seed": 172032, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.45050146941049607 + }, + { + "episode_seed": 172033, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.23038665830648952 + }, + { + "episode_seed": 172034, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.345361904592058 + }, + { + "episode_seed": 172035, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.8075275744968867 + }, + { + "episode_seed": 172036, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1419961905008904 + }, + { + "episode_seed": 172037, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.1756078436771844 + }, + { + "episode_seed": 172038, + "success": false, + "steps": 4, + "first_reveal_step": 1, + 
"first_retrieve_step": null, + "episode_disturbance": 0.6760036004923412 + }, + { + "episode_seed": 172039, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.5103615745563085 + }, + { + "episode_seed": 172040, + "success": true, + "steps": 1, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.8978104284286321 + }, + { + "episode_seed": 172041, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.18917643593059807 + }, + { + "episode_seed": 172042, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.40701857845038436 + }, + { + "episode_seed": 172043, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.09000785092387738 + }, + { + "episode_seed": 172044, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.25009696686076754 + }, + { + "episode_seed": 172045, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.6899098007084525 + }, + { + "episode_seed": 172046, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.2933608814023101 + }, + { + "episode_seed": 172047, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.6717945656777323 + }, + { + "episode_seed": 172048, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.5704154010675708 + }, + { + "episode_seed": 172049, + "success": false, + "steps": 4, + "first_reveal_step": 1, + "first_retrieve_step": null, + "episode_disturbance": 0.16982169012631054 + } + ], + "eval_protocol": { + "track_id": 
"occlusion_track", + "suite": "maniskill3", + "benchmark_task": "PickClutterYCB-v1", + "role": "target", + "eval_mode": "trunk_only_ft", + "seed": 17, + "episodes": 50, + "resolution": 224, + "cameras": [ + "front", + "left", + "right" + ], + "observation_stack": "rgbd_3cam", + "action_horizon": 8, + "action_space": "bimanual_delta_pose", + "same_test_episodes": true + }, + "train_spec": { + "track_id": "occlusion_track", + "suite": "maniskill3", + "benchmark_task": "PickClutterYCB-v1", + "model_variant": "trunk_only_ft", + "seed": 17, + "train_demos": 32, + "val_demos": 8, + "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "optimizer": "adamw", + "learning_rate": 0.0001, + "lr_schedule": "constant", + "batch_size": 4, + "augmentations": "none", + "early_stopping_metric": "val_total", + "max_gradient_steps": 144, + "unfreeze_scope": "fusion_memory_decoder", + "dataset_split_id": "pickclutter_smoke_v3_seed17", + "same_data_policy": true, + "same_init_policy": true + } +} diff --git a/reports/repaired_dual_push_chunk8_ep3/rollout_eval.md b/reports/repaired_dual_push_chunk8_ep3/rollout_eval.md new file mode 100644 index 0000000000000000000000000000000000000000..f3b369ad14f9a8f1d695ffd834c2ee7d4e861a2f --- /dev/null +++ b/reports/repaired_dual_push_chunk8_ep3/rollout_eval.md @@ -0,0 +1,14 @@ +# RLBench Rollout Eval + +- Checkpoint: `/workspace/workspace/VLAarchtests2/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt` +- Plan requested: `False` +- Plan applied: `False` +- Support-mode conditioning: `True` +- Task conditioning: `True` +- Geometry enabled: `True` +- World-model mode: `checkpoint_default` +- Mean success: `0.000` + +## Per-task + +- `bimanual_dual_push_buttons`: error=The call failed on the V-REP side. Return value: -1