Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_smoke/proposal_alignment_diagnostics.json +358 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md +58 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json +42 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/default/reveal_benchmark.json +0 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/ignore_proposal_logits_in_shortlist/reveal_benchmark.json +0 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json +0 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.md +15 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.json +0 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.md +15 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.json +0 -0
- code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.md +15 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/.gitignore +12 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md +84 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md +67 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/upstream_pins.md +24 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/xorg.rtx6000.conf +32 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/reveal310.yaml +38 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/rlbench310.yaml +50 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__init__.py +3 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/ablations.py +7 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compare_rlbench_sweeps.py +143 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py +100 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/protocols.py +41 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_anybimanual_anchor_eval.py +179 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pyproject.toml +32 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__init__.py +15 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/base.py +32 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py +73 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/dataset.py +634 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/generate_dataset.py +50 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py +29 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py +16 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/labels.py +61 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py +1389 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/teachers.py +41 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/base.yaml +20 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml +26 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_base_fast.yaml +92 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_rank_only_rebuild128.yaml +91 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml +97 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml +100 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nodepth.yaml +16 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_noplanner.yaml +16 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_norolesym.yaml +16 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nowm.yaml +16 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_clip.yaml +72 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_dummy.yaml +75 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_clip.yaml +18 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_dummy.yaml +17 -0
- code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd.yaml +18 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_smoke/proposal_alignment_diagnostics.json
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
|
| 3 |
+
"dataset_path": "/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt",
|
| 4 |
+
"teacher_macro_names_by_task": {
|
| 5 |
+
"foliage": [
|
| 6 |
+
"teacher",
|
| 7 |
+
"pin_canopy",
|
| 8 |
+
"maintain_gap",
|
| 9 |
+
"premature_retrieve",
|
| 10 |
+
"reveal_with_release",
|
| 11 |
+
"wrong_side_reveal",
|
| 12 |
+
"foliage_immediate_reocclusion",
|
| 13 |
+
"over_disturbance"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
"overall": {
|
| 17 |
+
"samples": 8,
|
| 18 |
+
"teacher_oracle_top1_accuracy": 1.0,
|
| 19 |
+
"proposal_teacher_utility_spearman": 0.9880235200593538,
|
| 20 |
+
"slotwise_reconstruction_mse": [
|
| 21 |
+
0.030513444915413857,
|
| 22 |
+
0.12480158358812332,
|
| 23 |
+
0.12676900625228882,
|
| 24 |
+
0.23568345606327057,
|
| 25 |
+
0.1129273921251297,
|
| 26 |
+
0.18383629620075226,
|
| 27 |
+
0.11721545457839966,
|
| 28 |
+
0.24060414731502533
|
| 29 |
+
],
|
| 30 |
+
"slotwise_best_match_mse": [
|
| 31 |
+
0.030513444915413857,
|
| 32 |
+
0.08985880762338638,
|
| 33 |
+
0.07267005741596222,
|
| 34 |
+
0.08659016340970993,
|
| 35 |
+
0.08734967559576035,
|
| 36 |
+
0.08924143761396408,
|
| 37 |
+
0.0867096558213234,
|
| 38 |
+
0.09597492218017578
|
| 39 |
+
],
|
| 40 |
+
"diagonal_reconstruction_mse": 0.14654386043548584,
|
| 41 |
+
"best_match_reconstruction_mse": 0.0798635184764862,
|
| 42 |
+
"teacher_slot_coverage_rate": [
|
| 43 |
+
1.0,
|
| 44 |
+
0.25,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.375,
|
| 48 |
+
0.0,
|
| 49 |
+
0.0,
|
| 50 |
+
0.0
|
| 51 |
+
],
|
| 52 |
+
"proposal_slot_teacher_confusion": [
|
| 53 |
+
[
|
| 54 |
+
8,
|
| 55 |
+
0,
|
| 56 |
+
0,
|
| 57 |
+
0,
|
| 58 |
+
0,
|
| 59 |
+
0,
|
| 60 |
+
0,
|
| 61 |
+
0
|
| 62 |
+
],
|
| 63 |
+
[
|
| 64 |
+
6,
|
| 65 |
+
2,
|
| 66 |
+
0,
|
| 67 |
+
0,
|
| 68 |
+
0,
|
| 69 |
+
0,
|
| 70 |
+
0,
|
| 71 |
+
0
|
| 72 |
+
],
|
| 73 |
+
[
|
| 74 |
+
8,
|
| 75 |
+
0,
|
| 76 |
+
0,
|
| 77 |
+
0,
|
| 78 |
+
0,
|
| 79 |
+
0,
|
| 80 |
+
0,
|
| 81 |
+
0
|
| 82 |
+
],
|
| 83 |
+
[
|
| 84 |
+
8,
|
| 85 |
+
0,
|
| 86 |
+
0,
|
| 87 |
+
0,
|
| 88 |
+
0,
|
| 89 |
+
0,
|
| 90 |
+
0,
|
| 91 |
+
0
|
| 92 |
+
],
|
| 93 |
+
[
|
| 94 |
+
5,
|
| 95 |
+
0,
|
| 96 |
+
0,
|
| 97 |
+
0,
|
| 98 |
+
3,
|
| 99 |
+
0,
|
| 100 |
+
0,
|
| 101 |
+
0
|
| 102 |
+
],
|
| 103 |
+
[
|
| 104 |
+
7,
|
| 105 |
+
1,
|
| 106 |
+
0,
|
| 107 |
+
0,
|
| 108 |
+
0,
|
| 109 |
+
0,
|
| 110 |
+
0,
|
| 111 |
+
0
|
| 112 |
+
],
|
| 113 |
+
[
|
| 114 |
+
5,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
3,
|
| 119 |
+
0,
|
| 120 |
+
0,
|
| 121 |
+
0
|
| 122 |
+
],
|
| 123 |
+
[
|
| 124 |
+
8,
|
| 125 |
+
0,
|
| 126 |
+
0,
|
| 127 |
+
0,
|
| 128 |
+
0,
|
| 129 |
+
0,
|
| 130 |
+
0,
|
| 131 |
+
0
|
| 132 |
+
]
|
| 133 |
+
],
|
| 134 |
+
"proposal_slot_best_teacher_slot": [
|
| 135 |
+
{
|
| 136 |
+
"proposal_slot": 0,
|
| 137 |
+
"best_teacher_slot": 0,
|
| 138 |
+
"best_teacher_slot_rate": 1.0,
|
| 139 |
+
"support": 8
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"proposal_slot": 1,
|
| 143 |
+
"best_teacher_slot": 0,
|
| 144 |
+
"best_teacher_slot_rate": 0.75,
|
| 145 |
+
"support": 8
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"proposal_slot": 2,
|
| 149 |
+
"best_teacher_slot": 0,
|
| 150 |
+
"best_teacher_slot_rate": 1.0,
|
| 151 |
+
"support": 8
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"proposal_slot": 3,
|
| 155 |
+
"best_teacher_slot": 0,
|
| 156 |
+
"best_teacher_slot_rate": 1.0,
|
| 157 |
+
"support": 8
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"proposal_slot": 4,
|
| 161 |
+
"best_teacher_slot": 0,
|
| 162 |
+
"best_teacher_slot_rate": 0.625,
|
| 163 |
+
"support": 8
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"proposal_slot": 5,
|
| 167 |
+
"best_teacher_slot": 0,
|
| 168 |
+
"best_teacher_slot_rate": 0.875,
|
| 169 |
+
"support": 8
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"proposal_slot": 6,
|
| 173 |
+
"best_teacher_slot": 0,
|
| 174 |
+
"best_teacher_slot_rate": 0.625,
|
| 175 |
+
"support": 8
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"proposal_slot": 7,
|
| 179 |
+
"best_teacher_slot": 0,
|
| 180 |
+
"best_teacher_slot_rate": 1.0,
|
| 181 |
+
"support": 8
|
| 182 |
+
}
|
| 183 |
+
],
|
| 184 |
+
"proposal_candidate_pairwise_l2": 2.573041468858719
|
| 185 |
+
},
|
| 186 |
+
"by_task": {
|
| 187 |
+
"foliage": {
|
| 188 |
+
"samples": 8,
|
| 189 |
+
"teacher_oracle_top1_accuracy": 1.0,
|
| 190 |
+
"proposal_teacher_utility_spearman": 0.9880235200593538,
|
| 191 |
+
"slotwise_reconstruction_mse": [
|
| 192 |
+
0.030513444915413857,
|
| 193 |
+
0.12480158358812332,
|
| 194 |
+
0.12676900625228882,
|
| 195 |
+
0.23568345606327057,
|
| 196 |
+
0.1129273921251297,
|
| 197 |
+
0.18383629620075226,
|
| 198 |
+
0.11721545457839966,
|
| 199 |
+
0.24060414731502533
|
| 200 |
+
],
|
| 201 |
+
"slotwise_best_match_mse": [
|
| 202 |
+
0.030513444915413857,
|
| 203 |
+
0.08985880762338638,
|
| 204 |
+
0.07267005741596222,
|
| 205 |
+
0.08659016340970993,
|
| 206 |
+
0.08734967559576035,
|
| 207 |
+
0.08924143761396408,
|
| 208 |
+
0.0867096558213234,
|
| 209 |
+
0.09597492218017578
|
| 210 |
+
],
|
| 211 |
+
"diagonal_reconstruction_mse": 0.14654386043548584,
|
| 212 |
+
"best_match_reconstruction_mse": 0.0798635184764862,
|
| 213 |
+
"teacher_slot_coverage_rate": [
|
| 214 |
+
1.0,
|
| 215 |
+
0.25,
|
| 216 |
+
0.0,
|
| 217 |
+
0.0,
|
| 218 |
+
0.375,
|
| 219 |
+
0.0,
|
| 220 |
+
0.0,
|
| 221 |
+
0.0
|
| 222 |
+
],
|
| 223 |
+
"proposal_slot_teacher_confusion": [
|
| 224 |
+
[
|
| 225 |
+
8,
|
| 226 |
+
0,
|
| 227 |
+
0,
|
| 228 |
+
0,
|
| 229 |
+
0,
|
| 230 |
+
0,
|
| 231 |
+
0,
|
| 232 |
+
0
|
| 233 |
+
],
|
| 234 |
+
[
|
| 235 |
+
6,
|
| 236 |
+
2,
|
| 237 |
+
0,
|
| 238 |
+
0,
|
| 239 |
+
0,
|
| 240 |
+
0,
|
| 241 |
+
0,
|
| 242 |
+
0
|
| 243 |
+
],
|
| 244 |
+
[
|
| 245 |
+
8,
|
| 246 |
+
0,
|
| 247 |
+
0,
|
| 248 |
+
0,
|
| 249 |
+
0,
|
| 250 |
+
0,
|
| 251 |
+
0,
|
| 252 |
+
0
|
| 253 |
+
],
|
| 254 |
+
[
|
| 255 |
+
8,
|
| 256 |
+
0,
|
| 257 |
+
0,
|
| 258 |
+
0,
|
| 259 |
+
0,
|
| 260 |
+
0,
|
| 261 |
+
0,
|
| 262 |
+
0
|
| 263 |
+
],
|
| 264 |
+
[
|
| 265 |
+
5,
|
| 266 |
+
0,
|
| 267 |
+
0,
|
| 268 |
+
0,
|
| 269 |
+
3,
|
| 270 |
+
0,
|
| 271 |
+
0,
|
| 272 |
+
0
|
| 273 |
+
],
|
| 274 |
+
[
|
| 275 |
+
7,
|
| 276 |
+
1,
|
| 277 |
+
0,
|
| 278 |
+
0,
|
| 279 |
+
0,
|
| 280 |
+
0,
|
| 281 |
+
0,
|
| 282 |
+
0
|
| 283 |
+
],
|
| 284 |
+
[
|
| 285 |
+
5,
|
| 286 |
+
0,
|
| 287 |
+
0,
|
| 288 |
+
0,
|
| 289 |
+
3,
|
| 290 |
+
0,
|
| 291 |
+
0,
|
| 292 |
+
0
|
| 293 |
+
],
|
| 294 |
+
[
|
| 295 |
+
8,
|
| 296 |
+
0,
|
| 297 |
+
0,
|
| 298 |
+
0,
|
| 299 |
+
0,
|
| 300 |
+
0,
|
| 301 |
+
0,
|
| 302 |
+
0
|
| 303 |
+
]
|
| 304 |
+
],
|
| 305 |
+
"proposal_slot_best_teacher_slot": [
|
| 306 |
+
{
|
| 307 |
+
"proposal_slot": 0,
|
| 308 |
+
"best_teacher_slot": 0,
|
| 309 |
+
"best_teacher_slot_rate": 1.0,
|
| 310 |
+
"support": 8
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"proposal_slot": 1,
|
| 314 |
+
"best_teacher_slot": 0,
|
| 315 |
+
"best_teacher_slot_rate": 0.75,
|
| 316 |
+
"support": 8
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"proposal_slot": 2,
|
| 320 |
+
"best_teacher_slot": 0,
|
| 321 |
+
"best_teacher_slot_rate": 1.0,
|
| 322 |
+
"support": 8
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"proposal_slot": 3,
|
| 326 |
+
"best_teacher_slot": 0,
|
| 327 |
+
"best_teacher_slot_rate": 1.0,
|
| 328 |
+
"support": 8
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"proposal_slot": 4,
|
| 332 |
+
"best_teacher_slot": 0,
|
| 333 |
+
"best_teacher_slot_rate": 0.625,
|
| 334 |
+
"support": 8
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"proposal_slot": 5,
|
| 338 |
+
"best_teacher_slot": 0,
|
| 339 |
+
"best_teacher_slot_rate": 0.875,
|
| 340 |
+
"support": 8
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"proposal_slot": 6,
|
| 344 |
+
"best_teacher_slot": 0,
|
| 345 |
+
"best_teacher_slot_rate": 0.625,
|
| 346 |
+
"support": 8
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"proposal_slot": 7,
|
| 350 |
+
"best_teacher_slot": 0,
|
| 351 |
+
"best_teacher_slot_rate": 1.0,
|
| 352 |
+
"support": 8
|
| 353 |
+
}
|
| 354 |
+
],
|
| 355 |
+
"proposal_candidate_pairwise_l2": 2.573041468858719
|
| 356 |
+
}
|
| 357 |
+
}
|
| 358 |
+
}
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RVT Overlap Branch Status
|
| 2 |
+
|
| 3 |
+
## Code Changes
|
| 4 |
+
|
| 5 |
+
- added RVT residual output adapter in `/workspace/VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py`
|
| 6 |
+
- added RVT frozen-stage config in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17.yaml`
|
| 7 |
+
- added RVT upper-layer stage config in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_unfreeze_top2_seed17.yaml`
|
| 8 |
+
- made RLBench init checkpoint loading skip incompatible shapes when `init_strict=false` in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
|
| 9 |
+
- added offline RLBench pickle bootstrap in `/workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py`
|
| 10 |
+
- added overlap task alias mapping in `/workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py`
|
| 11 |
+
- added branch summary script in `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
|
| 12 |
+
- added branch runner in `/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
|
| 13 |
+
|
| 14 |
+
## Verification
|
| 15 |
+
|
| 16 |
+
- `python -m py_compile` passed for:
|
| 17 |
+
- `models/rvt_backbone.py`
|
| 18 |
+
- `train/run_rlbench_experiment.py`
|
| 19 |
+
- `sim_rlbench/dataset.py`
|
| 20 |
+
- `sim_rlbench/task_resolver.py`
|
| 21 |
+
- `eval/summarize_rvt_overlap_branch.py`
|
| 22 |
+
- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh` passed
|
| 23 |
+
- `pytest -q` passed for:
|
| 24 |
+
- `/workspace/VLAarchtests/tests/test_rvt_backbone_forward.py`
|
| 25 |
+
- `/workspace/VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py`
|
| 26 |
+
- `/workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py`
|
| 27 |
+
- `/workspace/VLAarchtests/tests/test_rlbench_init_checkpoint.py`
|
| 28 |
+
- `/workspace/VLAarchtests/tests/test_rlbench_pickle_bootstrap.py`
|
| 29 |
+
- `/workspace/VLAarchtests/tests/test_rlbench_task_resolver_aliases.py`
|
| 30 |
+
- `/workspace/VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
|
| 31 |
+
|
| 32 |
+
## Stage 1 Train
|
| 33 |
+
|
| 34 |
+
- checkpoint: `/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt`
|
| 35 |
+
- train summary: `/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/summary.json`
|
| 36 |
+
- final train total: `0.043179353826920445`
|
| 37 |
+
- final val total: `0.039591669984665984`
|
| 38 |
+
- train seconds: `2261.2839448451996`
|
| 39 |
+
|
| 40 |
+
## Stage 1 Overlap Eval
|
| 41 |
+
|
| 42 |
+
- rollout summary: `/workspace/reports/rvt_overlap_branch_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17_noplan_split/rollout_eval.json`
|
| 43 |
+
- `bimanual_push_box`: mean_success=`0.0`, mean_return=`0.0`
|
| 44 |
+
- `bimanual_lift_ball`: mean_success=`0.0`, mean_return=`0.0`
|
| 45 |
+
- `bimanual_dual_push_buttons`: mean_success=`0.0`, mean_return=`0.0`
|
| 46 |
+
- stage1 mean_success=`0.0`
|
| 47 |
+
|
| 48 |
+
## Gate
|
| 49 |
+
|
| 50 |
+
- local AnyBimanual overlap floor: `0.16`
|
| 51 |
+
- public AnyBimanual overlap best: `0.6933333333333334`
|
| 52 |
+
- stage1 clears local floor: `false`
|
| 53 |
+
- stage2 run: `false`
|
| 54 |
+
|
| 55 |
+
## Summary Artifact
|
| 56 |
+
|
| 57 |
+
- `/workspace/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json`
|
| 58 |
+
- `/workspace/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"anybimanual_local_overlap_floor": {
|
| 3 |
+
"path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
|
| 4 |
+
"step": 1000,
|
| 5 |
+
"mean_success": 0.16,
|
| 6 |
+
"per_task_success": {
|
| 7 |
+
"coordinated_push_box": 0.0,
|
| 8 |
+
"coordinated_lift_ball": 0.0,
|
| 9 |
+
"dual_push_buttons": 0.48
|
| 10 |
+
}
|
| 11 |
+
},
|
| 12 |
+
"anybimanual_public_best_overlap": {
|
| 13 |
+
"path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
|
| 14 |
+
"step": 60000,
|
| 15 |
+
"mean_success": 0.6933333333333334,
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"coordinated_push_box": 0.8,
|
| 18 |
+
"coordinated_lift_ball": 0.32,
|
| 19 |
+
"dual_push_buttons": 0.96
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"stage1_frozen": {
|
| 23 |
+
"path": "/workspace/reports/rvt_overlap_branch_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17_noplan_split/rollout_eval.json",
|
| 24 |
+
"checkpoint": "/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt",
|
| 25 |
+
"mean_success": 0.0,
|
| 26 |
+
"per_task_success": {
|
| 27 |
+
"bimanual_push_box": 0.0,
|
| 28 |
+
"bimanual_lift_ball": 0.0,
|
| 29 |
+
"bimanual_dual_push_buttons": 0.0
|
| 30 |
+
},
|
| 31 |
+
"per_task_return": {
|
| 32 |
+
"bimanual_push_box": 0.0,
|
| 33 |
+
"bimanual_lift_ball": 0.0,
|
| 34 |
+
"bimanual_dual_push_buttons": 0.0
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"stage2_unfreeze_top2": null,
|
| 38 |
+
"gates": {
|
| 39 |
+
"stage1_clears_local_floor": false,
|
| 40 |
+
"stage2_clears_local_floor": false
|
| 41 |
+
}
|
| 42 |
+
}
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/default/reveal_benchmark.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/ignore_proposal_logits_in_shortlist/reveal_benchmark.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## selector_finetune_iter8
|
| 4 |
+
- controller: model
|
| 5 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
|
| 6 |
+
- episodes: 100.000
|
| 7 |
+
- mean_success: 0.410
|
| 8 |
+
- visibility_integral: 38.913
|
| 9 |
+
- corridor_availability: 0.816
|
| 10 |
+
- reocclusion_rate: 0.020
|
| 11 |
+
- disturbance_cost: 0.515
|
| 12 |
+
- premature_retrieve_rate: 0.109
|
| 13 |
+
- reocclusion_after_reveal_rate: 0.780
|
| 14 |
+
- planner_regret: 0.127
|
| 15 |
+
- bag_success: 0.410
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## selector_finetune_iter8
|
| 4 |
+
- controller: model
|
| 5 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
|
| 6 |
+
- episodes: 100.000
|
| 7 |
+
- mean_success: 0.590
|
| 8 |
+
- visibility_integral: 37.920
|
| 9 |
+
- corridor_availability: 0.928
|
| 10 |
+
- reocclusion_rate: 0.000
|
| 11 |
+
- disturbance_cost: 0.206
|
| 12 |
+
- premature_retrieve_rate: 0.113
|
| 13 |
+
- reocclusion_after_reveal_rate: 0.000
|
| 14 |
+
- planner_regret: 0.167
|
| 15 |
+
- cloth_success: 0.590
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## selector_finetune_iter8
|
| 4 |
+
- controller: model
|
| 5 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
|
| 6 |
+
- episodes: 100.000
|
| 7 |
+
- mean_success: 0.400
|
| 8 |
+
- visibility_integral: 44.134
|
| 9 |
+
- corridor_availability: 0.847
|
| 10 |
+
- reocclusion_rate: 0.034
|
| 11 |
+
- disturbance_cost: 0.302
|
| 12 |
+
- premature_retrieve_rate: 0.110
|
| 13 |
+
- reocclusion_after_reveal_rate: 0.570
|
| 14 |
+
- planner_regret: 0.093
|
| 15 |
+
- foliage_success: 0.400
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.DS_Store
|
| 4 |
+
.mypy_cache/
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
.ruff_cache/
|
| 7 |
+
.venv/
|
| 8 |
+
artifacts/
|
| 9 |
+
outputs/
|
| 10 |
+
logs/
|
| 11 |
+
wandb/
|
| 12 |
+
reports/
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# reveal_vla_bimanual
|
| 2 |
+
|
| 3 |
+
Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
|
| 4 |
+
|
| 5 |
+
This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
|
| 6 |
+
|
| 7 |
+
This repo is structured around five top-level modules:
|
| 8 |
+
|
| 9 |
+
- `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
|
| 10 |
+
- `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
|
| 11 |
+
- `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
|
| 12 |
+
- `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
|
| 13 |
+
- `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
|
| 14 |
+
|
| 15 |
+
Current bootstrap priorities:
|
| 16 |
+
|
| 17 |
+
1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
|
| 18 |
+
2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
|
| 19 |
+
3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
|
| 20 |
+
|
| 21 |
+
Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
|
| 22 |
+
|
| 23 |
+
## RLBench env A
|
| 24 |
+
|
| 25 |
+
The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
|
| 26 |
+
|
| 27 |
+
Bring it up with:
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
/workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
|
| 31 |
+
/workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
|
| 32 |
+
/workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
Verify GPU GL on the headless display:
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
DISPLAY=:99 glxinfo -B
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
Run the RLBench launch/reset/step smoke test:
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
env \
|
| 45 |
+
DISPLAY=:99 \
|
| 46 |
+
XDG_RUNTIME_DIR=/tmp/runtime-root \
|
| 47 |
+
COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
|
| 48 |
+
LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
|
| 49 |
+
QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
|
| 50 |
+
/workspace/.tools/micromamba/bin/micromamba run \
|
| 51 |
+
-r /workspace/.micromamba \
|
| 52 |
+
-p /workspace/envs/rlbench \
|
| 53 |
+
python -m sim_rlbench.launch_smoke --headless
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
The working benchmark interface is fixed to three cameras only:
|
| 57 |
+
|
| 58 |
+
- `front`
|
| 59 |
+
- `wrist_left`
|
| 60 |
+
- `wrist_right`
|
| 61 |
+
|
| 62 |
+
The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
|
| 63 |
+
|
| 64 |
+
Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 68 |
+
python -m sim_rlbench.smoke_test --print-train-command
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 75 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
apt-get install -y squashfs-tools
|
| 82 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 83 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
|
| 84 |
+
```
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapter Stack
|
| 2 |
+
|
| 3 |
+
This repo now contains a no-op-safe `trunk + adapter` path alongside the legacy monolithic policies.
|
| 4 |
+
|
| 5 |
+
## Main classes
|
| 6 |
+
|
| 7 |
+
- `models/policy.py`
|
| 8 |
+
- `FoundationTrunkPolicy`
|
| 9 |
+
- `ElasticOcclusionAdapter`
|
| 10 |
+
- `AdapterWrappedPolicy`
|
| 11 |
+
|
| 12 |
+
- `models/backbones.py`
|
| 13 |
+
- `NoOpAdapterCompatibleTrunkOutput`
|
| 14 |
+
- `TrunkInterface`
|
| 15 |
+
|
| 16 |
+
- `models/action_decoder.py`
|
| 17 |
+
- `TaskRoutedProposalPrior`
|
| 18 |
+
|
| 19 |
+
- `models/planner.py`
|
| 20 |
+
- `ElasticFeasibilityGate`
|
| 21 |
+
- `ResidualActionReranker`
|
| 22 |
+
- `AdapterPlanner`
|
| 23 |
+
|
| 24 |
+
- `models/world_model.py`
|
| 25 |
+
- `LightweightRevealStateTransitionModel`
|
| 26 |
+
|
| 27 |
+
- `models/observation_memory.py`
|
| 28 |
+
- `RevealStateCache`
|
| 29 |
+
|
| 30 |
+
## Trainer modes
|
| 31 |
+
|
| 32 |
+
`train/trainer.py` now supports:
|
| 33 |
+
|
| 34 |
+
- `policy_type: adapter_wrapped`
|
| 35 |
+
- `policy_type: foundation_trunk`
|
| 36 |
+
|
| 37 |
+
Relevant trainer fields:
|
| 38 |
+
|
| 39 |
+
- `training_regime`
|
| 40 |
+
- `eval_mode`
|
| 41 |
+
- `adapter_mode`
|
| 42 |
+
- `adapter_use_transition_model`
|
| 43 |
+
- `adapter_use_task_conditioning`
|
| 44 |
+
|
| 45 |
+
## Guardrail tests
|
| 46 |
+
|
| 47 |
+
New tests:
|
| 48 |
+
|
| 49 |
+
- `tests/test_trunk_noop_equivalence.py`
|
| 50 |
+
- `tests/test_adapter_gate_blocks_unsafe_retrieve.py`
|
| 51 |
+
- `tests/test_task_specific_loss_masking.py`
|
| 52 |
+
- `tests/test_cloth_specific_metrics_affect_selection.py`
|
| 53 |
+
- `tests/test_general_eval_protocol_is_identical.py`
|
| 54 |
+
|
| 55 |
+
## Config templates
|
| 56 |
+
|
| 57 |
+
- `train/configs/proxy_adapter_wrapped_clip_base.yaml`
|
| 58 |
+
- `train/configs/proxy_adapter_wrapped_clip_rank_only.yaml`
|
| 59 |
+
- `train/configs/proxy_adapter_wrapped_clip_noop_eval.yaml`
|
| 60 |
+
|
| 61 |
+
## Benchmark wrappers
|
| 62 |
+
|
| 63 |
+
- `scripts/run_anchor_adapter_ablations.sh`
|
| 64 |
+
- `scripts/run_proxy_adapter_ablations.sh`
|
| 65 |
+
- `scripts/run_target_like_adapter_subset.sh`
|
| 66 |
+
|
| 67 |
+
All new configs and scripts default to `~/workspace` outputs and reports.
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/upstream_pins.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Upstream Pins
|
| 2 |
+
|
| 3 |
+
Pinned on 2026-03-22 in `/workspace/third_party`.
|
| 4 |
+
|
| 5 |
+
Mandatory benchmark stack:
|
| 6 |
+
|
| 7 |
+
- `peract_bimanual`: `bb0232a6ba3fe116566e9568f0c7af980ed6703d`
|
| 8 |
+
- `RLBench`: `8af748c51287989294e00c9c670e3330a0e35ed5`
|
| 9 |
+
- `PyRep`: `b8bd1d7a3182adcd570d001649c0849047ebf197`
|
| 10 |
+
- `YARR`: `6822ff78602c77878b27d4cfe759ce029c67bffb`
|
| 11 |
+
|
| 12 |
+
Optional published baseline:
|
| 13 |
+
|
| 14 |
+
- `AnyBimanual`: `76024e48b0e9489101459e85bc909c126ec581b4`
|
| 15 |
+
|
| 16 |
+
Reveal-proxy stack candidate:
|
| 17 |
+
|
| 18 |
+
- `IsaacLab`: `v2.3.1` was cloned for inspection, but it targets Python 3.11 and Isaac Sim 5.x.
|
| 19 |
+
- For the frozen project scope of Python 3.10 on Ubuntu 22.04, env B should stay on an Isaac Sim 4.5-compatible Isaac Lab release instead of the latest branch.
|
| 20 |
+
|
| 21 |
+
Notes:
|
| 22 |
+
|
| 23 |
+
- `peract_bimanual` defaults to 6 cameras and older Python/Torch pins. This repo overrides camera selection and environment creation rather than running the upstream install scripts unchanged.
|
| 24 |
+
- RLBench headless execution on this RunPod host will require an X server setup because the base image does not currently ship `X`, `xvfb`, or `nvidia-xconfig`.
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/xorg.rtx6000.conf
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Section "ServerLayout"
|
| 2 |
+
Identifier "Layout0"
|
| 3 |
+
Screen 0 "Screen0"
|
| 4 |
+
EndSection
|
| 5 |
+
|
| 6 |
+
Section "Monitor"
|
| 7 |
+
Identifier "Monitor0"
|
| 8 |
+
VendorName "Unknown"
|
| 9 |
+
ModelName "Unknown"
|
| 10 |
+
Option "DPMS"
|
| 11 |
+
EndSection
|
| 12 |
+
|
| 13 |
+
Section "Device"
|
| 14 |
+
Identifier "Device0"
|
| 15 |
+
Driver "nvidia"
|
| 16 |
+
VendorName "NVIDIA Corporation"
|
| 17 |
+
BusID "PCI:65:0:0"
|
| 18 |
+
Option "AllowEmptyInitialConfiguration" "True"
|
| 19 |
+
Option "ProbeAllGpus" "False"
|
| 20 |
+
EndSection
|
| 21 |
+
|
| 22 |
+
Section "Screen"
|
| 23 |
+
Identifier "Screen0"
|
| 24 |
+
Device "Device0"
|
| 25 |
+
Monitor "Monitor0"
|
| 26 |
+
DefaultDepth 24
|
| 27 |
+
Option "AllowEmptyInitialConfiguration" "True"
|
| 28 |
+
SubSection "Display"
|
| 29 |
+
Depth 24
|
| 30 |
+
Virtual 1280 1024
|
| 31 |
+
EndSubSection
|
| 32 |
+
EndSection
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/reveal310.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: reveal310
|
| 2 |
+
channels:
|
| 3 |
+
- pytorch
|
| 4 |
+
- nvidia
|
| 5 |
+
- conda-forge
|
| 6 |
+
dependencies:
|
| 7 |
+
- python=3.10
|
| 8 |
+
- pip
|
| 9 |
+
- git
|
| 10 |
+
- cmake
|
| 11 |
+
- ninja
|
| 12 |
+
- make
|
| 13 |
+
- gxx_linux-64
|
| 14 |
+
- pkg-config
|
| 15 |
+
- numpy=1.26.*
|
| 16 |
+
- pandas=2.2.*
|
| 17 |
+
- scipy=1.13.*
|
| 18 |
+
- matplotlib=3.8.*
|
| 19 |
+
- pyyaml=6.*
|
| 20 |
+
- imageio
|
| 21 |
+
- trimesh
|
| 22 |
+
- networkx
|
| 23 |
+
- psutil
|
| 24 |
+
- tqdm
|
| 25 |
+
- pytorch=2.3.1
|
| 26 |
+
- torchvision=0.18.1
|
| 27 |
+
- torchaudio=2.3.1
|
| 28 |
+
- pytorch-cuda=12.1
|
| 29 |
+
- pip:
|
| 30 |
+
- accelerate==0.31.0
|
| 31 |
+
- einops==0.8.0
|
| 32 |
+
- hydra-core==1.3.2
|
| 33 |
+
- omegaconf==2.3.0
|
| 34 |
+
- safetensors==0.4.3
|
| 35 |
+
- tensorboard==2.16.2
|
| 36 |
+
- timm==1.0.7
|
| 37 |
+
- transformers==4.41.2
|
| 38 |
+
- wandb==0.18.0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/rlbench310.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: rlbench310
|
| 2 |
+
channels:
|
| 3 |
+
- pytorch
|
| 4 |
+
- nvidia
|
| 5 |
+
- conda-forge
|
| 6 |
+
dependencies:
|
| 7 |
+
- python=3.10
|
| 8 |
+
- pip
|
| 9 |
+
- git
|
| 10 |
+
- cmake
|
| 11 |
+
- cffi
|
| 12 |
+
- ninja
|
| 13 |
+
- make
|
| 14 |
+
- gxx_linux-64
|
| 15 |
+
- pkg-config
|
| 16 |
+
- numpy=1.26.*
|
| 17 |
+
- pandas=2.2.*
|
| 18 |
+
- scipy=1.13.*
|
| 19 |
+
- matplotlib=3.8.*
|
| 20 |
+
- pyyaml=6.*
|
| 21 |
+
- h5py
|
| 22 |
+
- imageio
|
| 23 |
+
- pillow
|
| 24 |
+
- psutil
|
| 25 |
+
- tqdm
|
| 26 |
+
- trimesh
|
| 27 |
+
- pytorch=2.3.1
|
| 28 |
+
- torchvision=0.18.1
|
| 29 |
+
- torchaudio=2.3.1
|
| 30 |
+
- pytorch-cuda=12.1
|
| 31 |
+
- pip:
|
| 32 |
+
- accelerate==0.31.0
|
| 33 |
+
- absl-py==2.1.0
|
| 34 |
+
- clip @ git+https://github.com/openai/CLIP.git
|
| 35 |
+
- einops==0.8.0
|
| 36 |
+
- ftfy==6.2.0
|
| 37 |
+
- gym==0.26.2
|
| 38 |
+
- hydra-core==1.3.2
|
| 39 |
+
- natsort==8.4.0
|
| 40 |
+
- omegaconf==2.3.0
|
| 41 |
+
- perceiver-pytorch==0.8.8
|
| 42 |
+
- pyrender==0.1.45
|
| 43 |
+
- pytorch-lamb==1.0.0
|
| 44 |
+
- regex==2024.5.15
|
| 45 |
+
- rich==13.9.4
|
| 46 |
+
- rich-click==1.8.9
|
| 47 |
+
- safetensors==0.4.3
|
| 48 |
+
- tensorboard==2.16.2
|
| 49 |
+
- transformers==4.41.2
|
| 50 |
+
- wandb==0.18.0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from eval.metrics import BenchmarkMetrics
|
| 2 |
+
|
| 3 |
+
__all__ = ["BenchmarkMetrics"]
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/ablations.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Names of the ablation settings; the constant name marks them as mandatory.
# Kept as an immutable tuple so importers cannot mutate the shared list.
MANDATORY_ABLATIONS: tuple[str, ...] = (
    "no_planner",
    "no_spatial_memory",
    "no_task_head",
    "no_geometry",
    "no_camera_pose",
)
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compare_rlbench_sweeps.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _load_summary(path: Path) -> dict[str, Any]:
    """Load one rollout-eval JSON file and reduce it to comparable metrics.

    Args:
        path: Location of the rollout evaluation JSON file.

    Returns:
        A dictionary holding the source path, the checkpoint, aggregate
        metrics, the boolean run flags and the per-task metric mappings.
        Missing (or JSON ``null``) entries fall back to ``0.0``, ``False``
        or an empty container.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    # ``or {}`` also covers an explicit JSON ``null`` stored under "tasks".
    tasks: dict[str, Any] = payload.get("tasks") or {}

    def _mean(values: Any) -> float:
        """Average *values*; an empty or missing collection averages to 0.0."""
        items = list(values or ())
        return float(sum(items) / max(1, len(items)))

    task_scores = {
        task_name: float(task_payload.get("mean_success", 0.0))
        for task_name, task_payload in tasks.items()
    }
    task_returns = {
        task_name: float(task_payload.get("mean_return", 0.0))
        for task_name, task_payload in tasks.items()
    }
    task_path_recoveries = {
        task_name: _mean(task_payload.get("path_recoveries"))
        for task_name, task_payload in tasks.items()
    }
    task_noop_fallbacks = {
        task_name: _mean(task_payload.get("noop_fallbacks"))
        for task_name, task_payload in tasks.items()
    }
    return {
        "path": str(path),
        "checkpoint": payload.get("checkpoint"),
        # mean_success is taken from the file; the other means are
        # recomputed here as unweighted averages over the listed tasks.
        "mean_success": float(payload.get("mean_success", 0.0)),
        "mean_return": _mean(task_returns.values()),
        "mean_path_recoveries": _mean(task_path_recoveries.values()),
        "mean_noop_fallbacks": _mean(task_noop_fallbacks.values()),
        "plan_requested": bool(payload.get("plan_requested", False)),
        "plan_applied": bool(payload.get("plan_applied", False)),
        "no_planner": bool(payload.get("no_planner", False)),
        "no_geometry": bool(payload.get("no_geometry", False)),
        "disable_task_conditioning": bool(payload.get("disable_task_conditioning", False)),
        "compact_world_model": bool(payload.get("compact_world_model", False)),
        "task_scores": task_scores,
        "task_returns": task_returns,
        "task_path_recoveries": task_path_recoveries,
        "task_noop_fallbacks": task_noop_fallbacks,
        "error_tasks": list(payload.get("error_tasks") or []),
    }
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _pairwise_delta(reference: dict[str, Any], candidate: dict[str, Any]) -> dict[str, Any]:
    """Compute candidate-minus-reference deltas between two run summaries.

    Args:
        reference: Summary of the baseline run (as built by ``_load_summary``).
        candidate: Summary of the run being compared against the baseline.

    Returns:
        A dictionary with ``shared_task_count``, one ``<metric>_delta`` entry
        per aggregate metric and ``per_task_delta`` (success deltas for the
        tasks present in both runs). When the runs share no task, every
        delta is ``0.0`` and ``per_task_delta`` is empty.

    Note:
        The aggregate deltas subtract each run's own aggregate values; they
        are not restricted to the shared tasks.
    """
    shared_tasks = sorted(set(reference["task_scores"]) & set(candidate["task_scores"]))
    aggregate_keys = (
        "mean_success",
        "mean_return",
        "mean_path_recoveries",
        "mean_noop_fallbacks",
    )
    delta: dict[str, Any] = {"shared_task_count": len(shared_tasks)}
    for key in aggregate_keys:
        # Without any shared task the aggregates are not read at all.
        delta[f"{key}_delta"] = float(candidate[key] - reference[key]) if shared_tasks else 0.0
    delta["per_task_delta"] = {
        task_name: float(candidate["task_scores"][task_name] - reference["task_scores"][task_name])
        for task_name in shared_tasks
    }
    return delta
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _markdown_lines(reference_label: str, comparison: dict[str, Any]) -> list[str]:
    """Render the comparison as Markdown, one list entry per output line.

    Args:
        reference_label: Label of the run used as the baseline.
        comparison: Mapping with a ``"runs"`` entry (label -> run summary) and
            a ``"pairwise_against_reference"`` entry (label -> delta mapping).

    Returns:
        The report lines, without trailing newlines.
    """
    lines = [
        "# RLBench Sweep Comparison",
        "",
        f"- Reference: `{reference_label}`",
        "",
        "## Runs",
        "",
    ]
    for label, run in comparison["runs"].items():
        lines.append(
            f"- `{label}`: mean_success={run['mean_success']:.3f}, "
            f"mean_return={run['mean_return']:.3f}, "
            f"mean_path_recoveries={run['mean_path_recoveries']:.3f}, "
            f"mean_noop_fallbacks={run['mean_noop_fallbacks']:.3f}, "
            f"plan_applied={run['plan_applied']}, "
            f"errors={len(run['error_tasks'])}, "
            f"path=`{run['path']}`"
        )
    lines.extend(["", "## Pairwise Deltas", ""])
    for label, delta in comparison["pairwise_against_reference"].items():
        lines.append(
            f"- `{label}`: mean_success_delta={delta['mean_success_delta']:.3f}, "
            f"mean_return_delta={delta['mean_return_delta']:.3f}, "
            f"mean_path_recoveries_delta={delta['mean_path_recoveries_delta']:.3f}, "
            f"mean_noop_fallbacks_delta={delta['mean_noop_fallbacks_delta']:.3f}, "
            f"shared_tasks={delta['shared_task_count']}"
        )
    return lines
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def main() -> None:
    """Compare several rollout-eval summaries against one reference run.

    Reads every ``--run label=path`` file, computes deltas against the run
    named by ``--reference-label`` and writes ``rlbench_comparison.json`` and
    ``rlbench_comparison.md`` into ``--output-dir``.

    Raises:
        ValueError: If the reference label is not among the provided runs.
        SystemExit: Via argparse, when a ``--run`` item is not ``label=path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--run", action="append", required=True, help="label=/abs/path/to/rollout_eval.json")
    parser.add_argument("--reference-label", required=True)
    parser.add_argument("--output-dir", required=True)
    args = parser.parse_args()

    runs: dict[str, dict[str, Any]] = {}
    for item in args.run:
        # Split on the first "=" only, so paths may themselves contain "=".
        label, separator, raw_path = item.partition("=")
        if not separator or not label or not raw_path:
            parser.error(f"--run expects label=/abs/path/to/rollout_eval.json, got {item!r}")
        runs[label] = _load_summary(Path(raw_path).resolve())

    if args.reference_label not in runs:
        raise ValueError(f"Missing reference label {args.reference_label!r} in provided runs.")

    reference = runs[args.reference_label]
    comparison = {
        "reference_label": args.reference_label,
        "runs": runs,
        "pairwise_against_reference": {
            label: _pairwise_delta(reference, payload)
            for label, payload in runs.items()
            if label != args.reference_label
        },
    }

    output_dir = Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "rlbench_comparison.json").write_text(
        json.dumps(comparison, indent=2),
        encoding="utf-8",
    )
    (output_dir / "rlbench_comparison.md").write_text(
        "\n".join(_markdown_lines(args.reference_label, comparison)) + "\n",
        encoding="utf-8",
    )


if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
from eval.metrics import summarize_episode_records
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _load_benchmark_payload(path: Path) -> dict[str, Any]:
    """Read the UTF-8 JSON file at *path* and return the decoded content."""
    return json.loads(path.read_text(encoding="utf-8"))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _infer_label(payload: dict[str, Any]) -> str:
    """Return the single model label stored in a benchmark payload.

    Every top-level key other than ``"benchmark_config"`` counts as a label.

    Raises:
        ValueError: If the payload holds zero or more than one label.
    """
    labels = [key for key in payload if key != "benchmark_config"]
    if len(labels) != 1:
        raise ValueError(f"Expected exactly one model label in benchmark JSON, found {labels}.")
    return labels[0]
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _load_records(path: Path, label: str | None) -> tuple[str, list[dict[str, Any]]]:
    """Load the episode records stored under one model label.

    Args:
        path: Benchmark JSON file to read.
        label: Model label to select; when ``None`` or empty, the label is
            inferred and the file must then contain exactly one.

    Returns:
        ``(resolved_label, records)``; ``records`` is empty when the entry
        has no ``"episode_records"`` (or stores JSON ``null`` there).

    Raises:
        KeyError: If the resolved label is not a key of the benchmark JSON.
        ValueError: If the label must be inferred and is ambiguous.
    """
    payload = _load_benchmark_payload(path)
    resolved_label = label or _infer_label(payload)
    if resolved_label not in payload:
        raise KeyError(f"Missing label {resolved_label!r} in benchmark JSON {path}.")
    records = payload[resolved_label].get("episode_records") or []
    return resolved_label, list(records)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _mean_metrics(summary: dict[str, Any]) -> dict[str, float]:
    """Extract the reported metrics from *summary* as floats.

    Metrics that are absent from *summary* are reported as ``0.0``. The
    returned mapping always contains every metric key, in a fixed order.
    """
    metric_names = (
        "visibility_integral",
        "corridor_availability",
        "reocclusion_rate",
        "disturbance_cost",
        "premature_retrieve_rate",
        "reocclusion_after_reveal_rate",
        "planner_regret",
    )
    return {name: float(summary.get(name, 0.0)) for name in metric_names}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def main() -> None:
    """Merge per-task benchmark results into one task-routed summary.

    Each ``--source task_name=path`` names the benchmark JSON whose episode
    records are used for that task; an optional ``--label task_name=label``
    selects the model label inside that file. Only records whose
    ``task_name`` equals the routed task are kept. The combined summary is
    written to ``summary.json`` and ``summary.md`` in ``--output-dir``.

    Raises:
        SystemExit: Via argparse, when a ``--source`` or ``--label`` item
            does not contain ``=``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", action="append", required=True, help="task_name=/abs/path/to/reveal_benchmark.json")
    parser.add_argument("--label", action="append", default=[], help="task_name=model_label within the benchmark JSON")
    parser.add_argument("--output-dir", required=True)
    args = parser.parse_args()

    labels_by_task: dict[str, str] = {}
    for item in args.label:
        task_name, separator, label = item.partition("=")
        if not separator:
            parser.error(f"--label expects task_name=model_label, got {item!r}")
        labels_by_task[task_name] = label

    routing_policy: dict[str, str] = {}
    sources: dict[str, str] = {}
    combined_records: list[dict[str, Any]] = []

    for item in args.source:
        task_name, separator, raw_path = item.partition("=")
        if not separator:
            parser.error(f"--source expects task_name=/abs/path/to/reveal_benchmark.json, got {item!r}")
        path = Path(raw_path).resolve()
        label, records = _load_records(path, labels_by_task.get(task_name))
        # A benchmark file may cover several tasks; keep only the routed one.
        task_records = [record for record in records if str(record.get("task_name")) == task_name]
        routing_policy[task_name] = label
        sources[task_name] = str(path)
        combined_records.extend(task_records)

    summary = summarize_episode_records(combined_records)
    per_task_success = summary.get("per_task_success", {})
    payload = {
        "controller": "task_routed_checkpoint_selection",
        "routing_policy": routing_policy,
        "per_task_success": per_task_success,
        "mean_success": float(sum(per_task_success.values()) / max(1, len(per_task_success))),
        "mean_metrics": _mean_metrics(summary),
        "sources": sources,
    }

    output_dir = Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "summary.json").write_text(json.dumps(payload, indent=2), encoding="utf-8")
    lines = [
        "# Task-Routed Proxy Controller",
        "",
        "- routing rule: "
        + ", ".join(f"`{task} -> {label}`" for task, label in routing_policy.items()),
        f"- mean success: `{payload['mean_success']:.4f}`",
        "",
        "## Per-Task Success",
        "",
    ]
    for task_name, score in per_task_success.items():
        lines.append(f"- {task_name}: `{score:.2f}`")
    lines.extend(["", "## Sources", ""])
    for task_name, source in sources.items():
        lines.append(f"- {task_name}: `{source}`")
    (output_dir / "summary.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/protocols.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Sequence
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def build_eval_protocol(
    *,
    eval_mode: str,
    task_subset: Sequence[str],
    seed: int = 17,
    episodes: int = 25,
    episode_length: int = 120,
    resolution: int = 256,
    # NOTE(review): the README's fixed camera list spells the wrist cameras
    # `wrist_left` / `wrist_right`; confirm which spelling consumers expect.
    cameras: Sequence[str] = ("front", "left_wrist", "right_wrist"),
) -> dict[str, object]:
    """Build a normalized evaluation-protocol description.

    All arguments are keyword-only and are coerced to plain ``str`` / ``int``
    values; sequences become tuples so the result is hashable piecewise.

    Args:
        eval_mode: Name of the evaluation mode.
        task_subset: Names of the tasks to evaluate.
        seed: Seed value recorded in the protocol.
        episodes: Episode count recorded in the protocol.
        episode_length: Episode length recorded in the protocol.
        resolution: Resolution value recorded in the protocol.
        cameras: Camera names recorded in the protocol.

    Returns:
        The protocol dictionary. ``observation_stack``, ``action_horizon``
        and ``action_space`` are fixed values and cannot be overridden here.
    """
    return {
        "eval_mode": str(eval_mode),
        "task_subset": tuple(str(task) for task in task_subset),
        "seed": int(seed),
        "episodes": int(episodes),
        "episode_length": int(episode_length),
        "resolution": int(resolution),
        "cameras": tuple(str(camera) for camera in cameras),
        "observation_stack": "rgbd_3cam",
        "action_horizon": 8,
        "action_space": "bimanual_delta_pose",
    }
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def protocol_identity_signature(protocol: dict[str, object]) -> tuple[object, ...]:
    """Return the protocol fields that define protocol identity, as a tuple.

    ``eval_mode`` is deliberately not part of the signature, so protocols
    that differ only in their evaluation mode compare equal.

    Raises:
        KeyError: If *protocol* lacks one of the signature fields.
    """
    signature_fields = (
        "task_subset",
        "seed",
        "episodes",
        "episode_length",
        "resolution",
        "cameras",
        "observation_stack",
        "action_horizon",
        "action_space",
    )
    return tuple(protocol[field] for field in signature_fields)
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_anybimanual_anchor_eval.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import csv
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import shutil
|
| 9 |
+
import subprocess
|
| 10 |
+
import sys
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
from models.action_decoder import infer_task_name_from_text
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _infer_task_families(tasks: list[str], task_name: str) -> list[str]:
    """Return the sorted, de-duplicated families inferred from the names.

    ``infer_task_name_from_text`` is applied to *task_name* and to every
    entry of *tasks*; duplicates are collapsed before sorting.
    """
    return sorted({infer_task_name_from_text(name) for name in (task_name, *tasks)})
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _parse_episode_scores(stdout: str) -> list[float]:
    """Collect every ``Episode <n> | Score: <value>`` value found in *stdout*.

    Only unsigned decimal numbers are recognised. Scores are returned in the
    order they appear; the list is empty when nothing matches.
    """
    pattern = re.compile(r"Episode\s+\d+\s+\|\s+Score:\s*([0-9]+(?:\.[0-9]+)?)")
    return [float(match.group(1)) for match in pattern.finditer(stdout)]
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _parse_final_score(stdout: str) -> float | None:
    """Return the first ``Final Score: <value>`` in *stdout*, or ``None``.

    Only unsigned decimal numbers are recognised.
    """
    match = re.search(r"Final Score:\s*([0-9]+(?:\.[0-9]+)?)", stdout)
    return None if match is None else float(match.group(1))
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _parse_eval_data_score(eval_csv_path: Path) -> float | None:
    """Read the ``eval_envs/return`` value from the last row of a CSV file.

    Args:
        eval_csv_path: Path of the ``eval_data.csv`` file to inspect.

    Returns:
        The value as a float, or ``None`` when the file does not exist, has
        no data rows, lacks the column, or holds an empty or non-numeric
        cell in its last row.
    """
    if not eval_csv_path.exists():
        return None
    # The csv module expects files opened with newline="" so that newlines
    # embedded in quoted fields are handled by the reader itself.
    with eval_csv_path.open("r", encoding="utf-8", newline="") as handle:
        last_row: dict[str, str] | None = None
        for row in csv.DictReader(handle):
            last_row = row
    if last_row is None:
        return None
    value = last_row.get("eval_envs/return")
    if value is None or value == "":
        return None
    try:
        return float(value)
    except ValueError:
        # An unparsable cell counts as "no score" so the caller can fall
        # back to another source instead of crashing after the run.
        return None
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _bootstrap_eval_seed_dir(
    *,
    framework_logdir: Path,
    task_name: str,
    method: str,
    release_root: Path | None = None,
) -> None:
    """Seed ``<logdir>/<task>/<method>/seed0`` from a released run.

    Does nothing when the seed directory already has a ``config.yaml`` or
    when the release directory has none. Otherwise the release
    ``config.yaml`` is copied and, if the release has a ``weights``
    directory, a ``weights`` symlink pointing at it is created.

    Args:
        framework_logdir: Root log directory the evaluation will use.
        task_name: Task directory name below the log root.
        method: Method directory name below the task directory.
        release_root: Root of the released runs; defaults to
            ``/workspace/baselines/AnyBimanual_release_eval_live``.
    """
    if release_root is None:
        release_root = Path("/workspace/baselines/AnyBimanual_release_eval_live")
    seed_dir = framework_logdir / task_name / method / "seed0"
    if (seed_dir / "config.yaml").exists():
        return
    release_seed_dir = Path(release_root) / task_name / method / "seed0"
    release_config = release_seed_dir / "config.yaml"
    if not release_config.exists():
        return
    seed_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(release_config, seed_dir / "config.yaml")
    release_weights_dir = release_seed_dir / "weights"
    weights_link = seed_dir / "weights"
    # exists() follows symlinks and so reports False for a dangling link;
    # the is_symlink() check keeps os.symlink from raising FileExistsError.
    already_present = weights_link.exists() or weights_link.is_symlink()
    if release_weights_dir.exists() and not already_present:
        os.symlink(release_weights_dir, weights_link, target_is_directory=True)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def main() -> None:
    """Run the AnyBimanual ``eval.py`` script and summarise its result.

    The subprocess output is captured in ``stdout.txt`` / ``stderr.txt``
    inside ``--output-dir``. The final score is read from the run's
    ``eval_data.csv`` when available and otherwise parsed from stdout. The
    invoked command is stored in ``command.txt`` and the summary in
    ``summary.json``; the summary is also printed.

    Raises:
        NotImplementedError: For ``--adapter-mode adapter_active`` when any
            resolved task family is not ``"generic"``.
        SystemExit: With the subprocess return code when it is non-zero
            (raised after the summary files have been written).
    """
    import shlex  # Local import: only needed to quote command.txt.

    parser = argparse.ArgumentParser()
    parser.add_argument("--task-name", required=True, help="Existing AnyBimanual logdir task name, e.g. perlf_release_dual_push_buttons_smoke1")
    parser.add_argument("--tasks", nargs="+", required=True, help="RLBench task module names, e.g. dual_push_buttons")
    parser.add_argument("--adapter-mode", choices=("trunk_only", "adapter_noop", "adapter_active"), default="trunk_only")
    parser.add_argument("--episodes", type=int, default=1)
    parser.add_argument("--eval-type", default="60000")
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--framework-logdir", default=None)
    parser.add_argument("--demo-path", default="/workspace/baselines/AnyBimanual_subset3_demo_root")
    parser.add_argument("--method", default="PERACT_BC")
    parser.add_argument("--gpu", type=int, default=0)
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    framework_logdir = Path(args.framework_logdir) if args.framework_logdir else (output_dir / "anybimanual_logdir")
    framework_logdir.mkdir(parents=True, exist_ok=True)
    _bootstrap_eval_seed_dir(
        framework_logdir=framework_logdir,
        task_name=args.task_name,
        method=args.method,
    )

    task_families = _infer_task_families(list(args.tasks), args.task_name)
    passthrough_only = all(family == "generic" for family in task_families)
    if args.adapter_mode == "adapter_active" and not passthrough_only:
        raise NotImplementedError(
            "The AnyBimanual anchor bridge only supports generic-task pass-through active mode for now. "
            f"Resolved task families: {task_families}"
        )

    command = [
        sys.executable,
        "/workspace/third_party/AnyBimanual/eval.py",
        f"method={args.method}",
        f"framework.logdir={framework_logdir}",
        "framework.start_seed=0",
        f"framework.eval_type={args.eval_type}",
        f"framework.eval_episodes={args.episodes}",
        "framework.eval_envs=1",
        f"framework.gpu={args.gpu}",
        f"rlbench.task_name={args.task_name}",
        f"rlbench.tasks=[{','.join(args.tasks)}]",
        f"rlbench.demo_path={args.demo_path}",
        "rlbench.headless=True",
        "rlbench.gripper_mode=BimanualDiscrete",
        "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
        "rlbench.action_mode=BimanualMoveArmThenGripper",
    ]

    # Values already present in the caller's environment take precedence.
    env = os.environ.copy()
    env.setdefault("DISPLAY", ":99")
    env.setdefault("XDG_RUNTIME_DIR", "/workspace/runtime")
    env.setdefault("COPPELIASIM_ROOT", "/workspace/assets/coppeliasim_v4_1_0")
    env.setdefault("QT_QPA_PLATFORM_PLUGIN_PATH", env["COPPELIASIM_ROOT"])
    # rstrip avoids a trailing ":" when LD_LIBRARY_PATH was previously unset.
    env["LD_LIBRARY_PATH"] = f"{env['COPPELIASIM_ROOT']}:{env.get('LD_LIBRARY_PATH', '')}".rstrip(":")
    pythonpath_items = [
        "/workspace/third_party/RLBench",
        "/workspace/third_party/YARR",
        "/workspace/third_party/AnyBimanual",
        "/workspace/reveal_vla_bimanual",
    ]
    existing_pythonpath = env.get("PYTHONPATH", "")
    env["PYTHONPATH"] = ":".join(pythonpath_items + ([existing_pythonpath] if existing_pythonpath else []))

    stdout_path = output_dir / "stdout.txt"
    stderr_path = output_dir / "stderr.txt"
    with stdout_path.open("w", encoding="utf-8") as stdout_handle, stderr_path.open("w", encoding="utf-8") as stderr_handle:
        completed = subprocess.run(
            command,
            env=env,
            text=True,
            stdout=stdout_handle,
            stderr=stderr_handle,
            close_fds=True,
            check=False,
        )
    stdout = stdout_path.read_text(encoding="utf-8")
    scores = _parse_episode_scores(stdout)
    eval_csv_path = framework_logdir / args.task_name / args.method / "seed0" / "eval_data.csv"
    # Prefer the CSV written by the evaluation; stdout is the fallback.
    final_score = _parse_eval_data_score(eval_csv_path)
    if final_score is None:
        final_score = _parse_final_score(stdout)
    payload = {
        "adapter_mode": args.adapter_mode,
        "task_name": args.task_name,
        "tasks": list(args.tasks),
        "task_families": task_families,
        "passthrough_only": passthrough_only,
        "passthrough_reason": "generic_task_family" if passthrough_only else "unsupported_active_family",
        "episodes_requested": int(args.episodes),
        "episode_scores": scores,
        "mean_score": final_score if final_score is not None else ((sum(scores) / float(len(scores))) if scores else 0.0),
        "final_score": final_score,
        "subprocess_returncode": int(completed.returncode),
        "eval_csv_path": str(eval_csv_path),
        "command": command,
    }
    # shlex.join quotes arguments such as "rlbench.tasks=[a,b]" so the
    # recorded command can be pasted back into a shell unchanged.
    (output_dir / "command.txt").write_text(shlex.join(command) + "\n", encoding="utf-8")
    (output_dir / "summary.json").write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(json.dumps(payload, indent=2))
    if completed.returncode != 0:
        raise SystemExit(completed.returncode)


if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pyproject.toml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=68", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "reveal-vla-bimanual"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
description = "Language-conditioned bimanual reveal-and-retrieve policy prototype"
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
requires-python = ">=3.10,<3.11"
|
| 11 |
+
dependencies = [
|
| 12 |
+
"accelerate>=0.31.0",
|
| 13 |
+
"einops>=0.7.0",
|
| 14 |
+
"hydra-core>=1.3.2",
|
| 15 |
+
"matplotlib>=3.8.0",
|
| 16 |
+
"numpy>=1.26,<2.0",
|
| 17 |
+
"omegaconf>=2.3.0",
|
| 18 |
+
"pandas>=2.2.0",
|
| 19 |
+
"pyyaml>=6.0.1",
|
| 20 |
+
"safetensors>=0.4.3",
|
| 21 |
+
"tensorboard>=2.16.2",
|
| 22 |
+
"timm>=1.0.7",
|
| 23 |
+
"torch>=2.3.0",
|
| 24 |
+
"torchvision>=0.18.0",
|
| 25 |
+
"transformers>=4.41.0",
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
[tool.setuptools]
|
| 29 |
+
include-package-data = true
|
| 30 |
+
|
| 31 |
+
[tool.setuptools.packages.find]
|
| 32 |
+
include = ["sim_rlbench*", "sim_reveal*", "models*", "train*", "eval*", "pytorch3d*"]
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sim_reveal.base import RevealProxyConfig, RevealState, SupportMode
|
| 2 |
+
from sim_reveal.procedural_envs import ProceduralRevealEnv, available_proxy_names, make_proxy_env
|
| 3 |
+
from sim_reveal.proxy_specs import BAG_PROXY, CLOTH_PROXY, FOLIAGE_PROXY
|
| 4 |
+
|
| 5 |
+
__all__ = [
|
| 6 |
+
"BAG_PROXY",
|
| 7 |
+
"CLOTH_PROXY",
|
| 8 |
+
"FOLIAGE_PROXY",
|
| 9 |
+
"ProceduralRevealEnv",
|
| 10 |
+
"RevealProxyConfig",
|
| 11 |
+
"RevealState",
|
| 12 |
+
"SupportMode",
|
| 13 |
+
"available_proxy_names",
|
| 14 |
+
"make_proxy_env",
|
| 15 |
+
]
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/base.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from enum import IntEnum
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class SupportMode(IntEnum):
    """Integer-valued enumeration of the three support modes."""

    # NOTE(review): the physical meaning of each mode is not defined in this
    # file — confirm against the environment code that produces these labels.
    HOLD = 0
    TRANSFER = 1
    PASSIVE = 2
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class RevealState:
    """Bundle of reveal-state quantities, each stored as a NumPy array.

    NOTE(review): array shapes and dtypes are not fixed by this definition —
    confirm against the code that constructs ``RevealState`` instances.
    """

    support_mode_logits: np.ndarray
    corridor_logits: np.ndarray
    persistence_horizon: np.ndarray
    disturbance_cost: np.ndarray
    # Optional field; stays None when no belief map is supplied.
    belief_map: np.ndarray | None = None
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class RevealProxyConfig:
    """Configuration record for a reveal proxy.

    Only ``name`` is required; every other field has a default.
    """

    name: str
    num_templates: int = 32
    rollout_horizon: int = 5
    max_steps: int = 80
    # NOTE(review): presumably the keys under which disturbance cost and
    # success are reported by the environment — confirm against consumers.
    disturbance_key: str = "disturbance_cost"
    success_key: str = "retrieval_success"
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: dict[str, str] = field(default_factory=dict)
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from sim_reveal.proxy_specs import TASK_ID_BY_NAME, TASK_NAME_BY_PROXY_NAME
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _proxy_name_for_task(task_name: str) -> str:
    """Look up the proxy name whose mapped task name equals ``task_name``.

    When several proxies map to the same task, the first one in the iteration
    order of ``TASK_NAME_BY_PROXY_NAME`` is returned.

    Raises:
        KeyError: if no proxy maps to ``task_name``.
    """
    matching_proxies = [
        candidate
        for candidate, mapped in TASK_NAME_BY_PROXY_NAME.items()
        if mapped == task_name
    ]
    if not matching_proxies:
        raise KeyError(f"Unknown task name: {task_name}")
    return matching_proxies[0]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _parse_block_spec(raw_block: str) -> tuple[str, str, int]:
    """Parse one ``--block`` value into ``(stress_slice, difficulty_bin, count)``.

    Raises:
        ValueError: if the value does not consist of exactly three
            comma-separated fields, or if the count is not a non-negative
            integer.
    """
    fields = [part.strip() for part in raw_block.split(",")]
    if len(fields) != 3:
        raise ValueError(
            f"Invalid --block value {raw_block!r}: expected "
            "'stress_slice,difficulty_bin,count'"
        )
    stress_slice, difficulty_bin, raw_count = fields
    try:
        count = int(raw_count)
    except ValueError as error:
        raise ValueError(
            f"Invalid --block value {raw_block!r}: count {raw_count!r} is not an integer"
        ) from error
    if count < 0:
        # A negative count would otherwise silently produce zero episodes.
        raise ValueError(
            f"Invalid --block value {raw_block!r}: count must be non-negative"
        )
    return stress_slice, difficulty_bin, count


def main() -> None:
    """Build a JSON list of episode specs for one task and write it to disk.

    Command-line arguments:
        --task-name: task to generate specs for; must be a key of
            ``TASK_ID_BY_NAME``.
        --base-seed: integer offset added to every episode seed (default 0).
        --block: repeatable ``stress_slice,difficulty_bin,count`` triple; each
            block contributes ``count`` episodes.
        --output-path: destination JSON file; parent directories are created.

    A JSON summary (output path, task name, episode count, raw blocks) is
    printed to stdout after the file is written.

    Raises:
        KeyError: if the task name is unknown.
        ValueError: if a ``--block`` value is malformed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--task-name", required=True)
    parser.add_argument("--base-seed", type=int, default=0)
    parser.add_argument(
        "--block",
        action="append",
        required=True,
        help="stress_slice,difficulty_bin,count",
    )
    parser.add_argument("--output-path", required=True)
    args = parser.parse_args()

    task_name = str(args.task_name)
    if task_name not in TASK_ID_BY_NAME:
        raise KeyError(f"Unknown task name: {task_name}")
    proxy_name = _proxy_name_for_task(task_name)
    task_id = TASK_ID_BY_NAME[task_name]

    specs: list[dict[str, object]] = []
    # episode_index runs across all blocks; it is used for both id fields.
    episode_index = 0
    for block_index, raw_block in enumerate(args.block):
        stress_slice, difficulty_bin, count = _parse_block_spec(raw_block)
        for sample_index in range(count):
            specs.append(
                {
                    "proxy_name": proxy_name,
                    "task_name": task_name,
                    "task_id": int(task_id),
                    "stress_slice": stress_slice,
                    "difficulty_bin": difficulty_bin,
                    "episode_id": episode_index,
                    "episode_index": episode_index,
                    # Each block gets its own 10_000-wide seed range; a block
                    # with more than 10_000 episodes would overlap the next one.
                    "seed": int(args.base_seed) + block_index * 10_000 + sample_index,
                }
            )
            episode_index += 1

    output_path = Path(args.output_path).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(specs, indent=2), encoding="utf-8")
    print(
        json.dumps(
            {
                "output_path": str(output_path),
                "task_name": task_name,
                "episodes": len(specs),
                "blocks": args.block,
            },
            indent=2,
        )
    )
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# Invoke the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/dataset.py
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import pickle
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any, Callable, Sequence
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from torch import Tensor
|
| 9 |
+
from torch.utils.data import Dataset
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
from sim_reveal.procedural_envs import available_proxy_names, default_camera_matrices, make_proxy_env, render_views_from_state
|
| 14 |
+
from sim_reveal.proxy_specs import task_id_from_task_name, task_name_from_proxy_name
|
| 15 |
+
|
| 16 |
+
# Default ``dataset_version`` tag stamped on bundles by collect_teacher_dataset.
NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v5_noleak_actionhist"
# Version prefix that dataset_uses_rgbd matches to detect RGB-D datasets.
RGBD_PROXY_DATASET_VERSION = "reveal_proxy_v6_rgbd_elastic_state"
# Render-state keys treated as privileged: _assert_noleak_sample rejects any
# sample whose render_state contains one of them.
LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
    {
        "target_template",
        "support_mode",
        "visibility",
        "actor_template",
        "actor_progress",
        "corridor_current",
    }
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def dataset_uses_rgbd(dataset_version: Any) -> bool:
    """Report whether ``dataset_version`` identifies an RGB-D dataset.

    Falsy values (``None``, ``""``, ``0``) are treated as the empty string;
    anything else is converted with ``str`` and matched by prefix against
    ``RGBD_PROXY_DATASET_VERSION``.
    """
    version_label = str(dataset_version) if dataset_version else ""
    return version_label.startswith(RGBD_PROXY_DATASET_VERSION)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _assert_noleak_sample(sample: dict[str, Any]) -> None:
    """Reject samples whose render state still carries privileged keys.

    Raises:
        ValueError: if ``sample["render_state"]`` contains any key listed in
            ``LEGACY_PRIVILEGED_RENDER_KEYS``; the message names the offending
            keys in sorted order.
    """
    state_keys = set(sample.get("render_state", {}))
    offending = sorted(LEGACY_PRIVILEGED_RENDER_KEYS & state_keys)
    if not offending:
        return
    joined = ", ".join(offending)
    raise ValueError(
        "Legacy leaked proxy sample detected. Rebuild the dataset with the current "
        f"sim_reveal/procedural_envs.py. Privileged render keys found: {joined}"
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def collect_teacher_dataset(
    proxy_names: Sequence[str] | None = None,
    episodes_per_proxy: int = 32,
    resolution: int = 96,
    seed: int = 0,
    chunk_horizon: int = 8,
    rollout_horizon: int = 5,
    history_steps: int = 2,
    planner_candidates: int = 4,
    dataset_version: str = NOLEAK_PROXY_DATASET_VERSION,
    episode_specs: Sequence[dict[str, Any]] | None = None,
    proposal_target_builder: Callable[[Any, dict[str, Any], dict[str, Any]], dict[str, Any] | None] | None = None,
) -> dict[str, Any]:
    """Roll out the environment teacher and record one sample per step.

    Args:
        proxy_names: proxies to roll out; when empty or None, the result of
            ``available_proxy_names()`` is used.
        episodes_per_proxy: number of default episode specs generated per proxy
            when ``episode_specs`` is None.
        resolution: forwarded to ``make_proxy_env`` and stored in the bundle.
        seed: base seed for generated specs, and fallback base for specs that
            carry no ``"seed"`` entry.
        chunk_horizon: forwarded to ``env.teacher_chunk_and_rollout``.
        rollout_horizon: forwarded to the environment constructor, the teacher
            rollout and candidate sampling.
        history_steps: number of history entries stored with every sample;
            missing entries are padded at the front.
        planner_candidates: number of candidate chunks requested per step.
        dataset_version: version tag written to every sample and the bundle.
        episode_specs: explicit episode specs; each must contain
            ``"proxy_name"``. Specs whose proxy is not in ``proxy_names`` are
            never rolled out, but still appear under ``"episode_specs"`` in
            the returned bundle.
        proposal_target_builder: optional hook called as
            ``builder(env, observation, sample)``; a truthy return value is
            merged into the sample.

    Returns:
        A dict holding the collection settings, the resolved episode specs,
        the list of samples, and a per-proxy summary (episode count, sample
        count, teacher success rate).
    """
    proxy_names = tuple(proxy_names or available_proxy_names())
    samples: list[dict[str, Any]] = []
    summary: dict[str, dict[str, float]] = {}
    if episode_specs is None:
        # No explicit specs: generate nominal/medium episodes, giving each
        # proxy its own 10_000-wide seed range.
        resolved_episode_specs = []
        for proxy_offset, proxy_name in enumerate(proxy_names):
            for episode_idx in range(episodes_per_proxy):
                resolved_episode_specs.append(
                    {
                        "proxy_name": proxy_name,
                        "episode_id": episode_idx,
                        "seed": seed + proxy_offset * 10_000 + episode_idx,
                        "stress_slice": "nominal",
                        "difficulty_bin": "medium",
                    }
                )
    else:
        resolved_episode_specs = list(episode_specs)

    # Group specs per proxy, preserving their order.
    specs_by_proxy: dict[str, list[dict[str, Any]]] = {proxy_name: [] for proxy_name in proxy_names}
    for spec in resolved_episode_specs:
        proxy_name = str(spec["proxy_name"])
        specs_by_proxy.setdefault(proxy_name, []).append(spec)

    # Only proxies listed in proxy_names are rolled out.
    for proxy_name in proxy_names:
        proxy_specs = specs_by_proxy.get(proxy_name, [])
        proxy_samples = 0
        proxy_success = 0
        for episode_idx, episode_spec in enumerate(proxy_specs):
            episode_seed = int(episode_spec.get("seed", seed + episode_idx))
            env = make_proxy_env(
                proxy_name=proxy_name,
                resolution=resolution,
                seed=episode_seed,
                rollout_horizon=rollout_horizon,
                stress_slice=str(episode_spec.get("stress_slice", "nominal")),
                difficulty_bin=str(episode_spec.get("difficulty_bin", "medium")),
            )
            observation, privileged_state = env.reset(seed=episode_seed)
            # Entries recorded after each executed step, oldest first.
            history_buffer: list[dict[str, Any]] = []
            while True:
                action_chunk, rollout = env.teacher_chunk_and_rollout(
                    chunk_horizon=chunk_horizon,
                    rollout_horizon=rollout_horizon,
                )
                observation = env.get_observation(privileged_state)
                candidate_action_chunks, candidate_outcomes = env.sample_candidate_action_chunks(
                    teacher_chunk=action_chunk,
                    num_candidates=planner_candidates,
                    rollout_horizon=rollout_horizon,
                )
                padded_history_render_states = []
                padded_history_proprio = []
                padded_history_actions = []
                padded_history_camera_intrinsics = []
                padded_history_camera_extrinsics = []
                padded_history_camera_valid_mask = []
                history_count = min(history_steps, len(history_buffer))
                pad_count = history_steps - history_count
                # Guarded because history_buffer[-0:] would return the whole buffer.
                if history_count > 0:
                    recent_history = history_buffer[-history_count:]
                else:
                    recent_history = []
                # Front padding: current render state plus zeroed proprio,
                # action and camera arrays (camera valid mask all zero).
                # NOTE(review): the leading 3 in the camera shapes presumably
                # is the number of camera views — confirm.
                for _ in range(pad_count):
                    padded_history_render_states.append(env.render_state(privileged_state))
                    padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
                    padded_history_actions.append(np.zeros((action_chunk.shape[-1],), dtype=np.float32))
                    padded_history_camera_intrinsics.append(np.zeros((3, 3, 3), dtype=np.float32))
                    padded_history_camera_extrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
                    padded_history_camera_valid_mask.append(np.zeros((3,), dtype=np.float32))
                # Real history follows the padding.
                for item in recent_history:
                    padded_history_render_states.append(item["render_state"])
                    padded_history_proprio.append(item["proprio"])
                    padded_history_actions.append(item["action"])
                    padded_history_camera_intrinsics.append(item["camera_intrinsics"])
                    padded_history_camera_extrinsics.append(item["camera_extrinsics"])
                    padded_history_camera_valid_mask.append(item["camera_valid_mask"])
                # Task identity comes from the observation when present, otherwise
                # it is derived from the proxy name.
                task_name = str(observation.get("task_name", task_name_from_proxy_name(proxy_name)))
                task_id = int(observation.get("task_id", task_id_from_task_name(task_name)))
                sample = {
                    "dataset_version": dataset_version,
                    "proxy_name": proxy_name,
                    "episode_id": int(episode_spec.get("episode_id", episode_idx)),
                    "episode_seed": episode_seed,
                    "task_name": task_name,
                    "task_id": task_id,
                    "stress_slice": str(observation.get("stress_slice", episode_spec.get("stress_slice", "nominal"))),
                    "difficulty_bin": str(observation.get("difficulty_bin", episode_spec.get("difficulty_bin", "medium"))),
                    "episode_metadata": dict(observation.get("episode_metadata", {})),
                    "render_state": env.render_state(privileged_state),
                    "camera_intrinsics": observation.get("camera_intrinsics", default_camera_matrices()[0]).astype("float32"),
                    "camera_extrinsics": observation.get("camera_extrinsics", default_camera_matrices()[1]).astype("float32"),
                    "camera_valid_mask": observation.get("camera_valid_mask", np.ones((3,), dtype=np.float32)).astype("float32"),
                    "proprio": observation["proprio"].astype("float32"),
                    "language_goal": observation["text"],
                    "action_chunk": action_chunk.astype("float32"),
                    # Labels read from the privileged state; keys accessed with
                    # [] are required, those read via .get default to 0.
                    "support_mode": int(privileged_state["support_mode"]),
                    "phase": int(privileged_state.get("phase_label", 0)),
                    "subgoal_progress": float(privileged_state.get("subgoal_progress", 0.0)),
                    "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
                    "persistence_horizon": privileged_state["persistence_horizon"].astype("float32"),
                    "disturbance_cost": float(privileged_state["disturbance_cost"]),
                    "belief_map": privileged_state["belief_map"].astype("float32"),
                    "visibility_map": privileged_state["visibility_map"].astype("float32"),
                    "clearance_map": privileged_state["clearance_map"].astype("float32"),
                    "occluder_contact_map": privileged_state["occluder_contact_map"].astype("float32"),
                    "grasp_affordance_map": privileged_state["grasp_affordance_map"].astype("float32"),
                    "support_stability": float(privileged_state["support_stability"]),
                    "support_stability_map": privileged_state["support_stability_map"].astype("float32"),
                    "reocclusion_target": float(privileged_state["reocclusion_target"]),
                    "reocclusion_map": privileged_state["reocclusion_map"].astype("float32"),
                    "gap_width": float(privileged_state.get("gap_width", 0.0)),
                    "damage_proxy": float(privileged_state.get("damage_proxy", 0.0)),
                    "release_collapse_rate": float(privileged_state.get("release_collapse_rate", 0.0)),
                    "target_visibility_confidence": float(privileged_state.get("target_visibility_confidence", 0.0)),
                    "mouth_aperture": float(privileged_state.get("mouth_aperture", 0.0)),
                    "hold_quality": float(privileged_state.get("hold_quality", 0.0)),
                    "rim_slip_risk": float(privileged_state.get("rim_slip_risk", 0.0)),
                    "insertable_actor_corridor": float(privileged_state.get("insertable_actor_corridor", 0.0)),
                    "layer_separation_quality": float(privileged_state.get("layer_separation_quality", 0.0)),
                    "fold_preservation": float(privileged_state.get("fold_preservation", 0.0)),
                    "insertion_corridor": float(privileged_state.get("insertion_corridor", 0.0)),
                    "top_layer_stability": float(privileged_state.get("top_layer_stability", 0.0)),
                    "lift_too_much_risk": float(privileged_state.get("lift_too_much_risk", 0.0)),
                    # Rollout labels returned by the teacher; rollout_phase falls
                    # back to zeros with one entry per rollout_support_mode row.
                    "rollout_support_mode": rollout["rollout_support_mode"].astype("int64"),
                    "rollout_phase": rollout.get("rollout_phase", np.zeros((rollout["rollout_support_mode"].shape[0],), dtype=np.int64)).astype("int64"),
                    "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
                    "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
                    "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
                    "rollout_belief_map": rollout["rollout_belief_map"].astype("float32"),
                    "rollout_visibility_map": rollout["rollout_visibility_map"].astype("float32"),
                    "rollout_clearance_map": rollout["rollout_clearance_map"].astype("float32"),
                    "rollout_support_stability": rollout["rollout_support_stability"].astype("float32"),
                    "rollout_reocclusion_target": rollout["rollout_reocclusion_target"].astype("float32"),
                    "rollout_occluder_contact_map": rollout["rollout_occluder_contact_map"].astype("float32"),
                    "rollout_grasp_affordance_map": rollout["rollout_grasp_affordance_map"].astype("float32"),
                    # History arrays; the empty fallbacks apply when history_steps is 0.
                    "history_render_states": padded_history_render_states,
                    "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
                    if padded_history_proprio
                    else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
                    "history_actions": np.stack(padded_history_actions, axis=0).astype("float32")
                    if padded_history_actions
                    else np.zeros((0, action_chunk.shape[-1]), dtype=np.float32),
                    "history_camera_intrinsics": np.stack(padded_history_camera_intrinsics, axis=0).astype("float32")
                    if padded_history_camera_intrinsics
                    else np.zeros((0, 3, 3, 3), dtype=np.float32),
                    "history_camera_extrinsics": np.stack(padded_history_camera_extrinsics, axis=0).astype("float32")
                    if padded_history_camera_extrinsics
                    else np.zeros((0, 3, 4, 4), dtype=np.float32),
                    "history_camera_valid_mask": np.stack(padded_history_camera_valid_mask, axis=0).astype("float32")
                    if padded_history_camera_valid_mask
                    else np.zeros((0, 3), dtype=np.float32),
                    "candidate_action_chunks": candidate_action_chunks.astype("float32"),
                    # Candidate outcome fields are merged last, so they override
                    # any same-named key above.
                    **candidate_outcomes,
                }
                if proposal_target_builder is not None:
                    extra_fields = proposal_target_builder(env, observation, sample)
                    if extra_fields:
                        sample.update(extra_fields)
                samples.append(sample)
                proxy_samples += 1
                # Advance the environment with the teacher's action, then record
                # the post-step state as the newest history entry.
                executed_action = env.teacher_action().astype("float32")
                _, _, terminated, truncated, privileged_state = env.step(executed_action)
                history_buffer.append(
                    {
                        "render_state": env.render_state(privileged_state),
                        "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
                        "action": executed_action,
                        "camera_intrinsics": env.get_observation(privileged_state).get("camera_intrinsics", default_camera_matrices()[0]).astype("float32"),
                        "camera_extrinsics": env.get_observation(privileged_state).get("camera_extrinsics", default_camera_matrices()[1]).astype("float32"),
                        "camera_valid_mask": env.get_observation(privileged_state).get("camera_valid_mask", np.ones((3,), dtype=np.float32)).astype("float32"),
                    }
                )
                # An episode counts as a teacher success when it terminates
                # (as opposed to being truncated).
                if terminated:
                    proxy_success += 1
                if terminated or truncated:
                    break
        summary[proxy_name] = {
            "episodes": float(len(proxy_specs)),
            "samples": float(proxy_samples),
            # max(1, ...) avoids division by zero for proxies without episodes.
            "teacher_success": proxy_success / float(max(1, len(proxy_specs))),
        }
    return {
        "dataset_version": dataset_version,
        "resolution": resolution,
        "chunk_horizon": chunk_horizon,
        "rollout_horizon": rollout_horizon,
        "history_steps": history_steps,
        "planner_candidates": planner_candidates,
        "episode_specs": resolved_episode_specs,
        "samples": samples,
        "summary": summary,
    }
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def save_teacher_dataset(output_path: str | Path, dataset_bundle: dict[str, Any]) -> Path:
    """Serialize ``dataset_bundle`` to ``output_path`` with ``torch.save``.

    Missing parent directories are created first.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(output_path)
    parent_dir = destination.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    torch.save(dataset_bundle, destination)
    return destination
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def load_teacher_dataset(dataset_path: str | Path) -> dict[str, Any]:
    """Load a dataset bundle from ``dataset_path`` onto the CPU.

    NOTE: ``weights_only=False`` lets ``torch.load`` unpickle arbitrary
    objects, so only load files from trusted sources.
    """
    source = Path(dataset_path)
    bundle = torch.load(source, map_location="cpu", weights_only=False)
    return bundle
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
class RevealOfflineDataset(Dataset[dict[str, Any]]):
|
| 266 |
+
def __init__(self, samples: Sequence[dict[str, Any]], resolution: int = 96) -> None:
|
| 267 |
+
self.samples = list(samples)
|
| 268 |
+
self.resolution = resolution
|
| 269 |
+
self._render_cache: dict[bytes, dict[str, np.ndarray]] = {}
|
| 270 |
+
self._item_cache: dict[int, dict[str, Any]] = {}
|
| 271 |
+
|
| 272 |
+
def __len__(self) -> int:
|
| 273 |
+
return len(self.samples)
|
| 274 |
+
|
| 275 |
+
def _render_cache_key(self, sample: dict[str, Any], render_state: dict[str, Any]) -> bytes:
    """Build the render cache key for ``render_state``.

    The key is the protocol-4 pickle of the sample's proxy name, the dataset
    resolution, whether the sample's dataset version uses RGB-D, and the
    render state itself.
    """
    uses_depth = dataset_uses_rgbd(sample.get("dataset_version"))
    key_fields = (sample["proxy_name"], self.resolution, uses_depth, render_state)
    return pickle.dumps(key_fields, protocol=4)
|
| 281 |
+
|
| 282 |
+
def _render_sample(self, sample: dict[str, Any], render_state: dict[str, Any]) -> dict[str, np.ndarray]:
    """Render the views for ``render_state``, reusing the cache when possible.

    On a cache miss the views come from ``render_views_from_state``, with
    depth requested only when the sample's dataset version uses RGB-D, and
    the result is stored under the render cache key before being returned.
    """
    key = self._render_cache_key(sample, render_state)
    views = self._render_cache.get(key)
    if views is None:
        wants_depth = dataset_uses_rgbd(sample.get("dataset_version"))
        views = render_views_from_state(
            proxy_name=sample["proxy_name"],
            render_state=render_state,
            resolution=self.resolution,
            include_depth=wants_depth,
        )
        self._render_cache[key] = views
    return views
|
| 296 |
+
|
| 297 |
+
def __getitem__(self, index: int) -> dict[str, Any]:
|
| 298 |
+
cached_item = self._item_cache.get(index)
|
| 299 |
+
if cached_item is not None:
|
| 300 |
+
return cached_item
|
| 301 |
+
sample = self.samples[index]
|
| 302 |
+
_assert_noleak_sample(sample)
|
| 303 |
+
candidate_count = int(sample.get("candidate_action_chunks", np.zeros((0, 0, 0), dtype=np.float32)).shape[0])
|
| 304 |
+
proposal_target_count = int(
|
| 305 |
+
sample.get("proposal_target_action_chunks", np.zeros((0, 0, 0), dtype=np.float32)).shape[0]
|
| 306 |
+
)
|
| 307 |
+
images = self._render_sample(sample, sample["render_state"])
|
| 308 |
+
history_images = []
|
| 309 |
+
history_depths = []
|
| 310 |
+
history_depth_valid = []
|
| 311 |
+
for history_state in sample.get("history_render_states", []):
|
| 312 |
+
rendered = self._render_sample(sample, history_state)
|
| 313 |
+
history_images.append(
|
| 314 |
+
torch.stack(
|
| 315 |
+
[
|
| 316 |
+
torch.from_numpy(rendered["front"]),
|
| 317 |
+
torch.from_numpy(rendered["wrist_left"]),
|
| 318 |
+
torch.from_numpy(rendered["wrist_right"]),
|
| 319 |
+
],
|
| 320 |
+
dim=0,
|
| 321 |
+
)
|
| 322 |
+
)
|
| 323 |
+
if dataset_uses_rgbd(sample.get("dataset_version")):
|
| 324 |
+
history_depths.append(
|
| 325 |
+
torch.stack(
|
| 326 |
+
[
|
| 327 |
+
torch.from_numpy(rendered["front_depth"]),
|
| 328 |
+
torch.from_numpy(rendered["wrist_left_depth"]),
|
| 329 |
+
torch.from_numpy(rendered["wrist_right_depth"]),
|
| 330 |
+
],
|
| 331 |
+
dim=0,
|
| 332 |
+
)
|
| 333 |
+
)
|
| 334 |
+
history_depth_valid.append(
|
| 335 |
+
torch.stack(
|
| 336 |
+
[
|
| 337 |
+
torch.from_numpy(rendered["front_depth_valid"]),
|
| 338 |
+
torch.from_numpy(rendered["wrist_left_depth_valid"]),
|
| 339 |
+
torch.from_numpy(rendered["wrist_right_depth_valid"]),
|
| 340 |
+
],
|
| 341 |
+
dim=0,
|
| 342 |
+
)
|
| 343 |
+
)
|
| 344 |
+
stacked = torch.from_numpy(
|
| 345 |
+
torch.stack(
|
| 346 |
+
[
|
| 347 |
+
torch.from_numpy(images["front"]),
|
| 348 |
+
torch.from_numpy(images["wrist_left"]),
|
| 349 |
+
torch.from_numpy(images["wrist_right"]),
|
| 350 |
+
],
|
| 351 |
+
dim=0,
|
| 352 |
+
).numpy()
|
| 353 |
+
).permute(0, 3, 1, 2).float() / 255.0
|
| 354 |
+
if history_images:
|
| 355 |
+
history_stacked = torch.stack(history_images, dim=0).permute(0, 1, 4, 2, 3).float() / 255.0
|
| 356 |
+
else:
|
| 357 |
+
history_stacked = torch.zeros((0, 3, 3, self.resolution, self.resolution), dtype=torch.float32)
|
| 358 |
+
if dataset_uses_rgbd(sample.get("dataset_version")):
|
| 359 |
+
depths = torch.stack(
|
| 360 |
+
[
|
| 361 |
+
torch.from_numpy(images["front_depth"]),
|
| 362 |
+
torch.from_numpy(images["wrist_left_depth"]),
|
| 363 |
+
torch.from_numpy(images["wrist_right_depth"]),
|
| 364 |
+
],
|
| 365 |
+
dim=0,
|
| 366 |
+
).unsqueeze(1).float()
|
| 367 |
+
depth_valid = torch.stack(
|
| 368 |
+
[
|
| 369 |
+
torch.from_numpy(images["front_depth_valid"]),
|
| 370 |
+
torch.from_numpy(images["wrist_left_depth_valid"]),
|
| 371 |
+
torch.from_numpy(images["wrist_right_depth_valid"]),
|
| 372 |
+
],
|
| 373 |
+
dim=0,
|
| 374 |
+
).unsqueeze(1).float()
|
| 375 |
+
if history_depths:
|
| 376 |
+
history_depths_tensor = torch.stack(history_depths, dim=0).unsqueeze(2).float()
|
| 377 |
+
history_depth_valid_tensor = torch.stack(history_depth_valid, dim=0).unsqueeze(2).float()
|
| 378 |
+
else:
|
| 379 |
+
history_depths_tensor = torch.zeros((0, 3, 1, self.resolution, self.resolution), dtype=torch.float32)
|
| 380 |
+
history_depth_valid_tensor = torch.zeros((0, 3, 1, self.resolution, self.resolution), dtype=torch.float32)
|
| 381 |
+
else:
|
| 382 |
+
depths = torch.zeros((3, 1, self.resolution, self.resolution), dtype=torch.float32)
|
| 383 |
+
depth_valid = torch.zeros_like(depths)
|
| 384 |
+
history_depths_tensor = torch.zeros((0, 3, 1, self.resolution, self.resolution), dtype=torch.float32)
|
| 385 |
+
history_depth_valid_tensor = torch.zeros_like(history_depths_tensor)
|
| 386 |
+
camera_intrinsics = sample.get("camera_intrinsics")
|
| 387 |
+
camera_extrinsics = sample.get("camera_extrinsics")
|
| 388 |
+
camera_valid_mask = sample.get("camera_valid_mask")
|
| 389 |
+
if camera_intrinsics is None or camera_extrinsics is None:
|
| 390 |
+
default_intrinsics, default_extrinsics = default_camera_matrices()
|
| 391 |
+
camera_intrinsics = default_intrinsics
|
| 392 |
+
camera_extrinsics = default_extrinsics
|
| 393 |
+
if camera_valid_mask is None:
|
| 394 |
+
camera_valid_mask = np.ones((3,), dtype=np.float32)
|
| 395 |
+
history_length = len(sample.get("history_render_states", []))
|
| 396 |
+
history_camera_intrinsics = torch.as_tensor(
|
| 397 |
+
sample.get("history_camera_intrinsics", np.zeros((history_length, 3, 3, 3), dtype=np.float32)),
|
| 398 |
+
dtype=torch.float32,
|
| 399 |
+
)
|
| 400 |
+
history_camera_extrinsics = torch.as_tensor(
|
| 401 |
+
sample.get("history_camera_extrinsics", np.zeros((history_length, 3, 4, 4), dtype=np.float32)),
|
| 402 |
+
dtype=torch.float32,
|
| 403 |
+
)
|
| 404 |
+
history_camera_valid_mask = torch.as_tensor(
|
| 405 |
+
sample.get("history_camera_valid_mask", np.zeros((history_length, 3), dtype=np.float32)),
|
| 406 |
+
dtype=torch.float32,
|
| 407 |
+
)
|
| 408 |
+
item = {
|
| 409 |
+
"images": stacked,
|
| 410 |
+
"depths": depths,
|
| 411 |
+
"depth_valid": depth_valid,
|
| 412 |
+
"history_images": history_stacked,
|
| 413 |
+
"history_depths": history_depths_tensor,
|
| 414 |
+
"history_depth_valid": history_depth_valid_tensor,
|
| 415 |
+
"history_camera_intrinsics": history_camera_intrinsics,
|
| 416 |
+
"history_camera_extrinsics": history_camera_extrinsics,
|
| 417 |
+
"history_camera_valid_mask": history_camera_valid_mask,
|
| 418 |
+
"history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
|
| 419 |
+
"history_actions": torch.as_tensor(
|
| 420 |
+
sample.get(
|
| 421 |
+
"history_actions",
|
| 422 |
+
np.zeros((len(sample.get("history_render_states", [])), sample["action_chunk"].shape[-1]), dtype=np.float32),
|
| 423 |
+
),
|
| 424 |
+
dtype=torch.float32,
|
| 425 |
+
),
|
| 426 |
+
"camera_intrinsics": torch.as_tensor(camera_intrinsics, dtype=torch.float32),
|
| 427 |
+
"camera_extrinsics": torch.as_tensor(camera_extrinsics, dtype=torch.float32),
|
| 428 |
+
"camera_valid_mask": torch.as_tensor(camera_valid_mask, dtype=torch.float32),
|
| 429 |
+
"proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
|
| 430 |
+
"texts": sample["language_goal"],
|
| 431 |
+
"task_name": sample.get("task_name", task_name_from_proxy_name(sample["proxy_name"])),
|
| 432 |
+
"task_id": torch.as_tensor(
|
| 433 |
+
sample.get(
|
| 434 |
+
"task_id",
|
| 435 |
+
task_id_from_task_name(sample.get("task_name", task_name_from_proxy_name(sample["proxy_name"]))),
|
| 436 |
+
),
|
| 437 |
+
dtype=torch.long,
|
| 438 |
+
),
|
| 439 |
+
"stress_slice": sample.get("stress_slice", "nominal"),
|
| 440 |
+
"difficulty_bin": sample.get("difficulty_bin", "medium"),
|
| 441 |
+
"episode_metadata_json": str(sample.get("episode_metadata", {})),
|
| 442 |
+
"action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
|
| 443 |
+
"support_mode": torch.as_tensor(sample["support_mode"], dtype=torch.long),
|
| 444 |
+
"phase": torch.as_tensor(sample.get("phase", 0), dtype=torch.long),
|
| 445 |
+
"subgoal_progress": torch.as_tensor(sample.get("subgoal_progress", 0.0), dtype=torch.float32),
|
| 446 |
+
"corridor_feasible": torch.as_tensor(sample["corridor_feasible"], dtype=torch.float32),
|
| 447 |
+
"persistence_horizon": torch.as_tensor(sample["persistence_horizon"], dtype=torch.float32),
|
| 448 |
+
"disturbance_cost": torch.as_tensor(sample["disturbance_cost"], dtype=torch.float32),
|
| 449 |
+
"belief_map": torch.as_tensor(sample["belief_map"], dtype=torch.float32).unsqueeze(0),
|
| 450 |
+
"visibility_map": torch.as_tensor(sample.get("visibility_map", np.zeros((32, 32), dtype=np.float32)), dtype=torch.float32).unsqueeze(0),
|
| 451 |
+
"clearance_map": torch.as_tensor(sample.get("clearance_map", np.zeros((2, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 452 |
+
"occluder_contact_map": torch.as_tensor(sample.get("occluder_contact_map", np.zeros((32, 32), dtype=np.float32)), dtype=torch.float32).unsqueeze(0),
|
| 453 |
+
"grasp_affordance_map": torch.as_tensor(sample.get("grasp_affordance_map", np.zeros((32, 32), dtype=np.float32)), dtype=torch.float32).unsqueeze(0),
|
| 454 |
+
"support_stability": torch.as_tensor(sample.get("support_stability", 0.0), dtype=torch.float32),
|
| 455 |
+
"support_stability_map": torch.as_tensor(sample.get("support_stability_map", np.zeros((32, 32), dtype=np.float32)), dtype=torch.float32).unsqueeze(0),
|
| 456 |
+
"reocclusion_target": torch.as_tensor(sample.get("reocclusion_target", 0.0), dtype=torch.float32),
|
| 457 |
+
"reocclusion_map": torch.as_tensor(sample.get("reocclusion_map", np.zeros((32, 32), dtype=np.float32)), dtype=torch.float32).unsqueeze(0),
|
| 458 |
+
"gap_width": torch.as_tensor(sample.get("gap_width", 0.0), dtype=torch.float32),
|
| 459 |
+
"damage_proxy": torch.as_tensor(sample.get("damage_proxy", 0.0), dtype=torch.float32),
|
| 460 |
+
"release_collapse_rate": torch.as_tensor(sample.get("release_collapse_rate", 0.0), dtype=torch.float32),
|
| 461 |
+
"target_visibility_confidence": torch.as_tensor(sample.get("target_visibility_confidence", 0.0), dtype=torch.float32),
|
| 462 |
+
"mouth_aperture": torch.as_tensor(sample.get("mouth_aperture", 0.0), dtype=torch.float32),
|
| 463 |
+
"hold_quality": torch.as_tensor(sample.get("hold_quality", 0.0), dtype=torch.float32),
|
| 464 |
+
"rim_slip_risk": torch.as_tensor(sample.get("rim_slip_risk", 0.0), dtype=torch.float32),
|
| 465 |
+
"insertable_actor_corridor": torch.as_tensor(sample.get("insertable_actor_corridor", 0.0), dtype=torch.float32),
|
| 466 |
+
"layer_separation_quality": torch.as_tensor(sample.get("layer_separation_quality", 0.0), dtype=torch.float32),
|
| 467 |
+
"fold_preservation": torch.as_tensor(sample.get("fold_preservation", 0.0), dtype=torch.float32),
|
| 468 |
+
"insertion_corridor": torch.as_tensor(sample.get("insertion_corridor", 0.0), dtype=torch.float32),
|
| 469 |
+
"top_layer_stability": torch.as_tensor(sample.get("top_layer_stability", 0.0), dtype=torch.float32),
|
| 470 |
+
"lift_too_much_risk": torch.as_tensor(sample.get("lift_too_much_risk", 0.0), dtype=torch.float32),
|
| 471 |
+
"rollout_support_mode": torch.as_tensor(sample["rollout_support_mode"], dtype=torch.long),
|
| 472 |
+
"rollout_phase": torch.as_tensor(sample.get("rollout_phase", np.zeros((0,), dtype=np.int64)), dtype=torch.long),
|
| 473 |
+
"rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
|
| 474 |
+
"rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
|
| 475 |
+
"rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
|
| 476 |
+
"rollout_belief_map": torch.as_tensor(sample.get("rollout_belief_map", np.zeros((0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 477 |
+
"rollout_visibility_map": torch.as_tensor(sample.get("rollout_visibility_map", np.zeros((0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 478 |
+
"rollout_clearance_map": torch.as_tensor(sample.get("rollout_clearance_map", np.zeros((0, 2, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 479 |
+
"rollout_support_stability": torch.as_tensor(sample.get("rollout_support_stability", np.zeros((0,), dtype=np.float32)), dtype=torch.float32),
|
| 480 |
+
"rollout_reocclusion_target": torch.as_tensor(sample.get("rollout_reocclusion_target", np.zeros((0,), dtype=np.float32)), dtype=torch.float32),
|
| 481 |
+
"rollout_occluder_contact_map": torch.as_tensor(sample.get("rollout_occluder_contact_map", np.zeros((0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 482 |
+
"rollout_grasp_affordance_map": torch.as_tensor(sample.get("rollout_grasp_affordance_map", np.zeros((0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 483 |
+
"candidate_action_chunks": torch.as_tensor(sample["candidate_action_chunks"], dtype=torch.float32),
|
| 484 |
+
"candidate_rollout_support_mode": torch.as_tensor(sample["candidate_rollout_support_mode"], dtype=torch.long),
|
| 485 |
+
"candidate_rollout_phase": torch.as_tensor(sample.get("candidate_rollout_phase", np.zeros((0, 0), dtype=np.int64)), dtype=torch.long),
|
| 486 |
+
"candidate_rollout_corridor_feasible": torch.as_tensor(sample["candidate_rollout_corridor_feasible"], dtype=torch.float32),
|
| 487 |
+
"candidate_rollout_persistence_horizon": torch.as_tensor(sample["candidate_rollout_persistence_horizon"], dtype=torch.float32),
|
| 488 |
+
"candidate_rollout_disturbance_cost": torch.as_tensor(sample["candidate_rollout_disturbance_cost"], dtype=torch.float32),
|
| 489 |
+
"candidate_rollout_belief_map": torch.as_tensor(sample.get("candidate_rollout_belief_map", np.zeros((0, 0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 490 |
+
"candidate_rollout_visibility_map": torch.as_tensor(sample.get("candidate_rollout_visibility_map", np.zeros((0, 0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 491 |
+
"candidate_rollout_clearance_map": torch.as_tensor(sample.get("candidate_rollout_clearance_map", np.zeros((0, 0, 2, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 492 |
+
"candidate_rollout_support_stability": torch.as_tensor(sample.get("candidate_rollout_support_stability", np.zeros((0, 0), dtype=np.float32)), dtype=torch.float32),
|
| 493 |
+
"candidate_rollout_reocclusion_target": torch.as_tensor(sample.get("candidate_rollout_reocclusion_target", np.zeros((0, 0), dtype=np.float32)), dtype=torch.float32),
|
| 494 |
+
"candidate_rollout_occluder_contact_map": torch.as_tensor(sample.get("candidate_rollout_occluder_contact_map", np.zeros((0, 0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 495 |
+
"candidate_rollout_grasp_affordance_map": torch.as_tensor(sample.get("candidate_rollout_grasp_affordance_map", np.zeros((0, 0, 32, 32), dtype=np.float32)), dtype=torch.float32),
|
| 496 |
+
"candidate_retrieval_success": torch.as_tensor(sample["candidate_retrieval_success"], dtype=torch.float32),
|
| 497 |
+
"candidate_final_disturbance_cost": torch.as_tensor(sample["candidate_final_disturbance_cost"], dtype=torch.float32),
|
| 498 |
+
"candidate_reocclusion_rate": torch.as_tensor(sample["candidate_reocclusion_rate"], dtype=torch.float32),
|
| 499 |
+
"candidate_visibility_integral": torch.as_tensor(sample["candidate_visibility_integral"], dtype=torch.float32),
|
| 500 |
+
"candidate_actor_feasibility_auc": torch.as_tensor(sample.get("candidate_actor_feasibility_auc", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 501 |
+
"candidate_reveal_achieved": torch.as_tensor(sample.get("candidate_reveal_achieved", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 502 |
+
"candidate_hold_persistence": torch.as_tensor(sample.get("candidate_hold_persistence", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 503 |
+
"candidate_support_stability_auc": torch.as_tensor(sample.get("candidate_support_stability_auc", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 504 |
+
"candidate_disturbance_auc": torch.as_tensor(sample.get("candidate_disturbance_auc", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 505 |
+
"candidate_macro_ids": torch.as_tensor(sample.get("candidate_macro_ids", np.zeros((candidate_count,), dtype=np.int64)), dtype=torch.long),
|
| 506 |
+
"candidate_is_hard_negative": torch.as_tensor(sample.get("candidate_is_hard_negative", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 507 |
+
"candidate_risk": torch.as_tensor(sample["candidate_risk"], dtype=torch.float32),
|
| 508 |
+
"candidate_utility": torch.as_tensor(sample["candidate_utility"], dtype=torch.float32),
|
| 509 |
+
"candidate_gap_width": torch.as_tensor(sample.get("candidate_gap_width", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 510 |
+
"candidate_damage_proxy": torch.as_tensor(sample.get("candidate_damage_proxy", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 511 |
+
"candidate_mouth_aperture": torch.as_tensor(sample.get("candidate_mouth_aperture", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 512 |
+
"candidate_hold_quality": torch.as_tensor(sample.get("candidate_hold_quality", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 513 |
+
"candidate_rim_slip_risk": torch.as_tensor(sample.get("candidate_rim_slip_risk", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 514 |
+
"candidate_fold_preservation": torch.as_tensor(sample.get("candidate_fold_preservation", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 515 |
+
"candidate_layer_separation_quality": torch.as_tensor(sample.get("candidate_layer_separation_quality", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 516 |
+
"candidate_lift_too_much_risk": torch.as_tensor(sample.get("candidate_lift_too_much_risk", np.zeros((candidate_count,), dtype=np.float32)), dtype=torch.float32),
|
| 517 |
+
"proxy_name": sample["proxy_name"],
|
| 518 |
+
"episode_id": sample["episode_id"],
|
| 519 |
+
}
|
| 520 |
+
if "proposal_target_action_chunks" in sample:
|
| 521 |
+
rollout_horizon = int(np.asarray(sample.get("rollout_support_mode", np.zeros((0,), dtype=np.int64))).shape[0])
|
| 522 |
+
belief_map_shape = tuple(np.asarray(sample.get("rollout_belief_map", np.zeros((0, 32, 32), dtype=np.float32))).shape[1:])
|
| 523 |
+
visibility_map_shape = tuple(np.asarray(sample.get("rollout_visibility_map", np.zeros((0, 32, 32), dtype=np.float32))).shape[1:])
|
| 524 |
+
clearance_map_shape = tuple(np.asarray(sample.get("rollout_clearance_map", np.zeros((0, 2, 32, 32), dtype=np.float32))).shape[1:])
|
| 525 |
+
occluder_contact_shape = tuple(
|
| 526 |
+
np.asarray(sample.get("rollout_occluder_contact_map", np.zeros((0, 32, 32), dtype=np.float32))).shape[1:]
|
| 527 |
+
)
|
| 528 |
+
grasp_affordance_shape = tuple(
|
| 529 |
+
np.asarray(sample.get("rollout_grasp_affordance_map", np.zeros((0, 32, 32), dtype=np.float32))).shape[1:]
|
| 530 |
+
)
|
| 531 |
+
item["proposal_target_action_chunks"] = torch.as_tensor(sample["proposal_target_action_chunks"], dtype=torch.float32)
|
| 532 |
+
item["proposal_target_retrieval_success"] = torch.as_tensor(
|
| 533 |
+
sample.get("proposal_target_retrieval_success", np.zeros((proposal_target_count,), dtype=np.float32)),
|
| 534 |
+
dtype=torch.float32,
|
| 535 |
+
)
|
| 536 |
+
item["proposal_target_risk"] = torch.as_tensor(
|
| 537 |
+
sample.get("proposal_target_risk", np.zeros((proposal_target_count,), dtype=np.float32)),
|
| 538 |
+
dtype=torch.float32,
|
| 539 |
+
)
|
| 540 |
+
item["proposal_target_utility"] = torch.as_tensor(
|
| 541 |
+
sample.get("proposal_target_utility", np.zeros((proposal_target_count,), dtype=np.float32)),
|
| 542 |
+
dtype=torch.float32,
|
| 543 |
+
)
|
| 544 |
+
item["proposal_target_rollout_support_mode"] = torch.as_tensor(
|
| 545 |
+
sample.get(
|
| 546 |
+
"proposal_target_rollout_support_mode",
|
| 547 |
+
np.zeros((proposal_target_count, rollout_horizon), dtype=np.int64),
|
| 548 |
+
),
|
| 549 |
+
dtype=torch.long,
|
| 550 |
+
)
|
| 551 |
+
item["proposal_target_rollout_phase"] = torch.as_tensor(
|
| 552 |
+
sample.get(
|
| 553 |
+
"proposal_target_rollout_phase",
|
| 554 |
+
np.zeros((proposal_target_count, rollout_horizon), dtype=np.int64),
|
| 555 |
+
),
|
| 556 |
+
dtype=torch.long,
|
| 557 |
+
)
|
| 558 |
+
item["proposal_target_rollout_corridor_feasible"] = torch.as_tensor(
|
| 559 |
+
sample.get(
|
| 560 |
+
"proposal_target_rollout_corridor_feasible",
|
| 561 |
+
np.zeros((proposal_target_count, rollout_horizon, 3), dtype=np.float32),
|
| 562 |
+
),
|
| 563 |
+
dtype=torch.float32,
|
| 564 |
+
)
|
| 565 |
+
item["proposal_target_rollout_persistence_horizon"] = torch.as_tensor(
|
| 566 |
+
sample.get(
|
| 567 |
+
"proposal_target_rollout_persistence_horizon",
|
| 568 |
+
np.zeros((proposal_target_count, rollout_horizon, 3), dtype=np.float32),
|
| 569 |
+
),
|
| 570 |
+
dtype=torch.float32,
|
| 571 |
+
)
|
| 572 |
+
item["proposal_target_rollout_disturbance_cost"] = torch.as_tensor(
|
| 573 |
+
sample.get(
|
| 574 |
+
"proposal_target_rollout_disturbance_cost",
|
| 575 |
+
np.zeros((proposal_target_count, rollout_horizon), dtype=np.float32),
|
| 576 |
+
),
|
| 577 |
+
dtype=torch.float32,
|
| 578 |
+
)
|
| 579 |
+
item["proposal_target_rollout_belief_map"] = torch.as_tensor(
|
| 580 |
+
sample.get(
|
| 581 |
+
"proposal_target_rollout_belief_map",
|
| 582 |
+
np.zeros((proposal_target_count, rollout_horizon, *belief_map_shape), dtype=np.float32),
|
| 583 |
+
),
|
| 584 |
+
dtype=torch.float32,
|
| 585 |
+
)
|
| 586 |
+
item["proposal_target_rollout_visibility_map"] = torch.as_tensor(
|
| 587 |
+
sample.get(
|
| 588 |
+
"proposal_target_rollout_visibility_map",
|
| 589 |
+
np.zeros((proposal_target_count, rollout_horizon, *visibility_map_shape), dtype=np.float32),
|
| 590 |
+
),
|
| 591 |
+
dtype=torch.float32,
|
| 592 |
+
)
|
| 593 |
+
item["proposal_target_rollout_clearance_map"] = torch.as_tensor(
|
| 594 |
+
sample.get(
|
| 595 |
+
"proposal_target_rollout_clearance_map",
|
| 596 |
+
np.zeros((proposal_target_count, rollout_horizon, *clearance_map_shape), dtype=np.float32),
|
| 597 |
+
),
|
| 598 |
+
dtype=torch.float32,
|
| 599 |
+
)
|
| 600 |
+
item["proposal_target_rollout_support_stability"] = torch.as_tensor(
|
| 601 |
+
sample.get(
|
| 602 |
+
"proposal_target_rollout_support_stability",
|
| 603 |
+
np.zeros((proposal_target_count, rollout_horizon), dtype=np.float32),
|
| 604 |
+
),
|
| 605 |
+
dtype=torch.float32,
|
| 606 |
+
)
|
| 607 |
+
item["proposal_target_rollout_reocclusion_target"] = torch.as_tensor(
|
| 608 |
+
sample.get(
|
| 609 |
+
"proposal_target_rollout_reocclusion_target",
|
| 610 |
+
np.zeros((proposal_target_count, rollout_horizon), dtype=np.float32),
|
| 611 |
+
),
|
| 612 |
+
dtype=torch.float32,
|
| 613 |
+
)
|
| 614 |
+
item["proposal_target_rollout_occluder_contact_map"] = torch.as_tensor(
|
| 615 |
+
sample.get(
|
| 616 |
+
"proposal_target_rollout_occluder_contact_map",
|
| 617 |
+
np.zeros((proposal_target_count, rollout_horizon, *occluder_contact_shape), dtype=np.float32),
|
| 618 |
+
),
|
| 619 |
+
dtype=torch.float32,
|
| 620 |
+
)
|
| 621 |
+
item["proposal_target_rollout_grasp_affordance_map"] = torch.as_tensor(
|
| 622 |
+
sample.get(
|
| 623 |
+
"proposal_target_rollout_grasp_affordance_map",
|
| 624 |
+
np.zeros((proposal_target_count, rollout_horizon, *grasp_affordance_shape), dtype=np.float32),
|
| 625 |
+
),
|
| 626 |
+
dtype=torch.float32,
|
| 627 |
+
)
|
| 628 |
+
self._item_cache[index] = item
|
| 629 |
+
return item
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def dataset_from_bundle(dataset_bundle: dict[str, Any], resolution: int | None = None) -> RevealOfflineDataset:
    """Wrap the samples of ``dataset_bundle`` in a ``RevealOfflineDataset``.

    Args:
        dataset_bundle: Bundle providing ``"samples"`` and, when no explicit
            resolution is given, ``"resolution"``.
        resolution: Optional override for the render resolution.

    Returns:
        A dataset over ``dataset_bundle["samples"]``.
    """
    if not resolution:
        # Any falsy override (None or 0) defers to the value stored in the bundle.
        resolution = int(dataset_bundle["resolution"])
    return RevealOfflineDataset(dataset_bundle["samples"], resolution=resolution)
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/generate_dataset.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from sim_reveal.dataset import collect_teacher_dataset, save_teacher_dataset
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main() -> None:
    """Collect a teacher dataset from command-line options and save it.

    After saving, a JSON report with the output path, resolution, number of
    samples, number of episode specs, and the bundle summary is printed to
    stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--proxies", nargs="*", default=None)
    # Integer-valued options and their defaults, registered in this order.
    integer_options = (
        ("--episodes-per-proxy", 32),
        ("--resolution", 96),
        ("--seed", 0),
        ("--chunk-horizon", 8),
        ("--rollout-horizon", 5),
        ("--history-steps", 2),
        ("--planner-candidates", 4),
    )
    for flag, default in integer_options:
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument("--episode-spec-path", default=None)
    parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
    args = parser.parse_args()

    if args.episode_spec_path:
        # Episode specs are read from a UTF-8 JSON file when a path is given.
        spec_text = Path(args.episode_spec_path).read_text(encoding="utf-8")
        episode_specs = json.loads(spec_text)
    else:
        episode_specs = None

    dataset_bundle = collect_teacher_dataset(
        proxy_names=args.proxies,
        episodes_per_proxy=args.episodes_per_proxy,
        resolution=args.resolution,
        seed=args.seed,
        chunk_horizon=args.chunk_horizon,
        rollout_horizon=args.rollout_horizon,
        history_steps=args.history_steps,
        planner_candidates=args.planner_candidates,
        episode_specs=episode_specs,
    )
    saved_to = save_teacher_dataset(Path(args.output_path), dataset_bundle)

    report = {
        "output_path": str(saved_to),
        "resolution": dataset_bundle["resolution"],
        "num_samples": len(dataset_bundle["samples"]),
        "num_episode_specs": len(dataset_bundle.get("episode_specs", [])),
        "summary": dataset_bundle["summary"],
    }
    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
from sim_reveal.isaac_wrapper import IsaacRevealRuntime
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def main() -> None:
    """Smoke-test the Isaac runtime: start it, print a JSON status, and close it.

    The runtime is headless unless ``--visible`` is passed. The runtime is
    closed even if building or printing the status report fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--visible", action="store_true")
    args = parser.parse_args()
    headless = not args.visible

    runtime = IsaacRevealRuntime(headless=headless)
    try:
        # Imported only after the runtime has been constructed (presumably the
        # simulation app has to be running first -- confirm).
        import isaacsim

        report = {
            "headless": headless,
            "isaacsim_version": getattr(isaacsim, "__version__", "unknown"),
            "status": "ok",
        }
        print(json.dumps(report, indent=2))
    finally:
        runtime.close()


if __name__ == "__main__":
    main()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataclass
class IsaacRevealRuntime:
    """Holds an Isaac Sim ``SimulationApp`` that is started on construction."""

    # Forwarded to SimulationApp as its "headless" launch option.
    headless: bool = True

    def __post_init__(self) -> None:
        """Start the simulation app with the configured ``headless`` flag."""
        # Imported lazily so the module itself can be imported without isaacsim.
        from isaacsim import SimulationApp

        launch_config = {"headless": self.headless}
        self._simulation_app = SimulationApp(launch_config)

    def close(self) -> None:
        """Close the simulation app started in ``__post_init__``."""
        app = self._simulation_app
        app.close()
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/labels.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
from sim_reveal.base import RevealState, SupportMode
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def privileged_state_to_reveal_labels(
    state: dict[str, Any],
    num_modes: int = 3,
    num_templates: int = 32,
    rollout_horizon: int = 5,
) -> RevealState:
    """Convert a privileged simulator state into a ``RevealState`` of labels.

    Args:
        state: Mapping with ``"support_mode"``, ``"corridor_feasible"``,
            ``"persistence_horizon"``, ``"disturbance_cost"`` and optionally
            ``"belief_map"``.
        num_modes: Number of support modes; length of the support logits.
        num_templates: Expected second dimension of ``corridor_feasible``.
        rollout_horizon: Upper bound used to clip the persistence horizon.

    Returns:
        A ``RevealState`` holding one-hot style support logits (+4 for the
        active mode, -4 elsewhere), corridor logits (+4 where feasibility is
        above 0.5, -4 elsewhere), the clipped persistence horizon, a length-1
        disturbance cost array, and the belief map (or ``None``).

    Raises:
        IndexError: If ``support_mode`` is not in ``[0, num_modes)``.
        ValueError: If ``corridor_feasible`` or ``persistence_horizon`` does
            not have the expected shape.
    """
    support_mode = int(state["support_mode"])
    # Reject negative values explicitly: NumPy would otherwise wrap them around
    # and silently mark a different mode as active. Values that are too large
    # already raised IndexError, so the same exception type is used here.
    if not 0 <= support_mode < num_modes:
        raise IndexError(f"support_mode {support_mode} is out of range for {num_modes} modes")
    support_logits = np.full((num_modes,), -4.0, dtype=np.float32)
    support_logits[support_mode] = 4.0

    corridor = np.asarray(state["corridor_feasible"], dtype=np.float32)
    if corridor.shape != (num_modes, num_templates):
        raise ValueError(
            f"Expected corridor_feasible shape {(num_modes, num_templates)}, got {corridor.shape}"
        )
    corridor_logits = np.where(corridor > 0.5, 4.0, -4.0).astype(np.float32)

    persistence = np.asarray(state["persistence_horizon"], dtype=np.float32)
    if persistence.shape != (num_modes,):
        raise ValueError(f"Expected persistence_horizon shape {(num_modes,)}, got {persistence.shape}")
    persistence = np.clip(persistence, 0.0, float(rollout_horizon))

    disturbance = np.asarray([state["disturbance_cost"]], dtype=np.float32)
    belief_map = state.get("belief_map")
    if belief_map is not None:
        belief_map = np.asarray(belief_map, dtype=np.float32)

    return RevealState(
        support_mode_logits=support_logits,
        corridor_logits=corridor_logits,
        persistence_horizon=persistence,
        disturbance_cost=disturbance,
        belief_map=belief_map,
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def reocclusion_rate(corridor_open_history: np.ndarray) -> float:
    """Return the fraction of consecutive steps where an open corridor closes.

    A step counts as open when its value is above 0.5. The rate is the mean,
    over all adjacent pairs, of "open at step t and not open at step t + 1".

    Args:
        corridor_open_history: 1D sequence of per-step openness values.

    Returns:
        The re-occlusion rate, or 0.0 when fewer than two steps are given.

    Raises:
        ValueError: If the input is not one-dimensional.
    """
    history = np.asarray(corridor_open_history, dtype=np.float32)
    if history.ndim != 1:
        raise ValueError("corridor_open_history must be 1D.")
    if history.size < 2:
        # No adjacent pair exists, so nothing can re-occlude.
        return 0.0
    was_open = history[:-1] > 0.5
    now_closed = history[1:] <= 0.5
    return float(np.mean(was_open & now_closed))
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def infer_support_mode_from_flags(holding: bool, transferred: bool) -> SupportMode:
    """Pick the support mode implied by the two flags.

    ``holding`` takes precedence over ``transferred``; when neither flag is
    set the mode is ``SupportMode.PASSIVE``.
    """
    if not holding:
        return SupportMode.TRANSFER if transferred else SupportMode.PASSIVE
    return SupportMode.HOLD
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py
ADDED
|
@@ -0,0 +1,1389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
from sim_reveal.base import RevealProxyConfig, SupportMode
|
| 9 |
+
from sim_reveal.proxy_specs import (
|
| 10 |
+
BAG_PROXY,
|
| 11 |
+
CLOTH_PROXY,
|
| 12 |
+
CRITICAL_STRESS_BY_TASK_NAME,
|
| 13 |
+
FOLIAGE_PROXY,
|
| 14 |
+
SPRINT_DIFFICULTY_BINS,
|
| 15 |
+
task_id_from_task_name,
|
| 16 |
+
task_name_from_proxy_name,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass(frozen=True)
class ProxyDynamics:
    """Immutable per-proxy tuning constants read by ``ProceduralRevealEnv``."""

    # Per-step opening decay returned by ``_mode_decay`` for HOLD, TRANSFER and
    # any other mode respectively; ``_persistence_for_mode`` subtracts it from
    # the simulated opening on every rollout step.
    hold_decay: float
    transfer_decay: float
    passive_decay: float
    # NOTE(review): not read anywhere in the visible part of this file;
    # presumably scales how much an action increases disturbance - confirm.
    disturbance_gain: float
    # ``_persistence_for_mode`` multiplies disturbance by (1 - settle_rate)
    # on every simulated step.
    settle_rate: float
    # ``_support_stability`` penalises opening above this value and
    # ``_phase_label`` treats 0.75x this value as "opening ready".
    desired_opening: float
    # ``_support_stability`` adds a 0.08 bonus while the current mode equals this.
    preferred_mode: SupportMode
    # Multipliers returned by ``_mode_factor`` (HOLD uses 1.0) and applied to
    # the opening in ``_corridor_for_mode``.
    transfer_support_factor: float
    passive_support_factor: float
    # Constant offset added to the visibility score in ``_visibility``.
    visibility_bias: float
    # Visibility needed for the retrieve phase in ``_phase_label``; the
    # corridor is forced closed below 0.7x this value in ``_corridor_for_mode``.
    retrieve_visibility_threshold: float
    # NOTE(review): not read in the visible part of this file; presumably an
    # RGB base colour for rendering - confirm.
    palette: tuple[float, float, float]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Registry of the supported proxy configurations, keyed by each config's name.
PROXY_CONFIGS: dict[str, RevealProxyConfig] = {
    config.name: config for config in (FOLIAGE_PROXY, BAG_PROXY, CLOTH_PROXY)
}
|
| 41 |
+
|
| 42 |
+
# Dynamics constants for each proxy, keyed by the proxy's name (same keys as
# PROXY_CONFIGS).
PROXY_DYNAMICS: dict[str, ProxyDynamics] = {
    proxy.name: ProxyDynamics(**settings)
    for proxy, settings in (
        (
            FOLIAGE_PROXY,
            dict(
                hold_decay=0.02,
                transfer_decay=0.07,
                passive_decay=0.15,
                disturbance_gain=0.06,
                settle_rate=0.03,
                desired_opening=0.60,
                preferred_mode=SupportMode.HOLD,
                transfer_support_factor=0.76,
                passive_support_factor=0.42,
                visibility_bias=0.03,
                retrieve_visibility_threshold=0.42,
                palette=(0.16, 0.30, 0.12),
            ),
        ),
        (
            BAG_PROXY,
            dict(
                hold_decay=0.04,
                transfer_decay=0.03,
                passive_decay=0.12,
                disturbance_gain=0.05,
                settle_rate=0.02,
                desired_opening=0.68,
                preferred_mode=SupportMode.TRANSFER,
                transfer_support_factor=0.96,
                passive_support_factor=0.55,
                visibility_bias=0.06,
                retrieve_visibility_threshold=0.48,
                palette=(0.26, 0.17, 0.10),
            ),
        ),
        (
            CLOTH_PROXY,
            dict(
                hold_decay=0.03,
                transfer_decay=0.05,
                passive_decay=0.04,
                disturbance_gain=0.04,
                settle_rate=0.04,
                desired_opening=0.50,
                preferred_mode=SupportMode.PASSIVE,
                transfer_support_factor=0.82,
                passive_support_factor=0.90,
                visibility_bias=0.08,
                retrieve_visibility_threshold=0.38,
                palette=(0.24, 0.24, 0.29),
            ),
        ),
    )
}
|
| 86 |
+
|
| 87 |
+
# Natural-language goal string for each proxy, keyed by the proxy's name.
PROXY_GOALS = dict(
    (
        (FOLIAGE_PROXY.name, "create a gap in the foliage and retrieve the target"),
        (BAG_PROXY.name, "open the bag mouth and retrieve the target object"),
        (CLOTH_PROXY.name, "lift the top layer enough to retrieve the hidden object"),
    )
)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def default_camera_matrices(
    pose_jitter: float = 0.0,
    focal_jitter: float = 0.0,
    lateral_skew: float = 0.0,
) -> tuple[np.ndarray, np.ndarray]:
    """Build intrinsics and extrinsics for three cameras, optionally perturbed.

    Args:
        pose_jitter: Scales the translation offsets applied to cameras 1 and 2.
            When exactly 0.0 the extrinsics are left untouched, so
            ``lateral_skew`` has no effect on them either.
        focal_jitter: Scales the focal-length and principal-point changes.
            When exactly 0.0 the intrinsics are left untouched, so
            ``lateral_skew`` has no effect on them either.
        lateral_skew: Extra opposite-signed offset mixed into the perturbations
            of cameras 1 and 2.

    Returns:
        ``(intrinsics, extrinsics)`` as float32 arrays of shape ``(3, 3, 3)``
        and ``(3, 4, 4)``.
    """

    def pinhole(focal: float) -> list[list[float]]:
        # Square-pixel intrinsic matrix with the principal point at (48, 48).
        return [[focal, 0.0, 48.0], [0.0, focal, 48.0], [0.0, 0.0, 1.0]]

    intrinsics = np.asarray(
        [pinhole(140.0), pinhole(135.0), pinhole(135.0)],
        dtype=np.float32,
    )

    # Camera 0 stays at identity; cameras 1 and 2 are pure translations.
    extrinsics = np.tile(np.eye(4, dtype=np.float32), (3, 1, 1))
    extrinsics[1, :3, 3] = (-0.18, 0.04, 0.10)
    extrinsics[2, :3, 3] = (0.18, 0.04, 0.10)

    if pose_jitter != 0.0:
        translation_shifts = (
            ((1, 0, 3), -(0.6 * pose_jitter)),
            ((2, 0, 3), 0.6 * pose_jitter),
            ((1, 1, 3), 0.25 * pose_jitter + lateral_skew),
            ((2, 1, 3), 0.25 * pose_jitter - lateral_skew),
            ((1, 2, 3), 0.15 * pose_jitter),
            ((2, 2, 3), 0.10 * pose_jitter),
        )
        for index, shift in translation_shifts:
            extrinsics[index] += shift

    if focal_jitter != 0.0:
        focal_gains = (
            ((0, 0, 0), 1.0 + 0.10 * focal_jitter),
            ((0, 1, 1), 1.0 - 0.05 * focal_jitter),
            ((1, 0, 0), 1.0 - 0.08 * focal_jitter),
            ((2, 1, 1), 1.0 + 0.08 * focal_jitter),
        )
        for index, gain in focal_gains:
            intrinsics[index] *= gain
        # Principal-point x of cameras 1 and 2 moves in opposite directions.
        intrinsics[1, 0, 2] += 3.0 * focal_jitter + 6.0 * lateral_skew
        intrinsics[2, 0, 2] -= 3.0 * focal_jitter - 6.0 * lateral_skew

    return intrinsics, extrinsics
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def available_proxy_names() -> tuple[str, ...]:
    """Return the names registered in ``PROXY_CONFIGS``, in insertion order."""
    return tuple(PROXY_CONFIGS)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def make_proxy_env(
    proxy_name: str,
    resolution: int = 96,
    seed: int = 0,
    num_templates: int = 32,
    rollout_horizon: int = 5,
    max_steps: int | None = None,
    stress_slice: str = "nominal",
    difficulty_bin: str = "medium",
) -> "ProceduralRevealEnv":
    """Construct a ``ProceduralRevealEnv``, forwarding every argument unchanged.

    All validation (unknown proxy, unsupported difficulty bin) happens in the
    environment constructor.
    """
    env_kwargs = dict(
        proxy_name=proxy_name,
        resolution=resolution,
        seed=seed,
        num_templates=num_templates,
        rollout_horizon=rollout_horizon,
        max_steps=max_steps,
        stress_slice=stress_slice,
        difficulty_bin=difficulty_bin,
    )
    return ProceduralRevealEnv(**env_kwargs)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class ProceduralRevealEnv:
|
| 159 |
+
camera_names = ("front", "wrist_left", "wrist_right")
|
| 160 |
+
|
| 161 |
+
def __init__(
    self,
    proxy_name: str,
    resolution: int = 96,
    seed: int = 0,
    num_templates: int = 32,
    rollout_horizon: int = 5,
    max_steps: int | None = None,
    stress_slice: str = "nominal",
    difficulty_bin: str = "medium",
) -> None:
    """Configure the environment for one proxy and run an initial ``reset``.

    Args:
        proxy_name: Key into ``PROXY_CONFIGS`` / ``PROXY_DYNAMICS``.
        resolution: Stored as ``self.resolution``.
        seed: Seed for the NumPy generator; also passed to ``reset``.
        num_templates: Stored as ``self.num_templates``.
        rollout_horizon: Stored as ``self.rollout_horizon``.
        max_steps: Step budget; any falsy value (``None`` or 0) falls back to
            the proxy config's ``max_steps``.
        stress_slice: Name of the stress preset, stored as a string. It is not
            validated here.
        difficulty_bin: Must be a member of ``SPRINT_DIFFICULTY_BINS``.

    Raises:
        KeyError: If ``proxy_name`` is not registered.
        ValueError: If ``difficulty_bin`` is not supported.
    """
    if proxy_name not in PROXY_CONFIGS:
        raise KeyError(f"Unknown proxy: {proxy_name}")
    if difficulty_bin not in SPRINT_DIFFICULTY_BINS:
        raise ValueError(f"Unsupported difficulty bin: {difficulty_bin}")

    # Static per-proxy tables.
    self.proxy_name = proxy_name
    self.proxy = PROXY_CONFIGS[proxy_name]
    self.dynamics = PROXY_DYNAMICS[proxy_name]

    # Task identity derived from the proxy name.
    task_name = task_name_from_proxy_name(proxy_name)
    self.task_name = task_name
    self.task_id = task_id_from_task_name(task_name)
    self.critical_stress = CRITICAL_STRESS_BY_TASK_NAME[task_name]

    # Episode configuration.
    self.stress_slice = str(stress_slice)
    self.difficulty_bin = str(difficulty_bin)
    self.resolution = resolution
    self.num_templates = num_templates
    self.rollout_horizon = rollout_horizon
    self.max_steps = max_steps if max_steps else self.proxy.max_steps

    self.rng = np.random.default_rng(seed)
    self.reset(seed=seed)
|
| 190 |
+
|
| 191 |
+
def clone_state(self) -> dict[str, Any]:
    """Snapshot the mutable episode state into a plain dict.

    Scalar attributes are stored by reference; the two trace lists are
    shallow-copied so later appends do not alter the snapshot.  The random
    generator and the stress parameters are not part of the snapshot.
    """
    scalar_fields = (
        "step_count",
        "opening",
        "disturbance",
        "target_template",
        "target_depth",
        "target_center",
        "target_radius",
        "texture_phase",
        "texture_scale",
        "view_bias",
        "target_intensity",
        "holding",
        "transferred",
        "retrieved",
        "actor_progress",
        "last_actor_template",
    )
    snapshot: dict[str, Any] = {name: getattr(self, name) for name in scalar_fields}
    snapshot["visibility_trace"] = list(self.visibility_trace)
    snapshot["corridor_trace"] = list(self.corridor_trace)
    return snapshot
|
| 212 |
+
|
| 213 |
+
def restore_state(self, state: dict[str, Any]) -> None:
    """Load a snapshot produced by ``clone_state`` back onto the environment.

    Each value is coerced to its expected builtin type, and the trace
    sequences are copied into fresh lists.

    Raises:
        KeyError: If ``state`` lacks one of the expected keys (attributes
            listed before the missing key have already been assigned).
    """
    converters = (
        ("step_count", int),
        ("opening", float),
        ("disturbance", float),
        ("target_template", int),
        ("target_depth", float),
        ("target_center", float),
        ("target_radius", float),
        ("texture_phase", float),
        ("texture_scale", float),
        ("view_bias", float),
        ("target_intensity", float),
        ("holding", bool),
        ("transferred", bool),
        ("retrieved", bool),
        ("actor_progress", float),
        ("last_actor_template", int),
        ("visibility_trace", list),
        ("corridor_trace", list),
    )
    for key, convert in converters:
        setattr(self, key, convert(state[key]))
|
| 232 |
+
|
| 233 |
+
def _difficulty_ranges(self) -> dict[str, tuple[float, float]]:
    """Return the (low, high) sampling range for each randomised quantity.

    The "hard" bin has its own table; every other bin shares the default one.
    """
    is_hard = self.difficulty_bin == "hard"
    # key -> (default range, hard range)
    table = {
        "opening": ((0.12, 0.24), (0.07, 0.18)),
        "disturbance": ((0.03, 0.12), (0.08, 0.18)),
        "target_depth": ((0.14, 0.40), (0.28, 0.52)),
        "target_radius": ((0.022, 0.036), (0.018, 0.030)),
        "view_bias": ((-0.10, 0.10), (-0.18, 0.18)),
    }
    return {
        key: (hard_range if is_hard else default_range)
        for key, (default_range, hard_range) in table.items()
    }
|
| 249 |
+
|
| 250 |
+
def _stress_parameters(self) -> dict[str, float]:
    """Return the stress knobs for ``self.stress_slice``.

    Starts from neutral defaults and overlays the preset for the active
    slice.  An unrecognised slice name (including "nominal") yields the
    defaults unchanged.
    """
    is_hard = self.difficulty_bin == "hard"
    slice_name = self.stress_slice

    overrides: dict[str, float] = {}
    if slice_name == "high_reocclusion":
        overrides = {
            "reocclusion_bias": 0.18,
            "closure_scale": 1.22,
            "disturbance_gain_scale": 1.12,
            "opening_shift": -0.03,
        }
    elif slice_name == "camera_perturbation":
        overrides = {
            "camera_pose_jitter": 1.0 if is_hard else 0.65,
            "focal_jitter": 0.9 if is_hard else 0.55,
            "lateral_skew": 0.06 if self.proxy_name == BAG_PROXY.name else 0.02,
            "view_bias_scale": 1.75 if is_hard else 1.4,
        }
    elif slice_name == "tight_corridor_high_collateral":
        overrides = {
            "corridor_scale": 0.76,
            "disturbance_gain_scale": 1.25,
            "collateral_bias": 0.14,
            "support_stability_penalty": 0.08,
            "opening_shift": -0.04,
        }
    elif slice_name == "one_sided_slip":
        overrides = {
            "corridor_scale": 0.84,
            "disturbance_gain_scale": 1.10,
            "reocclusion_bias": 0.10,
            "camera_pose_jitter": 0.45,
            "focal_jitter": 0.35,
            "lateral_skew": 0.10 if is_hard else 0.06,
        }
    elif slice_name == "fold_sensitive_long_persistence":
        overrides = {
            "disturbance_gain_scale": 1.18,
            "support_stability_penalty": 0.12,
            "reocclusion_bias": 0.08,
            "collateral_bias": 0.12,
            "depth_shift": 0.04,
        }

    # Neutral values: additive terms are 0.0, multiplicative terms are 1.0.
    settings = {
        "reocclusion_bias": 0.0,
        "closure_scale": 1.0,
        "disturbance_gain_scale": 1.0,
        "corridor_scale": 1.0,
        "support_stability_penalty": 0.0,
        "camera_pose_jitter": 0.0,
        "focal_jitter": 0.0,
        "lateral_skew": 0.0,
        "collateral_bias": 0.0,
        "opening_shift": 0.0,
        "disturbance_shift": 0.0,
        "depth_shift": 0.0,
        "view_bias_scale": 1.0,
    }
    settings.update(overrides)
    return settings
|
| 316 |
+
|
| 317 |
+
def episode_metadata(self) -> dict[str, Any]:
    """Describe the current episode configuration as a flat dict.

    Identity fields are passed through as stored; the stress knobs set by
    ``reset`` are coerced to ``float``.
    """
    metadata: dict[str, Any] = {
        "proxy_name": self.proxy_name,
        "task_name": self.task_name,
        "task_id": self.task_id,
        "stress_slice": self.stress_slice,
        "difficulty_bin": self.difficulty_bin,
    }
    stress_fields = (
        "camera_pose_jitter",
        "focal_jitter",
        "lateral_skew",
        "reocclusion_bias",
        "closure_scale",
        "disturbance_gain_scale",
        "corridor_scale",
        "support_stability_penalty",
        "collateral_bias",
    )
    for field_name in stress_fields:
        metadata[field_name] = float(getattr(self, field_name))
    return metadata
|
| 334 |
+
|
| 335 |
+
def reset(self, seed: int | None = None) -> tuple[dict[str, Any], dict[str, Any]]:
    """Start a new episode and return ``(observation, privileged_state)``.

    Args:
        seed: When given, the random generator is re-created from it so the
            sampled episode is reproducible; otherwise the existing generator
            continues.

    The order of the random draws below is part of the behaviour: reordering
    them would change which episode a given seed produces.
    """
    if seed is not None:
        self.rng = np.random.default_rng(seed)
    rng = self.rng
    ranges = self._difficulty_ranges()
    stress = self._stress_parameters()

    def sample(key: str) -> float:
        # One uniform draw from the difficulty range registered under `key`.
        return rng.uniform(*ranges[key])

    def clamp(value: float, low: float, high: float) -> float:
        return float(np.clip(value, low, high))

    # Copy the stress knobs that other methods read directly off `self`.
    for knob in (
        "reocclusion_bias",
        "closure_scale",
        "disturbance_gain_scale",
        "corridor_scale",
        "support_stability_penalty",
        "camera_pose_jitter",
        "focal_jitter",
        "lateral_skew",
        "collateral_bias",
    ):
        setattr(self, knob, float(stress[knob]))

    self.step_count = 0
    self.opening = clamp(sample("opening") + stress["opening_shift"], 0.03, 0.95)
    self.disturbance = clamp(sample("disturbance") + stress["disturbance_shift"], 0.0, 1.0)
    # Keeps the target at least four templates away from either end.
    self.target_template = int(rng.integers(4, self.num_templates - 4))
    self.target_depth = clamp(sample("target_depth") + stress["depth_shift"], 0.05, 0.85)
    template_fraction = self.target_template / float(max(1, self.num_templates - 1))
    self.target_center = clamp(template_fraction + rng.uniform(-0.01, 0.01), 0.06, 0.94)
    self.target_radius = float(sample("target_radius"))
    self.texture_phase = float(rng.uniform(0.0, 2.0 * np.pi))
    self.texture_scale = float(rng.uniform(0.85, 1.25))
    self.view_bias = clamp(sample("view_bias") * stress["view_bias_scale"], -0.30, 0.30)
    self.target_intensity = float(rng.uniform(0.45, 0.8))

    self.holding = False
    self.transferred = False
    self.retrieved = False
    self.actor_progress = 0.0
    self.last_actor_template = self.target_template

    # Seed the traces with the state of the freshly sampled scene.
    privileged_state = self.get_privileged_state()
    active_mode = privileged_state["support_mode"]
    corridor_open = privileged_state["corridor_feasible"][active_mode].any()
    self.visibility_trace = [float(privileged_state["visibility"])]
    self.corridor_trace = [float(corridor_open)]
    return self.get_observation(privileged_state), privileged_state
|
| 394 |
+
|
| 395 |
+
def _normalized_template(self, template_index: int) -> float:
    """Map a template index linearly onto ``[-1.0, 1.0]``.

    Index 0 maps to -1.0 and index ``num_templates - 1`` maps to 1.0.

    The denominator is clamped to at least 1, matching the guard ``reset``
    uses for the same quantity, so a single-template configuration returns
    -1.0 for index 0 instead of raising ``ZeroDivisionError``.
    """
    span = float(max(1, self.num_templates - 1))
    return (template_index / span) * 2.0 - 1.0
|
| 397 |
+
|
| 398 |
+
def _current_support_mode(self) -> SupportMode:
    """Derive the active support mode from the ``holding``/``transferred`` flags.

    ``holding`` wins over ``transferred``; with neither set the mode is PASSIVE.
    """
    if not self.holding:
        return SupportMode.TRANSFER if self.transferred else SupportMode.PASSIVE
    return SupportMode.HOLD
|
| 404 |
+
|
| 405 |
+
def _mode_from_action(self, action: np.ndarray) -> SupportMode:
    """Pick the support mode requested by an action vector.

    Components 6, 1 and 2 of ``action`` are squashed into (0, 1) and read as
    the HOLD, TRANSFER and PASSIVE scores respectively.
    NOTE(review): the layout of ``action`` is not visible here - confirm that
    index 6 (not 0) is the intended hold channel.

    HOLD wins ties against both other scores.  TRANSFER needs a score at
    least as high as PASSIVE and a current opening of at least 0.32;
    otherwise the result is PASSIVE.
    """

    def squash(raw):
        # tanh mapped from (-1, 1) onto (0, 1).
        return (np.tanh(float(raw)) + 1.0) * 0.5

    hold = squash(action[6])
    transfer = squash(action[1])
    passive = squash(action[2])

    if hold >= max(transfer, passive):
        return SupportMode.HOLD
    wide_enough = self.opening >= 0.32
    if transfer >= passive and wide_enough:
        return SupportMode.TRANSFER
    return SupportMode.PASSIVE
|
| 414 |
+
|
| 415 |
+
def _visibility(self, opening: float | None = None, disturbance: float | None = None) -> float:
    """Score target visibility in ``[0, 1]``.

    Visibility rises with the opening and falls with disturbance and target
    depth; the proxy's ``visibility_bias`` is added as a constant offset.

    Args:
        opening: Opening to evaluate; defaults to the current ``self.opening``.
        disturbance: Disturbance to evaluate; defaults to ``self.disturbance``.
    """
    if opening is None:
        opening = self.opening
    else:
        opening = float(opening)
    if disturbance is None:
        disturbance = self.disturbance
    else:
        disturbance = float(disturbance)

    opening_gain = 1.35 * opening
    disturbance_loss = 0.58 * disturbance
    depth_loss = 0.25 * self.target_depth
    raw_score = opening_gain - disturbance_loss - depth_loss + self.dynamics.visibility_bias
    return float(np.clip(raw_score, 0.0, 1.0))
|
| 425 |
+
|
| 426 |
+
def _mode_factor(self, mode: SupportMode) -> float:
    """Return the opening multiplier for a support mode.

    HOLD is the reference (1.0), TRANSFER uses ``transfer_support_factor``,
    and any other mode uses ``passive_support_factor``.
    """
    if mode == SupportMode.HOLD:
        factor = 1.0
    elif mode == SupportMode.TRANSFER:
        factor = self.dynamics.transfer_support_factor
    else:
        factor = self.dynamics.passive_support_factor
    return factor
|
| 432 |
+
|
| 433 |
+
def _mode_decay(self, mode: SupportMode) -> float:
    """Return the per-step opening decay configured for a support mode.

    HOLD and TRANSFER have dedicated decay values; any other mode uses
    ``passive_decay``.
    """
    dynamics = self.dynamics
    if mode == SupportMode.HOLD:
        decay = dynamics.hold_decay
    elif mode == SupportMode.TRANSFER:
        decay = dynamics.transfer_decay
    else:
        decay = dynamics.passive_decay
    return decay
|
| 439 |
+
|
| 440 |
+
def _corridor_for_mode(
    self,
    mode: SupportMode,
    opening: float | None = None,
    disturbance: float | None = None,
) -> np.ndarray:
    """Return a 0/1 mask over templates that are reachable under ``mode``.

    The mask is a contiguous band centred on ``self.target_template`` whose
    half-width grows with the mode-scaled opening and shrinks with
    disturbance, target depth and collateral bias.  It is all zeros when
    visibility is below 70% of the retrieve threshold or the half-width
    rounds down to zero.

    Args:
        mode: Support mode whose factor scales the opening.
        opening: Opening to evaluate; defaults to ``self.opening``.
        disturbance: Disturbance to evaluate; defaults to ``self.disturbance``.

    Returns:
        float32 array of shape ``(num_templates,)``.
    """
    if opening is None:
        opening = self.opening
    else:
        opening = float(opening)
    if disturbance is None:
        disturbance = self.disturbance
    else:
        disturbance = float(disturbance)

    visibility = self._visibility(opening, disturbance)
    reach = (
        self.corridor_scale * opening * self._mode_factor(mode)
        - 0.35 * disturbance
        - 0.18 * self.target_depth
        - 0.10 * self.collateral_bias
    )
    half_width = int(np.floor(max(0.0, reach) * 8.0))

    mask = np.zeros((self.num_templates,), dtype=np.float32)
    too_occluded = visibility < self.dynamics.retrieve_visibility_threshold * 0.7
    if too_occluded or half_width <= 0:
        return mask

    center = self.target_template
    first = max(0, center - half_width)
    stop = min(self.num_templates, center + half_width + 1)
    mask[first:stop] = 1.0
    return mask
|
| 463 |
+
|
| 464 |
+
def _persistence_for_mode(self, mode: SupportMode) -> float:
    """Count how many rollout steps the corridor stays open under ``mode``.

    Starting from the current opening and disturbance, each simulated step
    applies the mode's decay to the opening (HOLD additionally regains 0.035
    per step) and lets the disturbance settle by ``settle_rate``.  Counting
    stops at the first step where the corridor mask is empty.

    Returns:
        The number of open steps as a float, at most ``rollout_horizon``.
    """
    sim_opening = self.opening
    sim_disturbance = self.disturbance
    hold_recovery = 0.035 if mode == SupportMode.HOLD else 0.0
    settle_keep = 1.0 - self.dynamics.settle_rate

    open_steps = 0
    for _ in range(self.rollout_horizon):
        if not self._corridor_for_mode(mode, sim_opening, sim_disturbance).any():
            break
        open_steps += 1
        sim_opening = float(np.clip(sim_opening - self._mode_decay(mode) + hold_recovery, 0.0, 1.0))
        sim_disturbance = float(np.clip(sim_disturbance * settle_keep, 0.0, 1.0))
    return float(open_steps)
|
| 476 |
+
|
| 477 |
+
def _belief_map(self, visibility: float) -> np.ndarray:
    """Render a 32x32 Gaussian belief over the target location.

    The blob is centred horizontally on the target template and vertically
    at ``0.72 - 0.25 * target_depth``.  Lower visibility widens the blob and,
    because the map is multiplied by ``visibility``, lowers its peak.

    Returns:
        float32 array of shape ``(32, 32)`` indexed ``[row, column]``.
    """
    side = 32
    axis = np.linspace(0.0, 1.0, side, dtype=np.float32)
    columns = axis[np.newaxis, :]
    rows = axis[:, np.newaxis]

    center_x = self.target_template / float(self.num_templates - 1)
    center_y = 0.72 - 0.25 * self.target_depth
    sigma = 0.08 + 0.05 * (1.0 - visibility)

    squared_distance = (columns - center_x) ** 2 + (rows - center_y) ** 2
    belief = np.exp(-squared_distance / (2.0 * sigma**2))
    belief *= visibility
    return belief.astype(np.float32)
|
| 488 |
+
|
| 489 |
+
def _visibility_map(self, visibility: float) -> np.ndarray:
    """Return the belief map attenuated by a per-row gain.

    Rows are scaled linearly from 0.65 (first row) to 1.0 (last row) and the
    result is clipped to ``[0, 1]`` as float32.
    """
    belief = self._belief_map(visibility)
    row_gain = np.linspace(0.65, 1.0, belief.shape[0], dtype=np.float32)[:, np.newaxis]
    weighted = belief * row_gain
    return np.clip(weighted, 0.0, 1.0).astype(np.float32)
|
| 493 |
+
|
| 494 |
+
def _clearance_map(self, visibility: float) -> np.ndarray:
    """Render two 32x32 clearance maps stacked along axis 0.

    Both channels share one shape: a horizontal Gaussian centred on
    ``target_center`` (wider with more opening, narrower with more
    disturbance) times a vertical Gaussian centred at
    ``0.72 - 0.25 * target_depth``, scaled by ``visibility``.  They differ
    only in their disturbance-dependent gain.

    Returns:
        float32 array of shape ``(2, 32, 32)``.
    """
    side = 32
    axis = np.linspace(0.0, 1.0, side, dtype=np.float32)
    columns = axis[np.newaxis, :]
    rows = axis[:, np.newaxis]

    corridor_width = np.clip(0.05 + 0.18 * self.opening - 0.10 * self.disturbance, 0.01, 0.28)
    lateral = np.exp(-(((columns - self.target_center) ** 2) / max(1e-5, corridor_width**2)))
    depth_row = 0.72 - 0.25 * self.target_depth
    vertical = np.exp(-(((rows - depth_row) ** 2) / 0.03))
    shared = lateral * vertical * visibility

    # (base gain, disturbance slope) for channel 0 and channel 1.
    channel_gains = ((0.92, 0.15), (0.88, 0.10))
    channels = [
        np.clip(shared * (base - slope * self.disturbance), 0.0, 1.0)
        for base, slope in channel_gains
    ]
    return np.stack(channels, axis=0).astype(np.float32)
|
| 505 |
+
|
| 506 |
+
def _occluder_contact_map(self) -> np.ndarray:
    """Render a 32x32 map with two vertical bands flanking the target.

    The bands sit ``gap_width`` to either side of ``target_center``, where
    ``gap_width`` grows with the opening (clipped to ``[0.03, 0.24]``), and
    are weighted by a vertical Gaussian centred at 0.55.

    Returns:
        float32 array of shape ``(32, 32)`` with values in ``[0, 1]``.
    """
    side = 32
    axis = np.linspace(0.0, 1.0, side, dtype=np.float32)
    columns = axis[np.newaxis, :]
    rows = axis[:, np.newaxis]

    gap_width = np.clip(0.03 + 0.16 * self.opening, 0.03, 0.24)
    left_edge = self.target_center - gap_width
    right_edge = self.target_center + gap_width
    left_band = np.exp(-(((columns - left_edge) ** 2) / 0.0025))
    right_band = np.exp(-(((columns - right_edge) ** 2) / 0.0025))
    vertical_weight = np.exp(-(((rows - 0.55) ** 2) / 0.04))

    contact = (left_band + right_band) * vertical_weight
    return np.clip(contact, 0.0, 1.0).astype(np.float32)
|
| 516 |
+
|
| 517 |
+
def _support_stability(self) -> float:
    """Score how stable the current support is, in ``[0, 1]``.

    Starts at 1.0, loses 0.45 per unit of disturbance and 0.10 per unit of
    opening beyond the proxy's desired opening, gains 0.08 when the current
    support mode is the proxy's preferred one, and finally loses the
    stress-slice ``support_stability_penalty``.
    """
    excess_opening = max(0.0, self.opening - self.dynamics.desired_opening)
    score = 1.0 - 0.45 * self.disturbance - 0.10 * excess_opening
    if self._current_support_mode() == self.dynamics.preferred_mode:
        score += 0.08
    score -= self.support_stability_penalty
    return float(np.clip(score, 0.0, 1.0))
|
| 523 |
+
|
| 524 |
+
def _support_stability_map(self) -> np.ndarray:
    """Broadcast the scalar support-stability score onto a 32x32 float32 grid."""
    stability_grid = np.empty((32, 32), dtype=np.float32)
    stability_grid.fill(self._support_stability())
    return stability_grid
|
| 526 |
+
|
| 527 |
+
def _reocclusion_target(self, persistence: np.ndarray) -> float:
    """Estimate the re-occlusion risk for the current support mode, in ``[0, 1]``.

    Risk is one minus the fraction of the rollout horizon the corridor is
    predicted to persist, plus 0.35 per unit of disturbance and the
    stress-slice ``reocclusion_bias``.

    Args:
        persistence: Per-mode persistence values, indexed by the integer
            value of the support mode.
    """
    mode_index = int(self._current_support_mode())
    horizon = float(max(1, self.rollout_horizon))
    persisted_fraction = persistence[mode_index] / horizon
    risk = 1.0 - persisted_fraction + 0.35 * self.disturbance + self.reocclusion_bias
    return float(np.clip(risk, 0.0, 1.0))
|
| 531 |
+
|
| 532 |
+
def _phase_label(
    self,
    visibility: float,
    corridor: np.ndarray,
    persistence: np.ndarray,
    disturbance_cost: float,
) -> int:
    """Classify the current state into an integer phase id (0-4).

    The checks run in priority order; the first match wins:

    * 3 - corridor value for the current mode/target template exceeds 0.5,
      visibility reaches ``dynamics.retrieve_visibility_threshold`` and
      ``actor_progress`` is at least 0.55.
    * 4 - ``disturbance_cost >= 0.55``, or the opening is ready (at least
      75% of the desired opening) while the persistence ratio is below 0.35.
    * 2 - the opening is ready and the persistence ratio is at least 0.6.
    * 1 - the opening is below the desired opening or visibility is below
      the retrieve threshold.
    * 0 - none of the above.

    Args:
        visibility: Scalar visibility of the target.
        corridor: Array indexed as ``[mode, template]``.
        persistence: Per-mode persistence values.
        disturbance_cost: Scalar disturbance cost.
    """
    mode_index = int(self._current_support_mode())
    horizon = float(max(1, self.rollout_horizon))
    held_ratio = persistence[mode_index] / horizon
    desired_opening = self.dynamics.desired_opening
    visibility_threshold = self.dynamics.retrieve_visibility_threshold
    opened_enough = self.opening >= (0.75 * desired_opening)
    corridor_open = bool(corridor[mode_index, self.target_template] > 0.5)

    if corridor_open and visibility >= visibility_threshold and self.actor_progress >= 0.55:
        return 3
    if disturbance_cost >= 0.55 or (opened_enough and held_ratio < 0.35):
        return 4
    if opened_enough and held_ratio >= 0.6:
        return 2
    if self.opening < desired_opening or visibility < visibility_threshold:
        return 1
    return 0
|
| 558 |
+
|
| 559 |
+
def _subgoal_progress(
    self,
    visibility: float,
    corridor: np.ndarray,
    persistence: np.ndarray,
) -> float:
    """Return a blended progress score in ``[0, 1]``.

    The score is a weighted sum of the opening (0.35), the visibility
    (0.25), the mean corridor value for the current support mode (0.20) and
    the persistence of that mode relative to the rollout horizon (0.20),
    clipped to ``[0, 1]``.

    Args:
        visibility: Scalar visibility of the target.
        corridor: Array whose first axis is indexed by support mode.
        persistence: Per-mode persistence values.
    """
    mode_index = int(self._current_support_mode())
    horizon = float(max(1, self.rollout_horizon))
    mean_corridor = float(corridor[mode_index].mean())
    held_ratio = float(persistence[mode_index] / horizon)
    blended = (
        0.35 * self.opening
        + 0.25 * visibility
        + 0.20 * mean_corridor
        + 0.20 * held_ratio
    )
    return float(np.clip(blended, 0.0, 1.0))
|
| 578 |
+
|
| 579 |
+
def _grasp_affordance_map(
    self,
    belief_map: np.ndarray,
    visibility_map: np.ndarray,
    clearance_map: np.ndarray,
) -> np.ndarray:
    """Combine belief, visibility and clearance into a float32 affordance map.

    The clearance map is averaged over its first axis, multiplied
    element-wise with the belief and visibility maps, attenuated by
    ``1 - 0.35 * disturbance`` and clipped to ``[0, 1]``.
    """
    mean_clearance = clearance_map.mean(axis=0)
    overlap = belief_map * visibility_map * mean_clearance
    attenuation = 1.0 - 0.35 * self.disturbance
    return np.clip(overlap * attenuation, 0.0, 1.0).astype(np.float32)
|
| 587 |
+
|
| 588 |
+
def get_privileged_state(self) -> dict[str, Any]:
    """Assemble the privileged state dictionary for the current environment state.

    The dictionary holds per-mode corridor and persistence arrays (one entry
    per ``SupportMode`` member), the scalar visibility / disturbance /
    stability / re-occlusion values, their map counterparts, the phase label
    and sub-goal progress, and task bookkeeping fields.  Proxy-specific
    metrics are merged in last under their own keys.
    """
    mode_index = int(self._current_support_mode())
    corridor_by_mode = np.stack(
        [self._corridor_for_mode(mode) for mode in SupportMode],
        axis=0,
    )
    persistence_by_mode = np.asarray(
        [self._persistence_for_mode(mode) for mode in SupportMode],
        dtype=np.float32,
    )
    visibility = self._visibility()
    # Opening beyond the desired value adds a small extra disturbance cost.
    over_opening = max(0.0, self.opening - self.dynamics.desired_opening)
    disturbance_cost = float(np.clip(self.disturbance + 0.08 * over_opening, 0.0, 1.0))

    belief_map = self._belief_map(visibility)
    visibility_map = self._visibility_map(visibility)
    clearance_map = self._clearance_map(visibility)
    occluder_contact_map = self._occluder_contact_map()
    support_stability = self._support_stability()
    support_stability_map = self._support_stability_map()
    reocclusion_target = self._reocclusion_target(persistence_by_mode)
    reocclusion_map = np.full((32, 32), reocclusion_target, dtype=np.float32)
    grasp_affordance_map = self._grasp_affordance_map(belief_map, visibility_map, clearance_map)

    # Proxy-specific scalar metrics; stays empty for an unrecognised proxy.
    proxy_metrics: dict[str, float] = {}
    if self.proxy_name == FOLIAGE_PROXY.name:
        proxy_metrics = {
            "gap_width": float(np.clip(0.03 + 0.16 * self.opening, 0.03, 0.24)),
            "damage_proxy": disturbance_cost,
            "release_collapse_rate": reocclusion_target,
            "target_visibility_confidence": visibility,
        }
    elif self.proxy_name == BAG_PROXY.name:
        proxy_metrics = {
            "mouth_aperture": float(self.opening),
            "hold_quality": support_stability,
            "rim_slip_risk": reocclusion_target,
            "insertable_actor_corridor": float(corridor_by_mode[mode_index, self.target_template]),
        }
    elif self.proxy_name == CLOTH_PROXY.name:
        proxy_metrics = {
            "layer_separation_quality": float(
                np.clip(self.opening * (1.0 - 0.20 * self.disturbance), 0.0, 1.0)
            ),
            "fold_preservation": float(np.clip(1.0 - disturbance_cost, 0.0, 1.0)),
            "insertion_corridor": float(corridor_by_mode[mode_index, self.target_template]),
            "top_layer_stability": support_stability,
            "lift_too_much_risk": float(
                np.clip(max(0.0, self.opening - self.dynamics.desired_opening), 0.0, 1.0)
            ),
        }

    phase_label = self._phase_label(
        visibility=visibility,
        corridor=corridor_by_mode,
        persistence=persistence_by_mode,
        disturbance_cost=disturbance_cost,
    )
    subgoal_progress = self._subgoal_progress(
        visibility=visibility,
        corridor=corridor_by_mode,
        persistence=persistence_by_mode,
    )

    state: dict[str, Any] = {
        "support_mode": mode_index,
        "corridor_feasible": corridor_by_mode,
        "persistence_horizon": persistence_by_mode,
        "disturbance_cost": disturbance_cost,
        "belief_map": belief_map,
        "visibility_map": visibility_map,
        "clearance_map": clearance_map,
        "occluder_contact_map": occluder_contact_map,
        "grasp_affordance_map": grasp_affordance_map,
        "support_stability": support_stability,
        "support_stability_map": support_stability_map,
        "reocclusion_target": reocclusion_target,
        "reocclusion_map": reocclusion_map,
        "visibility": visibility,
        "retrieval_success": bool(self.retrieved),
        "target_template": self.target_template,
        "phase_label": int(phase_label),
        "subgoal_progress": float(subgoal_progress),
        "task_name": self.task_name,
        "task_id": self.task_id,
        "stress_slice": self.stress_slice,
        "difficulty_bin": self.difficulty_bin,
        "episode_metadata": self.episode_metadata(),
    }
    # Merged last so proxy metrics would win on any key collision.
    state.update(proxy_metrics)
    return state
|
| 666 |
+
|
| 667 |
+
def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
|
| 668 |
+
return {
|
| 669 |
+
"opening": float(self.opening),
|
| 670 |
+
"disturbance": float(self.disturbance),
|
| 671 |
+
"target_center": float(self.target_center),
|
| 672 |
+
"target_depth": float(self.target_depth),
|
| 673 |
+
"target_radius": float(self.target_radius),
|
| 674 |
+
"texture_phase": float(self.texture_phase),
|
| 675 |
+
"texture_scale": float(self.texture_scale),
|
| 676 |
+
"view_bias": float(self.view_bias),
|
| 677 |
+
"target_intensity": float(self.target_intensity),
|
| 678 |
+
"step_fraction": float(self.step_count / max(1, self.max_steps)),
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
    """Return a 32-element float32 proprioception vector.

    Only the first three entries are populated: the episode step fraction
    and its sine / cosine at half-period scale (``pi * fraction``).  The
    remaining entries stay zero.

    Args:
        privileged_state: Not read by this implementation.
    """
    fraction = self.step_count / float(max(1, self.max_steps))
    phase = np.pi * fraction
    proprio = np.zeros((32,), dtype=np.float32)
    proprio[:3] = (fraction, np.sin(phase), np.cos(phase))
    return proprio
|
| 688 |
+
|
| 689 |
+
def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
    """Build the agent-facing observation for the current state.

    Args:
        privileged_state: Optional precomputed privileged state.  When it is
            missing (or falsy) a fresh one is obtained from
            ``get_privileged_state``.

    Returns:
        A dict with per-camera ``images``, ``depths`` and ``depth_valid``
        arrays stacked in ``camera_names`` order (the depth arrays get an
        extra axis inserted at position 1), the proprio vector, the goal
        text for this proxy, task bookkeeping fields, camera matrices and an
        all-ones ``camera_valid_mask``.
    """
    if not privileged_state:
        privileged_state = self.get_privileged_state()
    scene = self.render_state(privileged_state)
    views = render_views_from_state(
        proxy_name=self.proxy_name,
        render_state=scene,
        resolution=self.resolution,
        num_templates=self.num_templates,
        include_depth=True,
    )
    intrinsics, extrinsics = default_camera_matrices(
        pose_jitter=self.camera_pose_jitter,
        focal_jitter=self.focal_jitter,
        lateral_skew=self.lateral_skew,
    )

    def stack_with_suffix(suffix):
        # Stack the "<camera><suffix>" views and insert a singleton axis at position 1.
        stacked = np.stack([views[f"{camera}{suffix}"] for camera in self.camera_names], axis=0)
        return stacked[:, None, :, :]

    observation: dict[str, Any] = {}
    observation["images"] = np.stack([views[camera] for camera in self.camera_names], axis=0)
    observation["depths"] = stack_with_suffix("_depth")
    observation["depth_valid"] = stack_with_suffix("_depth_valid")
    observation["proprio"] = self._proprio(privileged_state)
    observation["text"] = PROXY_GOALS[self.proxy_name]
    observation["task_name"] = self.task_name
    observation["task_id"] = self.task_id
    observation["stress_slice"] = self.stress_slice
    observation["difficulty_bin"] = self.difficulty_bin
    observation["episode_metadata"] = self.episode_metadata()
    observation["camera_names"] = self.camera_names
    observation["camera_intrinsics"] = intrinsics
    observation["camera_extrinsics"] = extrinsics
    observation["camera_valid_mask"] = np.ones((len(self.camera_names),), dtype=np.float32)
    return observation
|
| 720 |
+
|
| 721 |
+
def teacher_action(self) -> np.ndarray:
    """Return the scripted teacher's 14-dimensional float32 action.

    Mode / opening-command selection:

    * opening below ``dynamics.desired_opening`` -> HOLD with command 0.95;
    * otherwise, if the preferred mode's persistence horizon is at least
      2.0 -> preferred mode with command 0.12;
    * otherwise -> HOLD with command 0.30.

    Populated action slots: 0 = opening command; 1, 2, 6 = +1/-1 flags for
    the TRANSFER, PASSIVE and HOLD modes; 7 = normalized target template;
    8 = 1.0 when the corridor for the chosen mode is open at the target
    template (value above 0.5), else 0.2; 13 = +1 when retrieval is
    requested, else -1.  All other slots are zero.
    """
    state = self.get_privileged_state()
    preferred = self.dynamics.preferred_mode
    if self.opening < self.dynamics.desired_opening:
        mode, open_cmd = SupportMode.HOLD, 0.95
    elif state["persistence_horizon"][preferred] >= 2.0:
        mode, open_cmd = preferred, 0.12
    else:
        mode, open_cmd = SupportMode.HOLD, 0.30

    mode_corridor = state["corridor_feasible"][int(mode)]
    actor_ready = bool(mode_corridor[self.target_template] > 0.5)
    # Retrieval needs an open corridor, enough visibility and enough actor progress.
    retrieve = (
        actor_ready
        and state["visibility"] >= self.dynamics.retrieve_visibility_threshold
        and self.actor_progress >= 0.55
    )

    action = np.zeros((14,), dtype=np.float32)
    action[0] = np.float32(open_cmd)
    for slot, flagged_mode in (
        (1, SupportMode.TRANSFER),
        (2, SupportMode.PASSIVE),
        (6, SupportMode.HOLD),
    ):
        action[slot] = np.float32(1.0 if mode == flagged_mode else -1.0)
    action[7] = np.float32(self._normalized_template(self.target_template))
    action[8] = np.float32(1.0 if actor_ready else 0.2)
    action[13] = np.float32(1.0 if retrieve else -1.0)
    return action
|
| 750 |
+
|
| 751 |
+
def _set_mode_bits(self, action: np.ndarray, mode: SupportMode) -> None:
    """Write the +1/-1 support-mode flags into ``action`` in place.

    Slots 1, 2 and 6 flag the TRANSFER, PASSIVE and HOLD modes respectively;
    the slot matching ``mode`` is set to +1 and the others to -1.
    """
    slot_for_mode = (
        (1, SupportMode.TRANSFER),
        (2, SupportMode.PASSIVE),
        (6, SupportMode.HOLD),
    )
    for slot, flagged_mode in slot_for_mode:
        action[slot] = np.float32(1.0 if mode == flagged_mode else -1.0)
|
| 755 |
+
|
| 756 |
+
def macro_action_chunk(self, macro_name: str, chunk_horizon: int = 8) -> np.ndarray:
    """Build a ``(chunk_horizon, 14)`` float32 action chunk for a named macro.

    Every step starts from ``teacher_action()`` with retrieval disabled
    (slot 13 = -1), the actor slot lowered to 0.2 and the mode flags set to
    the preferred mode; the macro then overrides the mode flags, the opening
    command (slot 0) and, depending on the macro, the template (slot 7),
    actor (slot 8) and retrieve (slot 13) slots.  An unrecognised macro name
    falls back to the unmodified teacher action.  Each row is clipped to
    ``[-1, 1]``.  The environment is not stepped here.

    Args:
        macro_name: Name of the macro behaviour.
        chunk_horizon: Number of action rows to emit.
    """
    preferred = self.dynamics.preferred_mode
    hold = SupportMode.HOLD
    passive = SupportMode.PASSIVE
    target = self.target_template
    last_template = self.num_templates - 1
    left_template = max(0, target - 4)
    right_template = min(last_template, target + 4)
    # Pick the template at the far end opposite the target's half.
    wrong_template = 0 if target > (self.num_templates // 2) else last_template

    widen_macros = {"widen_gap", "widen_mouth", "lift_edge", "separate_layer"}
    maintain_macros = {"maintain_gap", "maintain_mouth", "maintain_lift", "stabilize_fold", "pin_canopy"}
    left_macros = {"sweep_left", "pin_left_rim"}
    right_macros = {"sweep_right", "pin_right_rim"}
    release_macros = {"reveal_with_release", "foliage_immediate_reocclusion"}
    wrong_target_macros = {"wrong_side_reveal", "wrong_edge_reveal", "wrong_layer_reveal"}
    overshoot_macros = {"over_disturbance", "cloth_lift_high"}
    weak_insert_macros = {"weak_corridor_insert", "bag_fabric_probe"}

    def configure(row, mode, open_cmd, *, template=None, actor=None, retrieve=False):
        # Apply one macro's overrides to an action row in place.
        self._set_mode_bits(row, mode)
        row[0] = np.float32(open_cmd)
        if template is not None:
            row[7] = np.float32(self._normalized_template(template))
        if actor is not None:
            row[8] = np.float32(actor)
        if retrieve:
            row[13] = np.float32(1.0)

    chunk = np.zeros((chunk_horizon, 14), dtype=np.float32)
    for step_idx in range(chunk_horizon):
        action = self.teacher_action()
        action[13] = np.float32(-1.0)
        action[8] = np.float32(0.2)
        self._set_mode_bits(action, preferred)

        if macro_name in widen_macros:
            configure(action, hold, 0.95)
        elif macro_name in maintain_macros:
            configure(action, preferred, 0.12)
        elif macro_name in left_macros:
            configure(action, hold, 0.75, template=left_template)
        elif macro_name in right_macros:
            configure(action, hold, 0.75, template=right_template)
        elif macro_name == "probe_inside":
            configure(action, preferred, 0.10, actor=0.75)
        elif macro_name == "insert_actor":
            configure(action, preferred, 0.10, actor=1.0)
        elif macro_name == "retrieve":
            configure(action, preferred, 0.05, actor=1.0, retrieve=True)
        elif macro_name == "premature_retrieve":
            configure(action, passive, -0.20, actor=1.0, retrieve=True)
        elif macro_name in release_macros:
            # First half of the chunk (at least one step) reveals, the rest releases.
            reveal_phase = step_idx < max(1, chunk_horizon // 2)
            configure(
                action,
                hold if reveal_phase else passive,
                0.95 if reveal_phase else -0.35,
                actor=0.2,
            )
        elif macro_name in wrong_target_macros:
            configure(action, hold, 0.65, template=wrong_template)
        elif macro_name in overshoot_macros:
            configure(action, passive, 1.0, actor=1.0 if macro_name == "over_disturbance" else 0.2)
        elif macro_name == "delayed_actor_entry":
            # Actor stays low until the final step of the chunk.
            configure(action, preferred, 0.10, actor=0.2 if step_idx < (chunk_horizon - 1) else 1.0)
        elif macro_name in weak_insert_macros:
            configure(action, passive, 0.02, actor=1.0)
        else:
            action = self.teacher_action()
        chunk[step_idx] = np.clip(action, -1.0, 1.0)
    return chunk
|
| 829 |
+
|
| 830 |
+
def baseline_action_chunk(self, baseline_name: str, chunk_horizon: int = 8) -> np.ndarray:
    """Return the action chunk for a named baseline policy.

    Supported names: ``"teacher"`` (teacher rollout actions),
    ``"reveal_only"`` (proxy-specific widening macro), ``"retrieve_only"``
    (``premature_retrieve`` macro), ``"no_hold"`` (``reveal_with_release``
    macro) and ``"random"`` (uniform samples in ``[-1, 1)`` drawn from
    ``self.rng``).

    Raises:
        KeyError: If ``baseline_name`` is not one of the names above.
    """
    if baseline_name == "teacher":
        teacher_chunk, _ = self.teacher_chunk_and_rollout(
            chunk_horizon=chunk_horizon,
            rollout_horizon=self.rollout_horizon,
        )
        return teacher_chunk
    if baseline_name == "random":
        samples = self.rng.uniform(-1.0, 1.0, size=(chunk_horizon, 14))
        return samples.astype(np.float32)

    if baseline_name == "reveal_only":
        # The widening macro is named differently for each proxy.
        if self.proxy_name == FOLIAGE_PROXY.name:
            macro = "widen_gap"
        elif self.proxy_name == BAG_PROXY.name:
            macro = "widen_mouth"
        else:
            macro = "lift_edge"
    else:
        fixed_macros = {
            "retrieve_only": "premature_retrieve",
            "no_hold": "reveal_with_release",
        }
        if baseline_name not in fixed_macros:
            raise KeyError(f"Unknown baseline chunk: {baseline_name}")
        macro = fixed_macros[baseline_name]
    return self.macro_action_chunk(macro, chunk_horizon=chunk_horizon)
|
| 843 |
+
|
| 844 |
+
def teacher_chunk_and_rollout(
|
| 845 |
+
self,
|
| 846 |
+
chunk_horizon: int = 8,
|
| 847 |
+
rollout_horizon: int | None = None,
|
| 848 |
+
) -> tuple[np.ndarray, dict[str, np.ndarray]]:
|
| 849 |
+
rollout_horizon = rollout_horizon or self.rollout_horizon
|
| 850 |
+
snapshot = self.clone_state()
|
| 851 |
+
action_chunk: list[np.ndarray] = []
|
| 852 |
+
rollout_support_mode = []
|
| 853 |
+
rollout_corridor = []
|
| 854 |
+
rollout_persistence = []
|
| 855 |
+
rollout_disturbance = []
|
| 856 |
+
rollout_belief = []
|
| 857 |
+
rollout_visibility = []
|
| 858 |
+
rollout_clearance = []
|
| 859 |
+
rollout_support_stability = []
|
| 860 |
+
rollout_reocclusion = []
|
| 861 |
+
rollout_occluder_contact = []
|
| 862 |
+
rollout_grasp_affordance = []
|
| 863 |
+
rollout_phase = []
|
| 864 |
+
for step in range(chunk_horizon):
|
| 865 |
+
action = self.teacher_action()
|
| 866 |
+
action_chunk.append(action)
|
| 867 |
+
_, _, terminated, truncated, privileged_state = self.step(action)
|
| 868 |
+
if step < rollout_horizon:
|
| 869 |
+
rollout_support_mode.append(privileged_state["support_mode"])
|
| 870 |
+
rollout_corridor.append(privileged_state["corridor_feasible"])
|
| 871 |
+
rollout_persistence.append(privileged_state["persistence_horizon"])
|
| 872 |
+
rollout_disturbance.append(privileged_state["disturbance_cost"])
|
| 873 |
+
rollout_belief.append(privileged_state["belief_map"])
|
| 874 |
+
rollout_visibility.append(privileged_state["visibility_map"])
|
| 875 |
+
rollout_clearance.append(privileged_state["clearance_map"])
|
| 876 |
+
rollout_support_stability.append(privileged_state["support_stability"])
|
| 877 |
+
rollout_reocclusion.append(privileged_state["reocclusion_target"])
|
| 878 |
+
rollout_occluder_contact.append(privileged_state["occluder_contact_map"])
|
| 879 |
+
rollout_grasp_affordance.append(privileged_state["grasp_affordance_map"])
|
| 880 |
+
rollout_phase.append(int(privileged_state["phase_label"]))
|
| 881 |
+
if terminated or truncated:
|
| 882 |
+
break
|
| 883 |
+
while len(action_chunk) < chunk_horizon:
|
| 884 |
+
action_chunk.append(np.zeros((14,), dtype=np.float32))
|
| 885 |
+
while len(rollout_support_mode) < rollout_horizon:
|
| 886 |
+
current = self.get_privileged_state()
|
| 887 |
+
rollout_support_mode.append(int(self._current_support_mode()))
|
| 888 |
+
rollout_corridor.append(current["corridor_feasible"])
|
| 889 |
+
rollout_persistence.append(current["persistence_horizon"])
|
| 890 |
+
rollout_disturbance.append(current["disturbance_cost"])
|
| 891 |
+
rollout_belief.append(current["belief_map"])
|
| 892 |
+
rollout_visibility.append(current["visibility_map"])
|
| 893 |
+
rollout_clearance.append(current["clearance_map"])
|
| 894 |
+
rollout_support_stability.append(current["support_stability"])
|
| 895 |
+
rollout_reocclusion.append(current["reocclusion_target"])
|
| 896 |
+
rollout_occluder_contact.append(current["occluder_contact_map"])
|
| 897 |
+
rollout_grasp_affordance.append(current["grasp_affordance_map"])
|
| 898 |
+
rollout_phase.append(int(current["phase_label"]))
|
| 899 |
+
self.restore_state(snapshot)
|
| 900 |
+
return np.stack(action_chunk, axis=0).astype(np.float32), {
|
| 901 |
+
"rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
|
| 902 |
+
"rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
|
| 903 |
+
"rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
|
| 904 |
+
"rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
|
| 905 |
+
"rollout_belief_map": np.asarray(rollout_belief, dtype=np.float32),
|
| 906 |
+
"rollout_visibility_map": np.asarray(rollout_visibility, dtype=np.float32),
|
| 907 |
+
"rollout_clearance_map": np.asarray(rollout_clearance, dtype=np.float32),
|
| 908 |
+
"rollout_support_stability": np.asarray(rollout_support_stability, dtype=np.float32),
|
| 909 |
+
"rollout_reocclusion_target": np.asarray(rollout_reocclusion, dtype=np.float32),
|
| 910 |
+
"rollout_occluder_contact_map": np.asarray(rollout_occluder_contact, dtype=np.float32),
|
| 911 |
+
"rollout_grasp_affordance_map": np.asarray(rollout_grasp_affordance, dtype=np.float32),
|
| 912 |
+
"rollout_phase": np.asarray(rollout_phase, dtype=np.int64),
|
| 913 |
+
}
|
| 914 |
+
|
| 915 |
+
def evaluate_action_chunk(
|
| 916 |
+
self,
|
| 917 |
+
action_chunk: np.ndarray,
|
| 918 |
+
rollout_horizon: int | None = None,
|
| 919 |
+
) -> dict[str, np.ndarray | float]:
|
| 920 |
+
rollout_horizon = rollout_horizon or self.rollout_horizon
|
| 921 |
+
snapshot = self.clone_state()
|
| 922 |
+
rollout_support_mode: list[int] = []
|
| 923 |
+
rollout_corridor: list[np.ndarray] = []
|
| 924 |
+
rollout_persistence: list[np.ndarray] = []
|
| 925 |
+
rollout_disturbance: list[float] = []
|
| 926 |
+
rollout_belief: list[np.ndarray] = []
|
| 927 |
+
rollout_visibility: list[np.ndarray] = []
|
| 928 |
+
rollout_clearance: list[np.ndarray] = []
|
| 929 |
+
rollout_support_stability: list[float] = []
|
| 930 |
+
rollout_reocclusion: list[float] = []
|
| 931 |
+
rollout_occluder_contact: list[np.ndarray] = []
|
| 932 |
+
rollout_grasp_affordance: list[np.ndarray] = []
|
| 933 |
+
rollout_phase: list[int] = []
|
| 934 |
+
corridor_open_trace = [float(self.get_privileged_state()["corridor_feasible"][self._current_support_mode()].any())]
|
| 935 |
+
visibility_trace = [float(self.get_privileged_state()["visibility"])]
|
| 936 |
+
disturbance_trace = [float(self.get_privileged_state()["disturbance_cost"])]
|
| 937 |
+
support_trace = [float(self.get_privileged_state()["support_stability"])]
|
| 938 |
+
opening_trace = [float(self.opening)]
|
| 939 |
+
terminated = False
|
| 940 |
+
truncated = False
|
| 941 |
+
privileged_state = self.get_privileged_state()
|
| 942 |
+
for step, action in enumerate(np.asarray(action_chunk, dtype=np.float32)):
|
| 943 |
+
_, _, terminated, truncated, privileged_state = self.step(action)
|
| 944 |
+
if step < rollout_horizon:
|
| 945 |
+
rollout_support_mode.append(int(privileged_state["support_mode"]))
|
| 946 |
+
rollout_corridor.append(privileged_state["corridor_feasible"].astype(np.float32))
|
| 947 |
+
rollout_persistence.append(privileged_state["persistence_horizon"].astype(np.float32))
|
| 948 |
+
rollout_disturbance.append(float(privileged_state["disturbance_cost"]))
|
| 949 |
+
rollout_belief.append(privileged_state["belief_map"].astype(np.float32))
|
| 950 |
+
rollout_visibility.append(privileged_state["visibility_map"].astype(np.float32))
|
| 951 |
+
rollout_clearance.append(privileged_state["clearance_map"].astype(np.float32))
|
| 952 |
+
rollout_support_stability.append(float(privileged_state["support_stability"]))
|
| 953 |
+
rollout_reocclusion.append(float(privileged_state["reocclusion_target"]))
|
| 954 |
+
rollout_occluder_contact.append(privileged_state["occluder_contact_map"].astype(np.float32))
|
| 955 |
+
rollout_grasp_affordance.append(privileged_state["grasp_affordance_map"].astype(np.float32))
|
| 956 |
+
rollout_phase.append(int(privileged_state["phase_label"]))
|
| 957 |
+
corridor_open_trace.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
|
| 958 |
+
visibility_trace.append(float(privileged_state["visibility"]))
|
| 959 |
+
disturbance_trace.append(float(privileged_state["disturbance_cost"]))
|
| 960 |
+
support_trace.append(float(privileged_state["support_stability"]))
|
| 961 |
+
opening_trace.append(float(self.opening))
|
| 962 |
+
if terminated or truncated:
|
| 963 |
+
break
|
| 964 |
+
while len(rollout_support_mode) < rollout_horizon:
|
| 965 |
+
current = self.get_privileged_state()
|
| 966 |
+
rollout_support_mode.append(int(current["support_mode"]))
|
| 967 |
+
rollout_corridor.append(current["corridor_feasible"].astype(np.float32))
|
| 968 |
+
rollout_persistence.append(current["persistence_horizon"].astype(np.float32))
|
| 969 |
+
rollout_disturbance.append(float(current["disturbance_cost"]))
|
| 970 |
+
rollout_belief.append(current["belief_map"].astype(np.float32))
|
| 971 |
+
rollout_visibility.append(current["visibility_map"].astype(np.float32))
|
| 972 |
+
rollout_clearance.append(current["clearance_map"].astype(np.float32))
|
| 973 |
+
rollout_support_stability.append(float(current["support_stability"]))
|
| 974 |
+
rollout_reocclusion.append(float(current["reocclusion_target"]))
|
| 975 |
+
rollout_occluder_contact.append(current["occluder_contact_map"].astype(np.float32))
|
| 976 |
+
rollout_grasp_affordance.append(current["grasp_affordance_map"].astype(np.float32))
|
| 977 |
+
rollout_phase.append(int(current["phase_label"]))
|
| 978 |
+
final_state = self.get_privileged_state()
|
| 979 |
+
corridor_curve = np.asarray(corridor_open_trace, dtype=np.float32)
|
| 980 |
+
visibility_curve = np.asarray(visibility_trace, dtype=np.float32)
|
| 981 |
+
disturbance_curve = np.asarray(disturbance_trace, dtype=np.float32)
|
| 982 |
+
support_curve = np.asarray(support_trace, dtype=np.float32)
|
| 983 |
+
opening_curve = np.asarray(opening_trace, dtype=np.float32)
|
| 984 |
+
reocclusion = float(
|
| 985 |
+
np.logical_and(
|
| 986 |
+
corridor_curve[:-1] > 0.5,
|
| 987 |
+
corridor_curve[1:] <= 0.5,
|
| 988 |
+
).mean()
|
| 989 |
+
) if len(corridor_open_trace) > 1 else 0.0
|
| 990 |
+
result: dict[str, np.ndarray | float] = {
|
| 991 |
+
"rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
|
| 992 |
+
"rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
|
| 993 |
+
"rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
|
| 994 |
+
"rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
|
| 995 |
+
"rollout_belief_map": np.asarray(rollout_belief, dtype=np.float32),
|
| 996 |
+
"rollout_visibility_map": np.asarray(rollout_visibility, dtype=np.float32),
|
| 997 |
+
"rollout_clearance_map": np.asarray(rollout_clearance, dtype=np.float32),
|
| 998 |
+
"rollout_support_stability": np.asarray(rollout_support_stability, dtype=np.float32),
|
| 999 |
+
"rollout_reocclusion_target": np.asarray(rollout_reocclusion, dtype=np.float32),
|
| 1000 |
+
"rollout_occluder_contact_map": np.asarray(rollout_occluder_contact, dtype=np.float32),
|
| 1001 |
+
"rollout_grasp_affordance_map": np.asarray(rollout_grasp_affordance, dtype=np.float32),
|
| 1002 |
+
"rollout_phase": np.asarray(rollout_phase, dtype=np.int64),
|
| 1003 |
+
"retrieval_success": float(final_state["retrieval_success"]),
|
| 1004 |
+
"final_disturbance_cost": float(final_state["disturbance_cost"]),
|
| 1005 |
+
"reocclusion_rate": reocclusion,
|
| 1006 |
+
"visibility_integral": float(np.sum(visibility_curve)),
|
| 1007 |
+
"actor_feasibility_auc": float(corridor_curve.mean()),
|
| 1008 |
+
"reveal_achieved": float(visibility_curve.max() >= self.dynamics.retrieve_visibility_threshold),
|
| 1009 |
+
"hold_persistence": float(corridor_curve.mean()),
|
| 1010 |
+
"support_stability_auc": float(support_curve.mean()),
|
| 1011 |
+
"disturbance_auc": float(disturbance_curve.mean()),
|
| 1012 |
+
"opening_peak": float(opening_curve.max()),
|
| 1013 |
+
}
|
| 1014 |
+
if self.proxy_name == FOLIAGE_PROXY.name:
|
| 1015 |
+
result["candidate_gap_width"] = float(final_state.get("gap_width", opening_curve.max()))
|
| 1016 |
+
result["candidate_damage_proxy"] = float(final_state.get("damage_proxy", final_state["disturbance_cost"]))
|
| 1017 |
+
elif self.proxy_name == BAG_PROXY.name:
|
| 1018 |
+
result["candidate_mouth_aperture"] = float(final_state.get("mouth_aperture", opening_curve.max()))
|
| 1019 |
+
result["candidate_hold_quality"] = float(final_state.get("hold_quality", support_curve.mean()))
|
| 1020 |
+
result["candidate_rim_slip_risk"] = float(final_state.get("rim_slip_risk", reocclusion))
|
| 1021 |
+
elif self.proxy_name == CLOTH_PROXY.name:
|
| 1022 |
+
result["candidate_fold_preservation"] = float(final_state.get("fold_preservation", 1.0 - final_state["disturbance_cost"]))
|
| 1023 |
+
result["candidate_layer_separation_quality"] = float(final_state.get("layer_separation_quality", opening_curve.max()))
|
| 1024 |
+
result["candidate_lift_too_much_risk"] = float(final_state.get("lift_too_much_risk", max(0.0, opening_curve.max() - self.dynamics.desired_opening)))
|
| 1025 |
+
result["candidate_top_layer_stability"] = float(final_state.get("top_layer_stability", support_curve.mean()))
|
| 1026 |
+
self.restore_state(snapshot)
|
| 1027 |
+
return result
|
| 1028 |
+
|
| 1029 |
+
def candidate_outcome_utility(self, outcome: dict[str, np.ndarray | float]) -> float:
    """Score an outcome produced by ``evaluate_action_chunk``.

    For every proxy except cloth the utility is
    ``retrieval_success - final_disturbance_cost - reocclusion_rate``.
    The cloth proxy uses a reweighted sum that rewards layer separation and
    fold preservation and penalises disturbance, re-occlusion and lift risk.

    Args:
        outcome: Mapping containing at least ``retrieval_success``,
            ``final_disturbance_cost`` and ``reocclusion_rate``.
    """
    success = float(outcome["retrieval_success"])
    disturbance = float(outcome["final_disturbance_cost"])
    reocclusion = float(outcome["reocclusion_rate"])

    if self.proxy_name != CLOTH_PROXY.name:
        return float(success - disturbance - reocclusion)

    # On the cloth proxy, success follows layer separation more closely than
    # strict fold/disturbance minimisation.  The lift-risk penalty is kept,
    # but slightly aggressive actions that actually open an insertable
    # corridor are no longer over-penalised.
    separation = float(
        outcome.get("candidate_layer_separation_quality", outcome.get("opening_peak", 0.0))
    )
    fold_kept = float(outcome.get("candidate_fold_preservation", max(0.0, 1.0 - disturbance)))
    lift_risk = float(outcome.get("candidate_lift_too_much_risk", 0.0))
    cloth_utility = (
        success
        + 0.80 * separation
        + 0.20 * fold_kept
        - 0.20 * disturbance
        - 0.20 * reocclusion
        - 0.35 * lift_risk
    )
    return float(cloth_utility)
|
| 1050 |
+
|
| 1051 |
+
def sample_candidate_action_chunks(
    self,
    teacher_chunk: np.ndarray,
    num_candidates: int = 4,
    rollout_horizon: int | None = None,
) -> tuple[np.ndarray, dict[str, np.ndarray]]:
    """Build a set of candidate action chunks and evaluate each of them.

    Slot 0 is always the teacher chunk. The following slots are filled with
    proxy-specific scripted macros (in a fixed order, labelled either
    ``"positive"`` or with a hard-negative family name). If more candidates
    are requested than scripted macros exist, the remainder are uniform random
    chunks in ``[-1, 1]`` drawn from ``self.rng``.

    Args:
        teacher_chunk: Reference action chunk; converted to ``float32``. Its
            first dimension is used as the chunk horizon for the macros and
            its full shape for the random chunks.
        num_candidates: Requested number of candidates. The teacher chunk is
            always included, so at least one candidate is returned.
        rollout_horizon: Horizon forwarded to ``evaluate_action_chunk``. A
            falsy value (``None`` or ``0``) selects ``self.rollout_horizon``.

    Returns:
        A pair ``(candidates, stacked_outcomes)`` where ``candidates`` is the
        ``float32`` stack of all chunks along a new leading axis and
        ``stacked_outcomes`` maps ``candidate_*`` keys to per-candidate
        arrays. ``candidate_macro_names`` and ``candidate_negative_families``
        are plain Python lists of strings rather than arrays.
    """
    horizon = rollout_horizon or self.rollout_horizon
    teacher_chunk = np.asarray(teacher_chunk, dtype=np.float32)

    # The teacher occupies slot 0 and is never a hard negative.
    candidates = [teacher_chunk.astype(np.float32)]
    outcomes = [self.evaluate_action_chunk(teacher_chunk, rollout_horizon=horizon)]
    macro_ids = [0]
    hard_negative_flags = [0.0]
    macro_names = ["teacher"]
    negative_families = ["teacher"]

    # (macro name, family) pairs; any family other than "positive" marks a hard negative.
    if self.proxy_name == FOLIAGE_PROXY.name:
        semantic_specs = [
            ("pin_canopy", "positive"),
            ("maintain_gap", "positive"),
            ("premature_retrieve", "premature_retrieve"),
            ("reveal_with_release", "reveal_with_release"),
            ("wrong_side_reveal", "wrong_side_reveal"),
            ("foliage_immediate_reocclusion", "immediate_reocclusion"),
            ("over_disturbance", "over_disturbance"),
            ("weak_corridor_insert", "weak_corridor_insert"),
            ("insert_actor", "positive"),
            ("retrieve", "positive"),
        ]
    elif self.proxy_name == BAG_PROXY.name:
        semantic_specs = [
            ("widen_mouth", "positive"),
            ("maintain_mouth", "positive"),
            ("premature_retrieve", "premature_retrieve"),
            ("reveal_with_release", "reveal_with_release"),
            ("wrong_edge_reveal", "wrong_side_reveal"),
            ("pin_left_rim", "one_rim_slip"),
            ("bag_fabric_probe", "fabric_probe"),
            ("weak_corridor_insert", "weak_corridor_insert"),
            ("insert_actor", "positive"),
            ("retrieve", "positive"),
        ]
    else:
        semantic_specs = [
            ("lift_edge", "positive"),
            ("stabilize_fold", "positive"),
            ("premature_retrieve", "premature_retrieve"),
            ("reveal_with_release", "reveal_with_release"),
            ("cloth_lift_high", "lift_too_high"),
            ("wrong_layer_reveal", "wrong_layer_reveal"),
            ("delayed_actor_entry", "delayed_actor_entry"),
            ("weak_corridor_insert", "weak_corridor_insert"),
            ("insert_actor", "positive"),
            ("retrieve", "positive"),
        ]

    # Scripted macros fill the slots after the teacher, in list order.
    scripted_specs = semantic_specs[: max(0, num_candidates - 1)]
    for macro_id, (macro_name, family_name) in enumerate(scripted_specs, start=1):
        macro_chunk = self.macro_action_chunk(macro_name, chunk_horizon=teacher_chunk.shape[0])
        candidates.append(macro_chunk.astype(np.float32))
        outcomes.append(self.evaluate_action_chunk(macro_chunk, rollout_horizon=horizon))
        macro_ids.append(macro_id)
        macro_names.append(macro_name)
        negative_families.append(family_name)
        hard_negative_flags.append(1.0 if family_name != "positive" else 0.0)

    # Any remaining slots are padded with uniform random chunks (always hard negatives).
    for _ in range(num_candidates - len(candidates)):
        noise_chunk = self.rng.uniform(-1.0, 1.0, size=teacher_chunk.shape).astype(np.float32)
        candidates.append(noise_chunk)
        outcomes.append(self.evaluate_action_chunk(noise_chunk, rollout_horizon=horizon))
        macro_ids.append(len(macro_ids))
        macro_names.append("random")
        negative_families.append("random")
        hard_negative_flags.append(1.0)

    # (output key, per-outcome key, dtype) for entries stacked along a new leading axis.
    stacked_fields = (
        ("candidate_rollout_support_mode", "rollout_support_mode", np.int64),
        ("candidate_rollout_phase", "rollout_phase", np.int64),
        ("candidate_rollout_corridor_feasible", "rollout_corridor_feasible", np.float32),
        ("candidate_rollout_persistence_horizon", "rollout_persistence_horizon", np.float32),
        ("candidate_rollout_disturbance_cost", "rollout_disturbance_cost", np.float32),
        ("candidate_rollout_belief_map", "rollout_belief_map", np.float32),
        ("candidate_rollout_visibility_map", "rollout_visibility_map", np.float32),
        ("candidate_rollout_clearance_map", "rollout_clearance_map", np.float32),
        ("candidate_rollout_support_stability", "rollout_support_stability", np.float32),
        ("candidate_rollout_reocclusion_target", "rollout_reocclusion_target", np.float32),
        ("candidate_rollout_occluder_contact_map", "rollout_occluder_contact_map", np.float32),
        ("candidate_rollout_grasp_affordance_map", "rollout_grasp_affordance_map", np.float32),
    )
    # (output key, per-outcome key) for entries collected into one float32 array.
    collected_fields = (
        ("candidate_retrieval_success", "retrieval_success"),
        ("candidate_final_disturbance_cost", "final_disturbance_cost"),
        ("candidate_reocclusion_rate", "reocclusion_rate"),
        ("candidate_visibility_integral", "visibility_integral"),
        ("candidate_actor_feasibility_auc", "actor_feasibility_auc"),
        ("candidate_reveal_achieved", "reveal_achieved"),
        ("candidate_hold_persistence", "hold_persistence"),
        ("candidate_support_stability_auc", "support_stability_auc"),
        ("candidate_disturbance_auc", "disturbance_auc"),
    )

    stacked_outcomes = {}
    for out_key, src_key, dtype in stacked_fields:
        stacked_outcomes[out_key] = np.stack([item[src_key] for item in outcomes], axis=0).astype(dtype)
    for out_key, src_key in collected_fields:
        stacked_outcomes[out_key] = np.asarray([item[src_key] for item in outcomes], dtype=np.float32)
    stacked_outcomes["candidate_macro_ids"] = np.asarray(macro_ids, dtype=np.int64)
    stacked_outcomes["candidate_is_hard_negative"] = np.asarray(hard_negative_flags, dtype=np.float32)

    # Risk is the final disturbance cost plus the re-occlusion rate, clamped to [0, 1].
    combined_risk = (
        stacked_outcomes["candidate_final_disturbance_cost"] + stacked_outcomes["candidate_reocclusion_rate"]
    )
    stacked_outcomes["candidate_risk"] = np.clip(combined_risk, 0.0, 1.0).astype(np.float32)
    stacked_outcomes["candidate_utility"] = np.asarray(
        [self.candidate_outcome_utility(item) for item in outcomes],
        dtype=np.float32,
    )
    stacked_outcomes["candidate_macro_names"] = macro_names
    stacked_outcomes["candidate_negative_families"] = negative_families

    # Proxy-specific metrics are already stored under their final key names.
    # NOTE(review): cloth outcomes elsewhere in this file also carry a
    # "candidate_top_layer_stability" entry that is not stacked here;
    # confirm that omission is intentional.
    if self.proxy_name == FOLIAGE_PROXY.name:
        proxy_metric_keys = ("candidate_gap_width", "candidate_damage_proxy")
    elif self.proxy_name == BAG_PROXY.name:
        proxy_metric_keys = ("candidate_mouth_aperture", "candidate_hold_quality", "candidate_rim_slip_risk")
    elif self.proxy_name == CLOTH_PROXY.name:
        proxy_metric_keys = (
            "candidate_fold_preservation",
            "candidate_layer_separation_quality",
            "candidate_lift_too_much_risk",
        )
    else:
        proxy_metric_keys = ()
    for metric_key in proxy_metric_keys:
        stacked_outcomes[metric_key] = np.asarray([item[metric_key] for item in outcomes], dtype=np.float32)

    return np.stack(candidates, axis=0).astype(np.float32), stacked_outcomes
|
| 1192 |
+
|
| 1193 |
+
def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
    """Advance the proxy environment by one action.

    The action is read at fixed indices: ``action[0]`` is the opening command
    (clipped to ``[-1, 1]``), ``action[7]`` selects the actor template,
    ``action[8]`` is the actor reach and ``action[13]`` the retrieve command
    (both squashed to ``[0, 1]`` via ``tanh``). The support mode is derived
    from the whole action by ``self._mode_from_action``.

    Args:
        action: Action vector; converted to ``float32``. It must have at
            least 14 entries because index 13 is read.

    Returns:
        ``(observation, reward, terminated, truncated, privileged_state)``.
        ``reward`` is ``1.0`` on a successful retrieval, otherwise
        ``0.08 * visibility - 0.03 * disturbance_cost``. ``terminated``
        mirrors ``self.retrieved`` and ``truncated`` is set once
        ``self.step_count`` reaches ``self.max_steps``.
    """
    action = np.asarray(action, dtype=np.float32)
    mode = self._mode_from_action(action)
    self.holding = mode == SupportMode.HOLD
    self.transferred = mode == SupportMode.TRANSFER

    open_cmd = float(np.clip(action[0], -1.0, 1.0))
    actor_reach = float((np.tanh(float(action[8])) + 1.0) * 0.5)
    retrieve_cmd = float((np.tanh(float(action[13])) + 1.0) * 0.5)

    # Map action[7] from [-1, 1] onto a template index in [0, num_templates - 1].
    top_template = self.num_templates - 1
    template_fraction = (float(np.clip(action[7], -1.0, 1.0)) + 1.0) * 0.5
    self.last_actor_template = int(np.clip(round(template_fraction * top_template), 0, top_template))

    bonus_by_mode = {SupportMode.HOLD: 0.08, SupportMode.TRANSFER: 0.04, SupportMode.PASSIVE: 0.0}
    support_bonus = bonus_by_mode[mode]
    closure = self.closure_scale * self._mode_decay(mode)

    # The opening is updated first; the disturbance update below reads the new opening.
    raw_opening = self.opening + 0.16 * open_cmd + support_bonus - closure - 0.05 * self.disturbance
    self.opening = float(np.clip(raw_opening, 0.0, 1.0))
    raw_disturbance = (
        self.disturbance
        + self.disturbance_gain_scale * self.dynamics.disturbance_gain * abs(open_cmd)
        + 0.025 * actor_reach
        + 0.05 * max(0.0, self.opening - self.dynamics.desired_opening)
        + 0.03 * self.collateral_bias * actor_reach
        - self.dynamics.settle_rate
    )
    self.disturbance = float(np.clip(raw_disturbance, 0.0, 1.0))

    self.step_count += 1
    privileged_state = self.get_privileged_state()
    corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]

    actor_engaged = corridor[self.last_actor_template] > 0.5 and actor_reach >= 0.55
    if not actor_engaged:
        # No feasible corridor for the chosen template, or too little reach: progress decays.
        self.actor_progress = float(np.clip(self.actor_progress - 0.20, 0.0, 1.0))
    else:
        horizon_scale = float(max(1, self.rollout_horizon))
        persistence_ratio = (
            privileged_state["persistence_horizon"][privileged_state["support_mode"]] / horizon_scale
        )
        self.actor_progress = float(np.clip(self.actor_progress + 0.55 * persistence_ratio, 0.0, 1.0))
        # A persistence ratio below 0.8 shrinks the opening, after which the state is re-queried.
        shock = 0.16 * max(0.0, 0.8 - persistence_ratio)
        if shock > 0.0:
            self.opening = float(np.clip(self.opening - shock, 0.0, 1.0))
            privileged_state = self.get_privileged_state()
            corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]

    success = bool(
        retrieve_cmd >= 0.55
        and self.actor_progress >= 0.80
        and corridor[self.last_actor_template] > 0.5
        and privileged_state["visibility"] >= self.dynamics.retrieve_visibility_threshold
        and self.disturbance < 0.9
    )
    if success:
        self.retrieved = True
        privileged_state["retrieval_success"] = True

    self.visibility_trace.append(float(privileged_state["visibility"]))
    self.corridor_trace.append(float(corridor.any()))

    if success:
        reward = 1.0
    else:
        reward = 0.08 * privileged_state["visibility"] - 0.03 * privileged_state["disturbance_cost"]
    terminated = bool(self.retrieved)
    truncated = bool(self.step_count >= self.max_steps)
    return self.get_observation(privileged_state), float(reward), terminated, truncated, privileged_state
|
| 1264 |
+
|
| 1265 |
+
|
| 1266 |
+
def render_views_from_state(
    proxy_name: str,
    render_state: dict[str, Any],
    resolution: int,
    num_templates: int = 32,
    include_depth: bool = False,
) -> dict[str, np.ndarray]:
    """Render synthetic front and wrist camera views from a scalar render state.

    Args:
        proxy_name: Key into ``PROXY_DYNAMICS``; selects the palette and the
            visibility bias used for rendering.
        render_state: Mapping with the scalar entries ``opening``,
            ``disturbance``, ``target_center``, ``target_depth``,
            ``target_radius``, ``texture_phase``, ``texture_scale``,
            ``view_bias``, ``target_intensity`` and ``step_fraction``.
        resolution: Side length in pixels; all views are square.
        num_templates: Accepted for interface compatibility; not used by the
            rendering below.
        include_depth: When true, depth maps and validity masks are added to
            the result.

    Returns:
        A dict with ``front``, ``wrist_left`` and ``wrist_right`` as
        ``uint8`` arrays of shape ``(resolution, resolution, 3)``. With
        ``include_depth`` it also holds ``*_depth`` maps and ``*_depth_valid``
        masks (all ones) as ``float32`` arrays of shape
        ``(resolution, resolution)``.

    Raises:
        KeyError: If ``proxy_name`` or a required ``render_state`` entry is
            missing.
    """

    def _to_uint8(image: np.ndarray) -> np.ndarray:
        # Clip before quantizing. The front view's red/green channels are
        # scaled by up to 1.06 x palette and are only clipped inside the gap
        # and target masks, so a bright palette would otherwise overflow the
        # uint8 cast. For in-range images the clip is a no-op.
        return (np.clip(image, 0.0, 1.0) * 255.0).astype(np.uint8)

    dynamics = PROXY_DYNAMICS[proxy_name]
    opening = float(render_state["opening"])
    disturbance = float(render_state["disturbance"])
    target_center = float(render_state["target_center"])
    target_depth = float(render_state["target_depth"])
    target_radius = float(render_state["target_radius"])
    texture_phase = float(render_state["texture_phase"])
    texture_scale = float(render_state["texture_scale"])
    view_bias = float(render_state["view_bias"])
    target_intensity = float(render_state["target_intensity"])
    step_fraction = float(render_state["step_fraction"])

    height = width = resolution
    base = np.ones((height, width, 3), dtype=np.float32)
    base *= np.asarray(dynamics.palette, dtype=np.float32)

    # Normalized pixel coordinates: yy varies along rows, xx along columns.
    x = np.linspace(0.0, 1.0, width, dtype=np.float32)
    y = np.linspace(0.0, 1.0, height, dtype=np.float32)
    yy, xx = np.meshgrid(y, x, indexing="ij")
    visibility = np.clip(
        1.25 * opening - 0.68 * disturbance - 0.24 * target_depth + dynamics.visibility_bias,
        0.0,
        1.0,
    )
    target_y = 0.74 - 0.22 * target_depth
    gap_width = np.clip(0.05 + 0.16 * opening - 0.08 * disturbance, 0.02, 0.24)
    front_center = np.clip(target_center + 0.03 * view_bias, 0.06, 0.94)
    left_center = np.clip(0.34 + 0.12 * (target_center - 0.5) - 0.05 * view_bias, 0.18, 0.52)
    right_center = np.clip(0.66 + 0.18 * (target_center - 0.5) + 0.06 * view_bias, 0.42, 0.88)
    surface_wave = 0.5 + 0.5 * np.sin((xx * (14.0 * texture_scale) + yy * 7.0) * np.pi + texture_phase)
    weave_wave = 0.5 + 0.5 * np.cos((xx * 6.0 - yy * (10.0 + 2.0 * texture_scale)) * np.pi - 0.6 * texture_phase)
    clutter = 0.65 * surface_wave + 0.35 * weave_wave
    disturbance_map = disturbance * (
        0.55 + 0.45 * np.sin((xx * 9.0 + yy * (12.0 + texture_scale)) * np.pi + 1.3 * texture_phase)
    )
    # Elliptical target footprint; the same mask is reused in all three views.
    target_mask = ((xx - front_center) ** 2 + ((yy - target_y) / 1.2) ** 2) <= target_radius**2

    # Front view: palette-tinted clutter, a brightened gap and the blended target.
    front = base.copy()
    front *= (0.82 + 0.24 * clutter[..., None]).astype(np.float32)
    occluder_profile = np.abs(xx - front_center) / gap_width + 0.55 * np.abs(yy - (0.56 + 0.08 * view_bias))
    gap_mask = occluder_profile <= (1.15 + 0.35 * opening)
    front[gap_mask] = np.clip(front[gap_mask] + np.asarray([0.14, 0.16, 0.14], dtype=np.float32), 0.0, 1.0)
    target_rgb = np.asarray([0.78, 0.74, 0.58], dtype=np.float32) * target_intensity
    front[target_mask] = np.clip(
        front[target_mask] * (1.0 - 0.45 * visibility) + target_rgb * (0.25 + 0.75 * visibility),
        0.0,
        1.0,
    )
    front[..., 2] = np.clip(front[..., 2] + 0.12 * disturbance_map + 0.04 * step_fraction, 0.0, 1.0)

    # Left wrist view: dark background with an elliptical slit that widens with the opening.
    wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
    wrist_left *= (0.8 + 0.18 * clutter[..., None]).astype(np.float32)
    left_slit_width = np.clip(0.04 + 0.18 * opening - 0.10 * disturbance, 0.015, 0.22)
    left_profile = ((xx - left_center) / left_slit_width) ** 2 + ((yy - 0.58) / (0.40 + 0.10 * opening)) ** 2
    left_open = left_profile <= 1.0
    wrist_left[left_open] = np.clip(wrist_left[left_open] + np.asarray([0.08, 0.22, 0.12], dtype=np.float32), 0.0, 1.0)
    wrist_left[..., 0] = np.clip(wrist_left[..., 0] + 0.18 * disturbance_map, 0.0, 1.0)
    wrist_left[target_mask] = np.clip(
        wrist_left[target_mask] * (1.0 - 0.35 * visibility) + target_rgb * (0.18 + 0.52 * visibility),
        0.0,
        1.0,
    )

    # Right wrist view: Gaussian band/clearance highlight in the green channel.
    wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
    wrist_right *= (0.78 + 0.22 * clutter[..., None]).astype(np.float32)
    right_band = np.exp(-((xx - right_center) ** 2) / max(1e-4, (0.06 + gap_width) ** 2))
    right_clear = np.exp(-((yy - (0.52 - 0.12 * target_depth)) ** 2) / max(1e-4, (0.12 + 0.18 * opening) ** 2))
    wrist_right[..., 1] = np.clip(
        wrist_right[..., 1] + 0.28 * visibility * right_band * right_clear - 0.10 * disturbance_map,
        0.0,
        1.0,
    )
    wrist_right[target_mask] = np.clip(
        wrist_right[target_mask] * (1.0 - 0.40 * visibility) + target_rgb * (0.22 + 0.60 * visibility),
        0.0,
        1.0,
    )
    wrist_right[..., 2] = np.clip(wrist_right[..., 2] + 0.08 * step_fraction + 0.06 * right_band, 0.0, 1.0)

    outputs = {
        "front": _to_uint8(front),
        "wrist_left": _to_uint8(wrist_left),
        "wrist_right": _to_uint8(wrist_right),
    }
    if not include_depth:
        return outputs

    # Depth maps start from one constant per view; masked regions are lowered with np.minimum.
    front_depth = np.clip(0.25 + 0.40 * target_depth + 0.15 * disturbance + 0.10 * (1.0 - visibility), 0.0, 1.0)
    target_depth_map = np.clip(0.10 + 0.55 * target_depth, 0.0, 1.0)
    occluder_depth = np.clip(0.30 + 0.20 * disturbance + 0.10 * (1.0 - opening), 0.0, 1.0)
    front_depth_map = np.full((height, width), front_depth, dtype=np.float32)
    front_depth_map[gap_mask] = np.minimum(front_depth_map[gap_mask], occluder_depth)
    front_depth_map[target_mask] = np.minimum(front_depth_map[target_mask], target_depth_map)

    wrist_left_depth = np.clip(0.35 + 0.25 * target_depth + 0.10 * disturbance, 0.0, 1.0)
    wrist_left_depth_map = np.full((height, width), wrist_left_depth, dtype=np.float32)
    wrist_left_depth_map[left_open] = np.minimum(wrist_left_depth_map[left_open], 0.22 + 0.25 * target_depth)
    wrist_left_depth_map[target_mask] = np.minimum(wrist_left_depth_map[target_mask], target_depth_map)

    wrist_right_depth = np.clip(0.35 + 0.20 * target_depth + 0.12 * disturbance, 0.0, 1.0)
    wrist_right_depth_map = np.full((height, width), wrist_right_depth, dtype=np.float32)
    right_focus = (right_band * right_clear) > 0.15
    wrist_right_depth_map[right_focus] = np.minimum(wrist_right_depth_map[right_focus], 0.20 + 0.25 * target_depth)
    wrist_right_depth_map[target_mask] = np.minimum(wrist_right_depth_map[target_mask], target_depth_map)

    outputs.update(
        {
            "front_depth": front_depth_map.astype(np.float32),
            "wrist_left_depth": wrist_left_depth_map.astype(np.float32),
            "wrist_right_depth": wrist_right_depth_map.astype(np.float32),
            "front_depth_valid": np.ones((height, width), dtype=np.float32),
            "wrist_left_depth_valid": np.ones((height, width), dtype=np.float32),
            "wrist_right_depth_valid": np.ones((height, width), dtype=np.float32),
        }
    )
    return outputs
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/teachers.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@dataclass
class TeacherAction:
    """Pair of action arrays returned by the scripted teacher step functions."""

    # Array built from the reveal-side entries of the privileged state
    # (presumably the command for the revealing arm; confirm against callers).
    revealer_action: np.ndarray
    # Array built from "retrieve_direction"; the teacher functions in this
    # file zero it while their readiness condition is not met.
    actor_action: np.ndarray
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def foliage_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Return the scripted teacher action for the foliage task.

    The revealer action is ``lowest_cost_strip_direction`` (default
    ``[0, 0, 1]``). The actor action is ``retrieve_direction`` (default all
    zeros) and is replaced by zeros unless ``corridor_exists`` is truthy.

    Args:
        privileged_state: Privileged simulator state; every key read here is
            optional and has a default.

    Returns:
        A ``TeacherAction`` holding two ``float32`` arrays.
    """
    strip_direction = privileged_state.get("lowest_cost_strip_direction", [0.0, 0.0, 1.0])
    corridor_open = bool(privileged_state.get("corridor_exists", False))
    revealer_action = np.asarray(strip_direction, dtype=np.float32)
    retrieve_direction = np.asarray(
        privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]),
        dtype=np.float32,
    )
    # The actor stays idle until a corridor is reported.
    actor_action = retrieve_direction if corridor_open else np.zeros_like(retrieve_direction)
    return TeacherAction(revealer_action=revealer_action, actor_action=actor_action)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def bag_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Return the scripted teacher action for the bag task.

    The revealer action is the concatenation of ``expand_contact_a`` (default
    ``[1, 0, 0]``) and ``expand_contact_b`` (default ``[-1, 0, 0]``). The
    actor action is ``retrieve_direction`` (default all zeros) and is replaced
    by zeros while ``aperture`` (default ``0.0``) is below
    ``aperture_threshold`` (default ``1.0``).

    Args:
        privileged_state: Privileged simulator state; every key read here is
            optional and has a default.

    Returns:
        A ``TeacherAction`` holding two ``float32`` arrays.
    """
    first_contact = np.asarray(privileged_state.get("expand_contact_a", [1.0, 0.0, 0.0]), dtype=np.float32)
    second_contact = np.asarray(privileged_state.get("expand_contact_b", [-1.0, 0.0, 0.0]), dtype=np.float32)
    aperture = float(privileged_state.get("aperture", 0.0))
    threshold = float(privileged_state.get("aperture_threshold", 1.0))
    retrieve_direction = np.asarray(
        privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]),
        dtype=np.float32,
    )
    # The actor stays idle until the aperture reaches the threshold.
    actor_action = retrieve_direction if aperture >= threshold else np.zeros_like(retrieve_direction)
    return TeacherAction(
        revealer_action=np.concatenate([first_contact, second_contact]),
        actor_action=actor_action,
    )
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def cloth_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Return the scripted teacher action for the cloth task.

    The revealer action is ``minimal_lift_direction`` (default ``[0, 0, 1]``).
    The actor action is ``retrieve_direction`` (default all zeros) and is
    replaced by zeros unless ``target_exposed`` is truthy.

    Args:
        privileged_state: Privileged simulator state; every key read here is
            optional and has a default.

    Returns:
        A ``TeacherAction`` holding two ``float32`` arrays.
    """
    lift_direction = np.asarray(
        privileged_state.get("minimal_lift_direction", [0.0, 0.0, 1.0]),
        dtype=np.float32,
    )
    target_visible = bool(privileged_state.get("target_exposed", False))
    retrieve_direction = np.asarray(
        privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]),
        dtype=np.float32,
    )
    # The actor stays idle until the target is reported as exposed.
    actor_action = retrieve_direction if target_visible else np.zeros_like(retrieve_direction)
    return TeacherAction(revealer_action=lift_direction, actor_action=actor_action)
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/base.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model: backbone_only
|
| 3 |
+
- data: rlbench_3cam
|
| 4 |
+
- _self_
|
| 5 |
+
|
| 6 |
+
trainer:
|
| 7 |
+
policy_type: backbone_only
|
| 8 |
+
use_bf16: true
|
| 9 |
+
grad_clip_norm: 1.0
|
| 10 |
+
freeze_backbone: true
|
| 11 |
+
gradient_checkpointing: true
|
| 12 |
+
|
| 13 |
+
optim:
|
| 14 |
+
lr: 1.0e-4
|
| 15 |
+
weight_decay: 1.0e-4
|
| 16 |
+
|
| 17 |
+
runtime:
|
| 18 |
+
batch_size: 8
|
| 19 |
+
num_workers: 4
|
| 20 |
+
seed: 0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
policy:
|
| 2 |
+
backbone:
|
| 3 |
+
model_name: openai/clip-vit-base-patch32
|
| 4 |
+
hidden_dim: 512
|
| 5 |
+
max_text_tokens: 32
|
| 6 |
+
freeze_backbone: true
|
| 7 |
+
gradient_checkpointing: true
|
| 8 |
+
use_dummy_backbone: false
|
| 9 |
+
fusion:
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
num_cameras: 3
|
| 12 |
+
num_layers: 4
|
| 13 |
+
num_heads: 8
|
| 14 |
+
ff_dim: 2048
|
| 15 |
+
dropout: 0.1
|
| 16 |
+
proprio_dim: 32
|
| 17 |
+
proprio_tokens: 1
|
| 18 |
+
decoder:
|
| 19 |
+
hidden_dim: 512
|
| 20 |
+
num_heads: 8
|
| 21 |
+
num_layers: 4
|
| 22 |
+
ff_dim: 2048
|
| 23 |
+
dropout: 0.1
|
| 24 |
+
chunk_size: 8
|
| 25 |
+
action_dim: 14
|
| 26 |
+
num_candidates: 8
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_base_fast.yaml
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_adapter_wrapped_clip_base_fast_seed17
|
| 2 |
+
output_dir: /workspace/workspace/outputs/adapter_proxy
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
|
| 8 |
+
data:
|
| 9 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 10 |
+
resolution: 224
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase_fast
|
| 12 |
+
train_episodes_per_proxy: 12
|
| 13 |
+
val_episodes_per_proxy: 4
|
| 14 |
+
train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast.pt
|
| 15 |
+
val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast.pt
|
| 16 |
+
rebuild_dataset: false
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 17
|
| 22 |
+
|
| 23 |
+
optim:
|
| 24 |
+
epochs: 2
|
| 25 |
+
batch_size: 4
|
| 26 |
+
num_workers: 8
|
| 27 |
+
lr: 0.0001
|
| 28 |
+
weight_decay: 0.0001
|
| 29 |
+
|
| 30 |
+
trainer:
|
| 31 |
+
policy_type: adapter_wrapped
|
| 32 |
+
training_regime: adapter_train_frozen_trunk
|
| 33 |
+
eval_mode: adapter_active
|
| 34 |
+
adapter_mode: adapter_active
|
| 35 |
+
adapter_use_transition_model: false
|
| 36 |
+
adapter_use_task_conditioning: true
|
| 37 |
+
use_bf16: true
|
| 38 |
+
grad_clip_norm: 1.0
|
| 39 |
+
freeze_backbone: true
|
| 40 |
+
gradient_checkpointing: false
|
| 41 |
+
plan_during_train: false
|
| 42 |
+
plan_during_eval: false
|
| 43 |
+
support_mode_conditioning: true
|
| 44 |
+
planner_mode: off
|
| 45 |
+
use_depth: true
|
| 46 |
+
use_world_model: false
|
| 47 |
+
use_role_tokens: true
|
| 48 |
+
compute_equivariance_probe: false
|
| 49 |
+
trainable_parameter_prefixes:
|
| 50 |
+
- adapter.state_head
|
| 51 |
+
- adapter.proposal_prior
|
| 52 |
+
- adapter.planner
|
| 53 |
+
|
| 54 |
+
policy:
|
| 55 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 56 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 57 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8, reveal_cache_steps: 4, reveal_cache_decay: 0.7}
|
| 58 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 7, planner_top_k: 4, proposal_delta_scale: 0.2, proposal_slot_scale: 0.05}
|
| 59 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, num_tasks: 4}
|
| 60 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2, rollout_mode: compact_rollout, num_tasks: 4, lightweight_field_size: 4}
|
| 61 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4, adapter_confidence_threshold: 0.45}
|
| 62 |
+
|
| 63 |
+
loss_weights:
|
| 64 |
+
action: 1.0
|
| 65 |
+
phase: 0.08
|
| 66 |
+
arm_role: 0.08
|
| 67 |
+
support_mode: 0.08
|
| 68 |
+
corridor: 0.12
|
| 69 |
+
persistence: 0.06
|
| 70 |
+
disturbance: 0.06
|
| 71 |
+
world_model: 0.0
|
| 72 |
+
transition: 0.0
|
| 73 |
+
belief: 0.05
|
| 74 |
+
visibility: 0.05
|
| 75 |
+
clearance: 0.06
|
| 76 |
+
support_stability: 0.06
|
| 77 |
+
reocclusion: 0.06
|
| 78 |
+
occluder_contact: 0.05
|
| 79 |
+
grasp_affordance: 0.05
|
| 80 |
+
planner_success: 0.15
|
| 81 |
+
planner_risk: 0.08
|
| 82 |
+
planner_ranking: 0.15
|
| 83 |
+
proposal_reconstruction: 0.08
|
| 84 |
+
proposal_success: 0.1
|
| 85 |
+
proposal_ranking: 0.12
|
| 86 |
+
proposal_mode: 0.08
|
| 87 |
+
proposal_diversity: 0.05
|
| 88 |
+
role_swap_consistency: 0.0
|
| 89 |
+
task_metrics: 0.06
|
| 90 |
+
gate: 0.05
|
| 91 |
+
distillation: 0.05
|
| 92 |
+
calibration: 0.02
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_rank_only_rebuild128.yaml
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17
|
| 2 |
+
output_dir: /workspace/workspace/outputs/adapter_proxy
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
|
| 8 |
+
data:
|
| 9 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 10 |
+
resolution: 224
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase
|
| 12 |
+
train_episodes_per_proxy: 128
|
| 13 |
+
val_episodes_per_proxy: 32
|
| 14 |
+
train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
|
| 15 |
+
val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
|
| 16 |
+
rebuild_dataset: true
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 17
|
| 22 |
+
|
| 23 |
+
optim:
|
| 24 |
+
epochs: 4
|
| 25 |
+
batch_size: 8
|
| 26 |
+
num_workers: 32
|
| 27 |
+
lr: 0.00005
|
| 28 |
+
weight_decay: 0.0001
|
| 29 |
+
|
| 30 |
+
trainer:
|
| 31 |
+
policy_type: adapter_wrapped
|
| 32 |
+
training_regime: proxy_rank_only
|
| 33 |
+
eval_mode: adapter_active
|
| 34 |
+
adapter_mode: adapter_active
|
| 35 |
+
adapter_use_transition_model: false
|
| 36 |
+
adapter_use_task_conditioning: true
|
| 37 |
+
use_bf16: true
|
| 38 |
+
grad_clip_norm: 1.0
|
| 39 |
+
freeze_backbone: true
|
| 40 |
+
gradient_checkpointing: false
|
| 41 |
+
plan_during_train: false
|
| 42 |
+
plan_during_eval: false
|
| 43 |
+
support_mode_conditioning: true
|
| 44 |
+
planner_mode: off
|
| 45 |
+
use_depth: true
|
| 46 |
+
use_world_model: false
|
| 47 |
+
use_role_tokens: true
|
| 48 |
+
compute_equivariance_probe: false
|
| 49 |
+
trainable_parameter_prefixes:
|
| 50 |
+
- adapter.proposal_prior
|
| 51 |
+
- adapter.planner
|
| 52 |
+
|
| 53 |
+
policy:
|
| 54 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 55 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 56 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
|
| 57 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 7, planner_top_k: 4, proposal_delta_scale: 0.2, proposal_slot_scale: 0.05}
|
| 58 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, num_tasks: 4}
|
| 59 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2, rollout_mode: compact_rollout, num_tasks: 4, lightweight_field_size: 4}
|
| 60 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4, adapter_confidence_threshold: 0.55}
|
| 61 |
+
|
| 62 |
+
loss_weights:
|
| 63 |
+
action: 0.5
|
| 64 |
+
phase: 0.0
|
| 65 |
+
arm_role: 0.0
|
| 66 |
+
support_mode: 0.0
|
| 67 |
+
corridor: 0.0
|
| 68 |
+
persistence: 0.0
|
| 69 |
+
disturbance: 0.0
|
| 70 |
+
world_model: 0.0
|
| 71 |
+
transition: 0.0
|
| 72 |
+
belief: 0.0
|
| 73 |
+
visibility: 0.0
|
| 74 |
+
clearance: 0.0
|
| 75 |
+
support_stability: 0.0
|
| 76 |
+
reocclusion: 0.0
|
| 77 |
+
occluder_contact: 0.0
|
| 78 |
+
grasp_affordance: 0.0
|
| 79 |
+
planner_success: 0.0
|
| 80 |
+
planner_risk: 0.0
|
| 81 |
+
planner_ranking: 0.2
|
| 82 |
+
proposal_reconstruction: 0.0
|
| 83 |
+
proposal_success: 0.1
|
| 84 |
+
proposal_ranking: 0.2
|
| 85 |
+
proposal_mode: 0.1
|
| 86 |
+
proposal_diversity: 0.02
|
| 87 |
+
role_swap_consistency: 0.0
|
| 88 |
+
task_metrics: 0.0
|
| 89 |
+
gate: 0.0
|
| 90 |
+
distillation: 0.05
|
| 91 |
+
calibration: 0.0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_backbone_only_clip
|
| 2 |
+
output_dir: /workspace/outputs/reveal_runs
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
|
| 6 |
+
data:
|
| 7 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 8 |
+
resolution: 224
|
| 9 |
+
train_episodes_per_proxy: 48
|
| 10 |
+
val_episodes_per_proxy: 16
|
| 11 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
|
| 12 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
|
| 13 |
+
rebuild_dataset: true
|
| 14 |
+
chunk_horizon: 8
|
| 15 |
+
rollout_horizon: 5
|
| 16 |
+
history_steps: 2
|
| 17 |
+
planner_candidates: 4
|
| 18 |
+
seed: 7
|
| 19 |
+
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 4
|
| 22 |
+
batch_size: 2
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.0003
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
|
| 27 |
+
trainer:
|
| 28 |
+
policy_type: backbone_only
|
| 29 |
+
use_bf16: true
|
| 30 |
+
grad_clip_norm: 1.0
|
| 31 |
+
freeze_backbone: true
|
| 32 |
+
gradient_checkpointing: false
|
| 33 |
+
plan_during_train: false
|
| 34 |
+
plan_during_eval: false
|
| 35 |
+
support_mode_conditioning: true
|
| 36 |
+
|
| 37 |
+
policy:
|
| 38 |
+
backbone:
|
| 39 |
+
model_name: openai/clip-vit-base-patch32
|
| 40 |
+
hidden_dim: 512
|
| 41 |
+
max_text_tokens: 32
|
| 42 |
+
freeze_backbone: true
|
| 43 |
+
gradient_checkpointing: false
|
| 44 |
+
use_dummy_backbone: false
|
| 45 |
+
fusion:
|
| 46 |
+
hidden_dim: 512
|
| 47 |
+
num_cameras: 3
|
| 48 |
+
num_layers: 4
|
| 49 |
+
num_heads: 8
|
| 50 |
+
ff_dim: 2048
|
| 51 |
+
dropout: 0.1
|
| 52 |
+
proprio_dim: 32
|
| 53 |
+
proprio_tokens: 1
|
| 54 |
+
memory:
|
| 55 |
+
hidden_dim: 512
|
| 56 |
+
history_steps: 2
|
| 57 |
+
num_layers: 1
|
| 58 |
+
dropout: 0.1
|
| 59 |
+
decoder:
|
| 60 |
+
hidden_dim: 512
|
| 61 |
+
num_heads: 8
|
| 62 |
+
num_layers: 4
|
| 63 |
+
ff_dim: 2048
|
| 64 |
+
dropout: 0.1
|
| 65 |
+
chunk_size: 8
|
| 66 |
+
action_dim: 14
|
| 67 |
+
num_candidates: 8
|
| 68 |
+
reveal_head:
|
| 69 |
+
hidden_dim: 512
|
| 70 |
+
num_support_modes: 3
|
| 71 |
+
num_approach_templates: 32
|
| 72 |
+
rollout_horizon: 5
|
| 73 |
+
belief_map_size: 32
|
| 74 |
+
predict_belief_map: true
|
| 75 |
+
world_model:
|
| 76 |
+
hidden_dim: 512
|
| 77 |
+
action_dim: 14
|
| 78 |
+
num_support_modes: 3
|
| 79 |
+
num_approach_templates: 32
|
| 80 |
+
rollout_horizon: 5
|
| 81 |
+
planner:
|
| 82 |
+
hidden_dim: 512
|
| 83 |
+
num_candidates: 8
|
| 84 |
+
action_dim: 14
|
| 85 |
+
utility_margin: 0.1
|
| 86 |
+
|
| 87 |
+
loss_weights:
|
| 88 |
+
action: 1.0
|
| 89 |
+
support_mode: 0.1
|
| 90 |
+
corridor: 0.1
|
| 91 |
+
persistence: 0.05
|
| 92 |
+
disturbance: 0.05
|
| 93 |
+
world_model: 0.1
|
| 94 |
+
belief: 0.05
|
| 95 |
+
planner_success: 0.0
|
| 96 |
+
planner_risk: 0.0
|
| 97 |
+
planner_ranking: 0.0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_backbone_only_smoke
|
| 2 |
+
output_dir: /workspace/outputs/smoke
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 11
|
| 5 |
+
|
| 6 |
+
data:
|
| 7 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 8 |
+
resolution: 64
|
| 9 |
+
train_episodes_per_proxy: 6
|
| 10 |
+
val_episodes_per_proxy: 2
|
| 11 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
|
| 12 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
|
| 13 |
+
rebuild_dataset: true
|
| 14 |
+
chunk_horizon: 4
|
| 15 |
+
rollout_horizon: 3
|
| 16 |
+
history_steps: 2
|
| 17 |
+
planner_candidates: 4
|
| 18 |
+
seed: 11
|
| 19 |
+
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 2
|
| 22 |
+
batch_size: 8
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.001
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
|
| 27 |
+
trainer:
|
| 28 |
+
policy_type: backbone_only
|
| 29 |
+
use_bf16: true
|
| 30 |
+
grad_clip_norm: 1.0
|
| 31 |
+
freeze_backbone: true
|
| 32 |
+
gradient_checkpointing: false
|
| 33 |
+
plan_during_train: false
|
| 34 |
+
plan_during_eval: false
|
| 35 |
+
support_mode_conditioning: true
|
| 36 |
+
|
| 37 |
+
policy:
|
| 38 |
+
backbone:
|
| 39 |
+
model_name: openai/clip-vit-base-patch32
|
| 40 |
+
hidden_dim: 64
|
| 41 |
+
max_text_tokens: 32
|
| 42 |
+
freeze_backbone: true
|
| 43 |
+
gradient_checkpointing: false
|
| 44 |
+
use_dummy_backbone: true
|
| 45 |
+
fusion:
|
| 46 |
+
hidden_dim: 64
|
| 47 |
+
num_cameras: 3
|
| 48 |
+
num_layers: 2
|
| 49 |
+
num_heads: 4
|
| 50 |
+
ff_dim: 128
|
| 51 |
+
dropout: 0.1
|
| 52 |
+
proprio_dim: 32
|
| 53 |
+
proprio_tokens: 1
|
| 54 |
+
memory:
|
| 55 |
+
hidden_dim: 64
|
| 56 |
+
history_steps: 2
|
| 57 |
+
num_layers: 1
|
| 58 |
+
dropout: 0.1
|
| 59 |
+
decoder:
|
| 60 |
+
hidden_dim: 64
|
| 61 |
+
num_heads: 4
|
| 62 |
+
num_layers: 2
|
| 63 |
+
ff_dim: 128
|
| 64 |
+
dropout: 0.1
|
| 65 |
+
chunk_size: 4
|
| 66 |
+
action_dim: 14
|
| 67 |
+
arm_action_dim: 7
|
| 68 |
+
num_candidates: 4
|
| 69 |
+
reveal_head:
|
| 70 |
+
hidden_dim: 64
|
| 71 |
+
num_support_modes: 3
|
| 72 |
+
num_approach_templates: 32
|
| 73 |
+
rollout_horizon: 3
|
| 74 |
+
belief_map_size: 32
|
| 75 |
+
field_size: 16
|
| 76 |
+
num_heads: 4
|
| 77 |
+
predict_belief_map: true
|
| 78 |
+
world_model:
|
| 79 |
+
hidden_dim: 64
|
| 80 |
+
action_dim: 14
|
| 81 |
+
num_support_modes: 3
|
| 82 |
+
num_approach_templates: 32
|
| 83 |
+
rollout_horizon: 3
|
| 84 |
+
planner:
|
| 85 |
+
hidden_dim: 64
|
| 86 |
+
num_candidates: 4
|
| 87 |
+
action_dim: 14
|
| 88 |
+
utility_margin: 0.1
|
| 89 |
+
|
| 90 |
+
loss_weights:
|
| 91 |
+
action: 1.0
|
| 92 |
+
support_mode: 0.0
|
| 93 |
+
corridor: 0.0
|
| 94 |
+
persistence: 0.0
|
| 95 |
+
disturbance: 0.0
|
| 96 |
+
world_model: 0.0
|
| 97 |
+
belief: 0.0
|
| 98 |
+
planner_success: 0.0
|
| 99 |
+
planner_risk: 0.0
|
| 100 |
+
planner_ranking: 0.0
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nodepth.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_ablation_nodepth
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 224, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
|
| 6 |
+
optim: {epochs: 4, batch_size: 2, num_workers: 0, lr: 0.0003, weight_decay: 0.0001}
|
| 7 |
+
trainer: {policy_type: elastic_reveal, use_bf16: true, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
|
| 8 |
+
policy:
|
| 9 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 10 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 11 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
|
| 12 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 13 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 14 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 15 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 16 |
+
loss_weights: {action: 1.0, phase: 0.1, arm_role: 0.15, support_mode: 0.1, corridor: 0.15, persistence: 0.05, disturbance: 0.05, world_model: 0.25, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.25, planner_risk: 0.1, planner_ranking: 0.2, proposal_reconstruction: 0.1, proposal_success: 0.15, proposal_ranking: 0.2, proposal_diversity: 0.05, role_swap_consistency: 0.05}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_noplanner.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_ablation_noplanner
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
|
| 6 |
+
optim: {epochs: 10, batch_size: 16, num_workers: 0, lr: 0.001, weight_decay: 0.0001}
|
| 7 |
+
trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: false, plan_during_eval: false, support_mode_conditioning: true, planner_mode: off, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
|
| 8 |
+
policy:
|
| 9 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
|
| 10 |
+
fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 11 |
+
memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
|
| 12 |
+
decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 13 |
+
reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 14 |
+
world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 15 |
+
planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 16 |
+
loss_weights: {action: 1.0}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_norolesym.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_ablation_norolesym
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
|
| 6 |
+
optim: {epochs: 10, batch_size: 16, num_workers: 0, lr: 0.001, weight_decay: 0.0001}
|
| 7 |
+
trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: false, compute_equivariance_probe: false}
|
| 8 |
+
policy:
|
| 9 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
|
| 10 |
+
fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 11 |
+
memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
|
| 12 |
+
decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 13 |
+
reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 14 |
+
world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 15 |
+
planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 16 |
+
loss_weights: {action: 1.0}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nowm.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_ablation_nowm
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
|
| 6 |
+
optim: {epochs: 10, batch_size: 16, num_workers: 0, lr: 0.001, weight_decay: 0.0001}
|
| 7 |
+
trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: false, use_role_tokens: true, compute_equivariance_probe: true}
|
| 8 |
+
policy:
|
| 9 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
|
| 10 |
+
fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 11 |
+
memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
|
| 12 |
+
decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 13 |
+
reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 14 |
+
world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 15 |
+
planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 16 |
+
loss_weights: {action: 1.0}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_clip.yaml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_clip
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
|
| 8 |
+
data:
|
| 9 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 10 |
+
resolution: 224
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 12 |
+
train_episodes_per_proxy: 48
|
| 13 |
+
val_episodes_per_proxy: 16
|
| 14 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1.pt
|
| 15 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1.pt
|
| 16 |
+
rebuild_dataset: false
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 7
|
| 22 |
+
|
| 23 |
+
optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
|
| 24 |
+
|
| 25 |
+
trainer:
|
| 26 |
+
policy_type: elastic_reveal
|
| 27 |
+
use_bf16: true
|
| 28 |
+
grad_clip_norm: 1.0
|
| 29 |
+
freeze_backbone: true
|
| 30 |
+
gradient_checkpointing: false
|
| 31 |
+
plan_during_train: true
|
| 32 |
+
plan_during_eval: true
|
| 33 |
+
support_mode_conditioning: true
|
| 34 |
+
planner_mode: trainable
|
| 35 |
+
use_depth: false
|
| 36 |
+
use_world_model: true
|
| 37 |
+
use_role_tokens: true
|
| 38 |
+
compute_equivariance_probe: true
|
| 39 |
+
|
| 40 |
+
policy:
|
| 41 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 42 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 43 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
|
| 44 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 45 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 46 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 47 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 48 |
+
|
| 49 |
+
loss_weights:
|
| 50 |
+
action: 1.0
|
| 51 |
+
phase: 0.1
|
| 52 |
+
arm_role: 0.15
|
| 53 |
+
support_mode: 0.1
|
| 54 |
+
corridor: 0.15
|
| 55 |
+
persistence: 0.05
|
| 56 |
+
disturbance: 0.05
|
| 57 |
+
world_model: 0.2
|
| 58 |
+
belief: 0.05
|
| 59 |
+
visibility: 0.05
|
| 60 |
+
clearance: 0.05
|
| 61 |
+
support_stability: 0.05
|
| 62 |
+
reocclusion: 0.05
|
| 63 |
+
occluder_contact: 0.05
|
| 64 |
+
grasp_affordance: 0.05
|
| 65 |
+
planner_success: 0.25
|
| 66 |
+
planner_risk: 0.1
|
| 67 |
+
planner_ranking: 0.2
|
| 68 |
+
proposal_reconstruction: 0.1
|
| 69 |
+
proposal_success: 0.15
|
| 70 |
+
proposal_ranking: 0.2
|
| 71 |
+
proposal_diversity: 0.05
|
| 72 |
+
role_swap_consistency: 0.05
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_dummy.yaml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_dummy
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
|
| 6 |
+
data:
|
| 7 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 8 |
+
resolution: 96
|
| 9 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 10 |
+
train_episodes_per_proxy: 48
|
| 11 |
+
val_episodes_per_proxy: 16
|
| 12 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy.pt
|
| 13 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy.pt
|
| 14 |
+
rebuild_dataset: false
|
| 15 |
+
chunk_horizon: 8
|
| 16 |
+
rollout_horizon: 5
|
| 17 |
+
history_steps: 6
|
| 18 |
+
planner_candidates: 8
|
| 19 |
+
seed: 13
|
| 20 |
+
|
| 21 |
+
optim:
|
| 22 |
+
epochs: 4
|
| 23 |
+
batch_size: 16
|
| 24 |
+
num_workers: 4
|
| 25 |
+
lr: 0.001
|
| 26 |
+
weight_decay: 0.0001
|
| 27 |
+
|
| 28 |
+
trainer:
|
| 29 |
+
policy_type: elastic_reveal
|
| 30 |
+
use_bf16: false
|
| 31 |
+
grad_clip_norm: 1.0
|
| 32 |
+
freeze_backbone: true
|
| 33 |
+
gradient_checkpointing: false
|
| 34 |
+
plan_during_train: true
|
| 35 |
+
plan_during_eval: true
|
| 36 |
+
support_mode_conditioning: true
|
| 37 |
+
planner_mode: trainable
|
| 38 |
+
use_depth: false
|
| 39 |
+
use_world_model: true
|
| 40 |
+
use_role_tokens: true
|
| 41 |
+
compute_equivariance_probe: true
|
| 42 |
+
|
| 43 |
+
policy:
|
| 44 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
|
| 45 |
+
fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 46 |
+
memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
|
| 47 |
+
decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 48 |
+
reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 49 |
+
world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 50 |
+
planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 51 |
+
|
| 52 |
+
loss_weights:
|
| 53 |
+
action: 1.0
|
| 54 |
+
phase: 0.15
|
| 55 |
+
arm_role: 0.2
|
| 56 |
+
support_mode: 0.15
|
| 57 |
+
corridor: 0.2
|
| 58 |
+
persistence: 0.1
|
| 59 |
+
disturbance: 0.1
|
| 60 |
+
world_model: 0.25
|
| 61 |
+
belief: 0.05
|
| 62 |
+
visibility: 0.05
|
| 63 |
+
clearance: 0.05
|
| 64 |
+
support_stability: 0.05
|
| 65 |
+
reocclusion: 0.05
|
| 66 |
+
occluder_contact: 0.05
|
| 67 |
+
grasp_affordance: 0.05
|
| 68 |
+
planner_success: 0.2
|
| 69 |
+
planner_risk: 0.1
|
| 70 |
+
planner_ranking: 0.1
|
| 71 |
+
proposal_reconstruction: 0.2
|
| 72 |
+
proposal_success: 0.1
|
| 73 |
+
proposal_ranking: 0.1
|
| 74 |
+
proposal_diversity: 0.05
|
| 75 |
+
role_swap_consistency: 0.05
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_clip.yaml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage2_clip
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 11
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 224, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage2.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage2.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 11}
|
| 8 |
+
optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
|
| 9 |
+
trainer: {policy_type: elastic_reveal, use_bf16: true, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
|
| 10 |
+
policy:
|
| 11 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 12 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 13 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
|
| 14 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 15 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 16 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 17 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 18 |
+
loss_weights: {action: 1.0, phase: 0.1, arm_role: 0.15, support_mode: 0.1, corridor: 0.15, persistence: 0.05, disturbance: 0.05, world_model: 0.25, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.25, planner_risk: 0.1, planner_ranking: 0.2, proposal_reconstruction: 0.1, proposal_success: 0.15, proposal_ranking: 0.2, proposal_diversity: 0.05, role_swap_consistency: 0.05}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_dummy.yaml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage2_dummy
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 21
|
| 5 |
+
defaults: []
|
| 6 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 21}
|
| 7 |
+
optim: {epochs: 4, batch_size: 16, num_workers: 4, lr: 0.001, weight_decay: 0.0001}
|
| 8 |
+
trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
|
| 9 |
+
policy:
|
| 10 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
|
| 11 |
+
fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 12 |
+
memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
|
| 13 |
+
decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 14 |
+
reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 15 |
+
world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 16 |
+
planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 17 |
+
loss_weights: {action: 1.0, phase: 0.15, arm_role: 0.2, support_mode: 0.15, corridor: 0.2, persistence: 0.1, disturbance: 0.1, world_model: 0.3, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.2, planner_risk: 0.1, planner_ranking: 0.1, proposal_reconstruction: 0.2, proposal_success: 0.1, proposal_ranking: 0.1, proposal_diversity: 0.05, role_swap_consistency: 0.05}
|
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd.yaml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage3_clip_rgbd
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 17
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 224, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
|
| 8 |
+
optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
|
| 9 |
+
trainer: {policy_type: elastic_reveal, use_bf16: true, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: true, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
|
| 10 |
+
policy:
|
| 11 |
+
backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
|
| 12 |
+
fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
|
| 13 |
+
memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
|
| 14 |
+
decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
|
| 15 |
+
reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
|
| 16 |
+
world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
|
| 17 |
+
planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
|
| 18 |
+
loss_weights: {action: 1.0, phase: 0.1, arm_role: 0.15, support_mode: 0.1, corridor: 0.15, persistence: 0.05, disturbance: 0.05, world_model: 0.25, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.25, planner_risk: 0.1, planner_ranking: 0.2, proposal_reconstruction: 0.1, proposal_success: 0.15, proposal_ranking: 0.2, proposal_diversity: 0.05, role_swap_consistency: 0.05}
|