lsnu commited on
Commit
6fa1956
·
verified ·
1 Parent(s): 35377df

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml +85 -0
  2. artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json +106 -0
  3. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml +88 -0
  4. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json +54 -0
  5. artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml +85 -0
  6. artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json +186 -0
  7. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml +88 -0
  8. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json +94 -0
  9. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml +89 -0
  10. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json +28 -0
  11. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json +44 -0
  12. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml +89 -0
  13. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json +28 -0
  14. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json +44 -0
  15. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml +89 -0
  16. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json +28 -0
  17. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json +44 -0
  18. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml +89 -0
  19. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json +28 -0
  20. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json +44 -0
  21. artifacts/reports/reveal_eval/reveal_benchmark.json +28 -0
  22. artifacts/reports/reveal_eval/reveal_benchmark.md +25 -0
  23. artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json +41 -0
  24. artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md +12 -0
  25. artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json +41 -0
  26. code/reveal_vla_bimanual/.gitignore +12 -0
  27. code/reveal_vla_bimanual/README.md +82 -0
  28. code/reveal_vla_bimanual/docs/upstream_pins.md +24 -0
  29. code/reveal_vla_bimanual/docs/xorg.rtx6000.conf +33 -0
  30. code/reveal_vla_bimanual/envs/reveal310.yaml +38 -0
  31. code/reveal_vla_bimanual/envs/rlbench310.yaml +50 -0
  32. code/reveal_vla_bimanual/eval/__init__.py +3 -0
  33. code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc +0 -0
  34. code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc +0 -0
  35. code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc +0 -0
  36. code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc +0 -0
  37. code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc +0 -0
  38. code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc +0 -0
  39. code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc +0 -0
  40. code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc +0 -0
  41. code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc +0 -0
  42. code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc +0 -0
  43. code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc +0 -0
  44. code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc +0 -0
  45. code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc +0 -0
  46. code/reveal_vla_bimanual/eval/ablations.py +8 -0
  47. code/reveal_vla_bimanual/eval/metrics.py +52 -0
  48. code/reveal_vla_bimanual/eval/report.py +50 -0
  49. code/reveal_vla_bimanual/eval/run_ablations.py +68 -0
  50. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +231 -0
artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 8
21
+ batch_size: 16
22
+ num_workers: 0
23
+ lr: 0.001
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: backbone_only
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ policy:
32
+ backbone:
33
+ model_name: openai/clip-vit-base-patch32
34
+ hidden_dim: 128
35
+ max_text_tokens: 32
36
+ freeze_backbone: true
37
+ gradient_checkpointing: false
38
+ use_dummy_backbone: true
39
+ fusion:
40
+ hidden_dim: 128
41
+ num_cameras: 3
42
+ num_layers: 2
43
+ num_heads: 4
44
+ ff_dim: 256
45
+ dropout: 0.1
46
+ proprio_dim: 32
47
+ proprio_tokens: 1
48
+ decoder:
49
+ hidden_dim: 128
50
+ num_heads: 4
51
+ num_layers: 2
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ chunk_size: 8
55
+ action_dim: 14
56
+ num_candidates: 8
57
+ reveal_head:
58
+ hidden_dim: 128
59
+ num_support_modes: 3
60
+ num_approach_templates: 32
61
+ rollout_horizon: 5
62
+ belief_map_size: 32
63
+ predict_belief_map: true
64
+ world_model:
65
+ hidden_dim: 128
66
+ action_dim: 14
67
+ num_support_modes: 3
68
+ num_approach_templates: 32
69
+ rollout_horizon: 5
70
+ planner:
71
+ num_candidates: 8
72
+ corridor_weight: 1.0
73
+ persistence_weight: 0.5
74
+ proposal_weight: 0.5
75
+ disturbance_weight: 0.75
76
+ reocclusion_weight: 0.5
77
+ visibility_weight: 0.25
78
+ loss_weights:
79
+ action: 1.0
80
+ support_mode: 0.1
81
+ corridor: 0.1
82
+ persistence: 0.05
83
+ disturbance: 0.05
84
+ world_model: 0.1
85
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.06700062464612226,
6
+ "total": 0.06700062464612226,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.02209080010652542,
11
+ "total": 0.02209080010652542,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.02441179845482111,
19
+ "total": 0.02441179845482111,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.01861108955927193,
24
+ "total": 0.01861108955927193,
25
+ "world_model": 0.0
26
+ }
27
+ },
28
+ {
29
+ "epoch": 2,
30
+ "train": {
31
+ "action": 0.020652000947544973,
32
+ "total": 0.020652000947544973,
33
+ "world_model": 0.0
34
+ },
35
+ "val": {
36
+ "action": 0.01581601658836007,
37
+ "total": 0.01581601658836007,
38
+ "world_model": 0.0
39
+ }
40
+ },
41
+ {
42
+ "epoch": 3,
43
+ "train": {
44
+ "action": 0.01735153196689983,
45
+ "total": 0.01735153196689983,
46
+ "world_model": 0.0
47
+ },
48
+ "val": {
49
+ "action": 0.01413003564812243,
50
+ "total": 0.01413003564812243,
51
+ "world_model": 0.0
52
+ }
53
+ },
54
+ {
55
+ "epoch": 4,
56
+ "train": {
57
+ "action": 0.015502698409060637,
58
+ "total": 0.015502698409060637,
59
+ "world_model": 0.0
60
+ },
61
+ "val": {
62
+ "action": 0.012679400155320764,
63
+ "total": 0.012679400155320764,
64
+ "world_model": 0.0
65
+ }
66
+ },
67
+ {
68
+ "epoch": 5,
69
+ "train": {
70
+ "action": 0.015521424783704182,
71
+ "total": 0.015521424783704182,
72
+ "world_model": 0.0
73
+ },
74
+ "val": {
75
+ "action": 0.011973066837526858,
76
+ "total": 0.011973066837526858,
77
+ "world_model": 0.0
78
+ }
79
+ },
80
+ {
81
+ "epoch": 6,
82
+ "train": {
83
+ "action": 0.014476912096142769,
84
+ "total": 0.014476912096142769,
85
+ "world_model": 0.0
86
+ },
87
+ "val": {
88
+ "action": 0.011093099834397435,
89
+ "total": 0.011093099834397435,
90
+ "world_model": 0.0
91
+ }
92
+ },
93
+ {
94
+ "epoch": 7,
95
+ "train": {
96
+ "action": 0.012226066280466815,
97
+ "total": 0.012226066280466815,
98
+ "world_model": 0.0
99
+ },
100
+ "val": {
101
+ "action": 0.012411019764840603,
102
+ "total": 0.012411019764840603,
103
+ "world_model": 0.0
104
+ }
105
+ }
106
+ ]
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only_clip
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 4
21
+ batch_size: 2
22
+ num_workers: 0
23
+ lr: 0.0003
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: backbone_only
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: false
32
+ plan_during_eval: false
33
+ support_mode_conditioning: true
34
+ policy:
35
+ backbone:
36
+ model_name: openai/clip-vit-base-patch32
37
+ hidden_dim: 512
38
+ max_text_tokens: 32
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ use_dummy_backbone: false
42
+ fusion:
43
+ hidden_dim: 512
44
+ num_cameras: 3
45
+ num_layers: 4
46
+ num_heads: 8
47
+ ff_dim: 2048
48
+ dropout: 0.1
49
+ proprio_dim: 32
50
+ proprio_tokens: 1
51
+ decoder:
52
+ hidden_dim: 512
53
+ num_heads: 8
54
+ num_layers: 4
55
+ ff_dim: 2048
56
+ dropout: 0.1
57
+ chunk_size: 8
58
+ action_dim: 14
59
+ num_candidates: 8
60
+ reveal_head:
61
+ hidden_dim: 512
62
+ num_support_modes: 3
63
+ num_approach_templates: 32
64
+ rollout_horizon: 5
65
+ belief_map_size: 32
66
+ predict_belief_map: true
67
+ world_model:
68
+ hidden_dim: 512
69
+ action_dim: 14
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 5
73
+ planner:
74
+ num_candidates: 8
75
+ corridor_weight: 1.0
76
+ persistence_weight: 0.5
77
+ proposal_weight: 0.5
78
+ disturbance_weight: 0.75
79
+ reocclusion_weight: 0.5
80
+ visibility_weight: 0.25
81
+ loss_weights:
82
+ action: 1.0
83
+ support_mode: 0.1
84
+ corridor: 0.1
85
+ persistence: 0.05
86
+ disturbance: 0.05
87
+ world_model: 0.1
88
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.14342915779711063,
6
+ "total": 0.14342915779711063,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.026520084648851364,
11
+ "total": 0.026520084648851364,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.01376689436079944,
19
+ "total": 0.01376689436079944,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.00792281218390498,
24
+ "total": 0.00792281218390498,
25
+ "world_model": 0.0
26
+ }
27
+ },
28
+ {
29
+ "epoch": 2,
30
+ "train": {
31
+ "action": 0.009396829446095057,
32
+ "total": 0.009396829446095057,
33
+ "world_model": 0.0
34
+ },
35
+ "val": {
36
+ "action": 0.006728713663058385,
37
+ "total": 0.006728713663058385,
38
+ "world_model": 0.0
39
+ }
40
+ },
41
+ {
42
+ "epoch": 3,
43
+ "train": {
44
+ "action": 0.007774835790102784,
45
+ "total": 0.007774835790102784,
46
+ "world_model": 0.0
47
+ },
48
+ "val": {
49
+ "action": 0.005187951255634073,
50
+ "total": 0.005187951255634073,
51
+ "world_model": 0.0
52
+ }
53
+ }
54
+ ]
artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 8
21
+ batch_size: 16
22
+ num_workers: 0
23
+ lr: 0.001
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: reveal_state
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ policy:
32
+ backbone:
33
+ model_name: openai/clip-vit-base-patch32
34
+ hidden_dim: 128
35
+ max_text_tokens: 32
36
+ freeze_backbone: true
37
+ gradient_checkpointing: false
38
+ use_dummy_backbone: true
39
+ fusion:
40
+ hidden_dim: 128
41
+ num_cameras: 3
42
+ num_layers: 2
43
+ num_heads: 4
44
+ ff_dim: 256
45
+ dropout: 0.1
46
+ proprio_dim: 32
47
+ proprio_tokens: 1
48
+ decoder:
49
+ hidden_dim: 128
50
+ num_heads: 4
51
+ num_layers: 2
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ chunk_size: 8
55
+ action_dim: 14
56
+ num_candidates: 8
57
+ reveal_head:
58
+ hidden_dim: 128
59
+ num_support_modes: 3
60
+ num_approach_templates: 32
61
+ rollout_horizon: 5
62
+ belief_map_size: 32
63
+ predict_belief_map: true
64
+ world_model:
65
+ hidden_dim: 128
66
+ action_dim: 14
67
+ num_support_modes: 3
68
+ num_approach_templates: 32
69
+ rollout_horizon: 5
70
+ planner:
71
+ num_candidates: 8
72
+ corridor_weight: 1.0
73
+ persistence_weight: 0.65
74
+ proposal_weight: 0.35
75
+ disturbance_weight: 0.8
76
+ reocclusion_weight: 0.6
77
+ visibility_weight: 0.35
78
+ loss_weights:
79
+ action: 1.0
80
+ support_mode: 0.15
81
+ corridor: 0.2
82
+ persistence: 0.1
83
+ disturbance: 0.1
84
+ world_model: 0.2
85
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.2602546961667637,
6
+ "belief": 0.4802860766649246,
7
+ "corridor": 0.6443073948224386,
8
+ "disturbance": 0.006578955658672688,
9
+ "persistence": 4.514919241269429,
10
+ "support_mode": 0.8015391031901041,
11
+ "total": 2.0875226110219955,
12
+ "world_model": 5.510057131449382
13
+ },
14
+ "val": {
15
+ "action": 0.04658499173820019,
16
+ "belief": 0.280171237885952,
17
+ "corridor": 0.5032978095114231,
18
+ "disturbance": 0.003645064221927896,
19
+ "persistence": 3.8178451359272003,
20
+ "support_mode": 0.6714280992746353,
21
+ "total": 1.012940600514412,
22
+ "world_model": 1.8441212028265
23
+ }
24
+ },
25
+ {
26
+ "epoch": 1,
27
+ "train": {
28
+ "action": 0.03881739747400085,
29
+ "belief": 0.18641860752056041,
30
+ "corridor": 0.3944183625280857,
31
+ "disturbance": 0.030439561344489146,
32
+ "persistence": 3.206294293204943,
33
+ "support_mode": 0.5347911287099123,
34
+ "total": 0.9082020496328672,
35
+ "world_model": 1.8864398151636124
36
+ },
37
+ "val": {
38
+ "action": 0.04213718790560961,
39
+ "belief": 0.15712551027536392,
40
+ "corridor": 0.3507457673549652,
41
+ "disturbance": 0.006276358384639025,
42
+ "persistence": 1.8078171163797379,
43
+ "support_mode": 0.10970124043524265,
44
+ "total": 0.6724201738834381,
45
+ "world_model": 1.772064983844757
46
+ }
47
+ },
48
+ {
49
+ "epoch": 2,
50
+ "train": {
51
+ "action": 0.031200370130439598,
52
+ "belief": 0.13828600694735846,
53
+ "corridor": 0.31750819956262905,
54
+ "disturbance": 0.011857866222271696,
55
+ "persistence": 1.7015922193725903,
56
+ "support_mode": 0.02674841312303518,
57
+ "total": 0.6129550884167353,
58
+ "world_model": 1.6799074759085972
59
+ },
60
+ "val": {
61
+ "action": 0.019523032009601593,
62
+ "belief": 0.09429990872740746,
63
+ "corridor": 0.24884792044758797,
64
+ "disturbance": 0.0043011417728848755,
65
+ "persistence": 1.5114311277866364,
66
+ "support_mode": 0.0060500025865621865,
67
+ "total": 0.5359727554023266,
68
+ "world_model": 1.5474220663309097
69
+ }
70
+ },
71
+ {
72
+ "epoch": 3,
73
+ "train": {
74
+ "action": 0.022356805779660743,
75
+ "belief": 0.09125891048461199,
76
+ "corridor": 0.23351835707823435,
77
+ "disturbance": 0.006718798467773013,
78
+ "persistence": 1.6300043910741806,
79
+ "support_mode": 0.004253969304651643,
80
+ "total": 0.5548354809482893,
81
+ "world_model": 1.5845081210136414
82
+ },
83
+ "val": {
84
+ "action": 0.01580847823061049,
85
+ "belief": 0.09042494650930166,
86
+ "corridor": 0.22376472875475883,
87
+ "disturbance": 0.018967560958117247,
88
+ "persistence": 1.4363956600427628,
89
+ "support_mode": 0.03418254409916699,
90
+ "total": 0.5279115326702595,
91
+ "world_model": 1.5608257874846458
92
+ }
93
+ },
94
+ {
95
+ "epoch": 4,
96
+ "train": {
97
+ "action": 0.019881066245337326,
98
+ "belief": 0.08954659259567659,
99
+ "corridor": 0.21636931287745634,
100
+ "disturbance": 0.005539724506282558,
101
+ "persistence": 1.592231921851635,
102
+ "support_mode": 0.008331454223177085,
103
+ "total": 0.5372808227936426,
104
+ "world_model": 1.5431083713968594
105
+ },
106
+ "val": {
107
+ "action": 0.015133287757635117,
108
+ "belief": 0.08718204218894243,
109
+ "corridor": 0.20481965504586697,
110
+ "disturbance": 0.0031357303814729676,
111
+ "persistence": 1.3192060887813568,
112
+ "support_mode": 0.0030863596766721457,
113
+ "total": 0.47997843474149704,
114
+ "world_model": 1.4341248571872711
115
+ }
116
+ },
117
+ {
118
+ "epoch": 5,
119
+ "train": {
120
+ "action": 0.030778280459344387,
121
+ "belief": 0.09159998937199514,
122
+ "corridor": 0.21967005419234434,
123
+ "disturbance": 0.005901901221174437,
124
+ "persistence": 1.651158797244231,
125
+ "support_mode": 0.0024410486221313477,
126
+ "total": 0.5050872204204401,
127
+ "world_model": 1.2986134762565296
128
+ },
129
+ "val": {
130
+ "action": 0.03259791061282158,
131
+ "belief": 0.08867455553263426,
132
+ "corridor": 0.20528649538755417,
133
+ "disturbance": 0.0037689711316488683,
134
+ "persistence": 1.3772646486759186,
135
+ "support_mode": 0.0007588127191411331,
136
+ "total": 0.4101765304803848,
137
+ "world_model": 0.9693519398570061
138
+ }
139
+ },
140
+ {
141
+ "epoch": 6,
142
+ "train": {
143
+ "action": 0.028416083427146077,
144
+ "belief": 0.09289384291817744,
145
+ "corridor": 0.22298985657592615,
146
+ "disturbance": 0.0031898027373244986,
147
+ "persistence": 1.2752377291520436,
148
+ "support_mode": 0.04850278014297752,
149
+ "total": 0.40898223718007404,
150
+ "world_model": 0.9810265600681305
151
+ },
152
+ "val": {
153
+ "action": 0.02159481483977288,
154
+ "belief": 0.08797950763255358,
155
+ "corridor": 0.20524934865534306,
156
+ "disturbance": 0.0015436648827744648,
157
+ "persistence": 1.286000706255436,
158
+ "support_mode": 0.0010480962373549119,
159
+ "total": 0.3605738691985607,
160
+ "world_model": 0.8230927512049675
161
+ }
162
+ },
163
+ {
164
+ "epoch": 7,
165
+ "train": {
166
+ "action": 0.021424691736077268,
167
+ "belief": 0.0899931692207853,
168
+ "corridor": 0.21607277914881706,
169
+ "disturbance": 0.0034827212220989168,
170
+ "persistence": 0.9069182885189851,
171
+ "support_mode": 0.00435957100125961,
172
+ "total": 0.3383450036247571,
173
+ "world_model": 0.8875602881113688
174
+ },
175
+ "val": {
176
+ "action": 0.017686392879113555,
177
+ "belief": 0.09035013243556023,
178
+ "corridor": 0.21036655083298683,
179
+ "disturbance": 0.004888073919573799,
180
+ "persistence": 0.5709216743707657,
181
+ "support_mode": 0.001884725206764415,
182
+ "total": 0.31777225248515606,
183
+ "world_model": 0.978156752884388
184
+ }
185
+ }
186
+ ]
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state_clip
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 4
21
+ batch_size: 2
22
+ num_workers: 0
23
+ lr: 0.0003
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: reveal_state
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: true
32
+ plan_during_eval: true
33
+ support_mode_conditioning: true
34
+ policy:
35
+ backbone:
36
+ model_name: openai/clip-vit-base-patch32
37
+ hidden_dim: 512
38
+ max_text_tokens: 32
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ use_dummy_backbone: false
42
+ fusion:
43
+ hidden_dim: 512
44
+ num_cameras: 3
45
+ num_layers: 4
46
+ num_heads: 8
47
+ ff_dim: 2048
48
+ dropout: 0.1
49
+ proprio_dim: 32
50
+ proprio_tokens: 1
51
+ decoder:
52
+ hidden_dim: 512
53
+ num_heads: 8
54
+ num_layers: 4
55
+ ff_dim: 2048
56
+ dropout: 0.1
57
+ chunk_size: 8
58
+ action_dim: 14
59
+ num_candidates: 8
60
+ reveal_head:
61
+ hidden_dim: 512
62
+ num_support_modes: 3
63
+ num_approach_templates: 32
64
+ rollout_horizon: 5
65
+ belief_map_size: 32
66
+ predict_belief_map: true
67
+ world_model:
68
+ hidden_dim: 512
69
+ action_dim: 14
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 5
73
+ planner:
74
+ num_candidates: 8
75
+ corridor_weight: 1.0
76
+ persistence_weight: 0.65
77
+ proposal_weight: 0.35
78
+ disturbance_weight: 0.8
79
+ reocclusion_weight: 0.6
80
+ visibility_weight: 0.35
81
+ loss_weights:
82
+ action: 1.0
83
+ support_mode: 0.15
84
+ corridor: 0.2
85
+ persistence: 0.1
86
+ disturbance: 0.1
87
+ world_model: 0.2
88
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.2168051045758562,
6
+ "belief": 0.16835976690444024,
7
+ "corridor": 0.2947022703851705,
8
+ "disturbance": 0.007973204485554213,
9
+ "persistence": 4.26063614482967,
10
+ "support_mode": 0.7333370827879581,
11
+ "total": 1.1824027625990163,
12
+ "world_model": 1.8068884567440493
13
+ },
14
+ "val": {
15
+ "action": 0.06980070081495103,
16
+ "belief": 0.09293079068736425,
17
+ "corridor": 0.23202623426914215,
18
+ "disturbance": 0.006832122442401236,
19
+ "persistence": 3.871745571257576,
20
+ "support_mode": 0.6699983808729384,
21
+ "total": 0.7863351002572074,
22
+ "world_model": 0.8856253113065448
23
+ }
24
+ },
25
+ {
26
+ "epoch": 1,
27
+ "train": {
28
+ "action": 0.054234529075003104,
29
+ "belief": 0.09439963061382009,
30
+ "corridor": 0.24123663386983396,
31
+ "disturbance": 0.008799185583979581,
32
+ "persistence": 3.9709763473865247,
33
+ "support_mode": 0.674577163776178,
34
+ "total": 0.796180099092853,
35
+ "world_model": 0.9490705705125918
36
+ },
37
+ "val": {
38
+ "action": 0.06558700479448788,
39
+ "belief": 0.1815936780638165,
40
+ "corridor": 0.3361685186151474,
41
+ "disturbance": 0.023940630294086915,
42
+ "persistence": 4.7415515091565865,
43
+ "support_mode": 0.8642671259622725,
44
+ "total": 0.9338183213794042,
45
+ "world_model": 0.9286431225519332
46
+ }
47
+ },
48
+ {
49
+ "epoch": 2,
50
+ "train": {
51
+ "action": 0.03919103866472294,
52
+ "belief": 0.09202757795677759,
53
+ "corridor": 0.21921133667874243,
54
+ "disturbance": 0.04529383548148981,
55
+ "persistence": 1.5436662856260246,
56
+ "support_mode": 0.23989241035820927,
57
+ "total": 0.45590807076212,
58
+ "world_model": 0.8669675243774634
59
+ },
60
+ "val": {
61
+ "action": 0.02496799406787706,
62
+ "belief": 0.08762083173034683,
63
+ "corridor": 0.1930048821996602,
64
+ "disturbance": 0.012308748878745569,
65
+ "persistence": 0.9973389923809066,
66
+ "support_mode": 0.14653402309687363,
67
+ "total": 0.34120540746620726,
68
+ "world_model": 0.7515525425237323
69
+ }
70
+ },
71
+ {
72
+ "epoch": 3,
73
+ "train": {
74
+ "action": 0.034251564747961094,
75
+ "belief": 0.0881565280882788,
76
+ "corridor": 0.19749194407513784,
77
+ "disturbance": 0.019202744416642326,
78
+ "persistence": 1.0902665860137868,
79
+ "support_mode": 0.07417118861413127,
80
+ "total": 0.3623058025905599,
81
+ "world_model": 0.810377035309507
82
+ },
83
+ "val": {
84
+ "action": 0.020182275937663183,
85
+ "belief": 0.08651774370717624,
86
+ "corridor": 0.18512752960022125,
87
+ "disturbance": 0.02845218790591591,
88
+ "persistence": 1.0011120429706006,
89
+ "support_mode": 0.1388084255080367,
90
+ "total": 0.3356363290832156,
91
+ "world_model": 0.7516248249818408
92
+ }
93
+ }
94
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_backbone_only_clip
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 2
23
+ num_workers: 0
24
+ lr: 0.0002
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: backbone_only
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 512
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: false
43
+ fusion:
44
+ hidden_dim: 512
45
+ num_cameras: 3
46
+ num_layers: 4
47
+ num_heads: 8
48
+ ff_dim: 2048
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 512
54
+ num_heads: 8
55
+ num_layers: 4
56
+ ff_dim: 2048
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 512
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 512
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.5
78
+ proposal_weight: 0.5
79
+ disturbance_weight: 0.75
80
+ reocclusion_weight: 0.5
81
+ visibility_weight: 0.25
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.1
85
+ corridor: 0.1
86
+ persistence: 0.05
87
+ disturbance: 0.05
88
+ world_model: 0.1
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.010832569689285108,
6
+ "total": 0.010832569689285108,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.00584922067168602,
11
+ "total": 0.00584922067168602,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.007243322389241776,
19
+ "total": 0.007243322389241776,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.004669623740794346,
24
+ "total": 0.004669623740794346,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_backbone_only_clip",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
5
+ "final_train_total": 0.007243322389241776,
6
+ "final_val_total": 0.004669623740794346,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_backbone_only_dummy
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 4
23
+ num_workers: 0
24
+ lr: 0.0005
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: backbone_only
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 128
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: true
43
+ fusion:
44
+ hidden_dim: 128
45
+ num_cameras: 3
46
+ num_layers: 2
47
+ num_heads: 4
48
+ ff_dim: 256
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 128
54
+ num_heads: 4
55
+ num_layers: 2
56
+ ff_dim: 256
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 128
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 128
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.5
78
+ proposal_weight: 0.5
79
+ disturbance_weight: 0.75
80
+ reocclusion_weight: 0.5
81
+ visibility_weight: 0.25
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.1
85
+ corridor: 0.1
86
+ persistence: 0.05
87
+ disturbance: 0.05
88
+ world_model: 0.1
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.012133247866586316,
6
+ "total": 0.012133247866586316,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.008180527588070191,
11
+ "total": 0.008180527588070191,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.00792471425726641,
19
+ "total": 0.00792471425726641,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.005605970580716608,
24
+ "total": 0.005605970580716608,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_backbone_only_dummy",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt",
5
+ "final_train_total": 0.00792471425726641,
6
+ "final_val_total": 0.005605970580716608,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_reveal_state_clip
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 2
23
+ num_workers: 0
24
+ lr: 0.0002
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: reveal_state
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 512
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: false
43
+ fusion:
44
+ hidden_dim: 512
45
+ num_cameras: 3
46
+ num_layers: 4
47
+ num_heads: 8
48
+ ff_dim: 2048
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 512
54
+ num_heads: 8
55
+ num_layers: 4
56
+ ff_dim: 2048
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 512
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 512
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.65
78
+ proposal_weight: 0.35
79
+ disturbance_weight: 0.8
80
+ reocclusion_weight: 0.6
81
+ visibility_weight: 0.35
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.15
85
+ corridor: 0.2
86
+ persistence: 0.1
87
+ disturbance: 0.1
88
+ world_model: 0.2
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.012311161635931172,
6
+ "total": 0.012311161635931172,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.00556847607881269,
11
+ "total": 0.00556847607881269,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.0070935887447924045,
19
+ "total": 0.0070935887447924045,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.004233352240750238,
24
+ "total": 0.004233352240750238,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_reveal_state_clip",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
5
+ "final_train_total": 0.0070935887447924045,
6
+ "final_val_total": 0.004233352240750238,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_reveal_state_dummy
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 4
23
+ num_workers: 0
24
+ lr: 0.0005
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: reveal_state
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 128
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: true
43
+ fusion:
44
+ hidden_dim: 128
45
+ num_cameras: 3
46
+ num_layers: 2
47
+ num_heads: 4
48
+ ff_dim: 256
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 128
54
+ num_heads: 4
55
+ num_layers: 2
56
+ ff_dim: 256
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 128
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 128
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.65
78
+ proposal_weight: 0.35
79
+ disturbance_weight: 0.8
80
+ reocclusion_weight: 0.6
81
+ visibility_weight: 0.35
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.15
85
+ corridor: 0.2
86
+ persistence: 0.1
87
+ disturbance: 0.1
88
+ world_model: 0.2
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.015062082646181807,
6
+ "total": 0.015062082646181807,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.008003641142846738,
11
+ "total": 0.008003641142846738,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.007828686845944807,
19
+ "total": 0.007828686845944807,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.0091639062995958,
24
+ "total": 0.0091639062995958,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_reveal_state_dummy",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt",
5
+ "final_train_total": 0.007828686845944807,
6
+ "final_val_total": 0.0091639062995958,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/reports/reveal_eval/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 1.0,
5
+ "bag_proxy": 1.0,
6
+ "cloth_proxy": 1.0
7
+ },
8
+ "mean_success": 1.0,
9
+ "visibility_integral": 1.7894555413060718,
10
+ "corridor_availability": 0.7018518588609166,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.1193024102701909
14
+ },
15
+ "reveal": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.9583333333333334,
18
+ "bag_proxy": 0.9166666666666666,
19
+ "cloth_proxy": 1.0
20
+ },
21
+ "mean_success": 0.9583333333333334,
22
+ "visibility_integral": 6.966822463605139,
23
+ "corridor_availability": 0.7799575842089124,
24
+ "reocclusion_rate": 0.005997474747474748,
25
+ "persistence_horizon_mae": 1.2541997782345518,
26
+ "disturbance_cost": 0.2107134228054848
27
+ }
28
+ }
artifacts/reports/reveal_eval/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone
4
+ - checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
5
+ - mean_success: 1.000
6
+ - visibility_integral: 1.789
7
+ - corridor_availability: 0.702
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.119
11
+ - foliage_proxy_success: 1.000
12
+ - bag_proxy_success: 1.000
13
+ - cloth_proxy_success: 1.000
14
+
15
+ ## reveal
16
+ - checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
17
+ - mean_success: 0.958
18
+ - visibility_integral: 6.967
19
+ - corridor_availability: 0.780
20
+ - reocclusion_rate: 0.006
21
+ - persistence_horizon_mae: 1.254
22
+ - disturbance_cost: 0.211
23
+ - foliage_proxy_success: 0.958
24
+ - bag_proxy_success: 0.917
25
+ - cloth_proxy_success: 1.000
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
3
+ "plan": false,
4
+ "support_mode_conditioning": true,
5
+ "episodes_per_task": 1,
6
+ "episode_length": 5,
7
+ "resolution": 224,
8
+ "tasks": {
9
+ "bimanual_lift_ball": {
10
+ "successes": [
11
+ 0.0
12
+ ],
13
+ "returns": [
14
+ 0.0
15
+ ],
16
+ "mean_success": 0.0,
17
+ "mean_return": 0.0
18
+ },
19
+ "bimanual_push_box": {
20
+ "successes": [
21
+ 0.0
22
+ ],
23
+ "returns": [
24
+ 0.0
25
+ ],
26
+ "mean_success": 0.0,
27
+ "mean_return": 0.0
28
+ },
29
+ "bimanual_dual_push_buttons": {
30
+ "successes": [
31
+ 0.0
32
+ ],
33
+ "returns": [
34
+ 0.0
35
+ ],
36
+ "mean_success": 0.0,
37
+ "mean_return": 0.0
38
+ }
39
+ },
40
+ "mean_success": 0.0
41
+ }
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLBench Rollout Eval
2
+
3
+ - Checkpoint: `/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt`
4
+ - Plan enabled: `False`
5
+ - Support-mode conditioning: `True`
6
+ - Mean success: `0.000`
7
+
8
+ ## Per-task
9
+
10
+ - `bimanual_lift_ball`: mean_success=0.000, returns=[0.0]
11
+ - `bimanual_push_box`: mean_success=0.000, returns=[0.0]
12
+ - `bimanual_dual_push_buttons`: mean_success=0.000, returns=[0.0]
artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
3
+ "plan": false,
4
+ "support_mode_conditioning": true,
5
+ "episodes_per_task": 1,
6
+ "episode_length": 5,
7
+ "resolution": 224,
8
+ "tasks": {
9
+ "bimanual_lift_ball": {
10
+ "successes": [
11
+ 0.0
12
+ ],
13
+ "returns": [
14
+ 0.0
15
+ ],
16
+ "mean_success": 0.0,
17
+ "mean_return": 0.0
18
+ },
19
+ "bimanual_push_box": {
20
+ "successes": [
21
+ 0.0
22
+ ],
23
+ "returns": [
24
+ 0.0
25
+ ],
26
+ "mean_success": 0.0,
27
+ "mean_return": 0.0
28
+ },
29
+ "bimanual_dual_push_buttons": {
30
+ "successes": [
31
+ 0.0
32
+ ],
33
+ "returns": [
34
+ 0.0
35
+ ],
36
+ "mean_success": 0.0,
37
+ "mean_return": 0.0
38
+ }
39
+ },
40
+ "mean_success": 0.0
41
+ }
code/reveal_vla_bimanual/.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .DS_Store
4
+ .mypy_cache/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .venv/
8
+ artifacts/
9
+ outputs/
10
+ logs/
11
+ wandb/
12
+ reports/
code/reveal_vla_bimanual/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reveal_vla_bimanual
2
+
3
+ Simulation-first prototype for bimanual reveal-and-retrieve under elastic occlusion.
4
+
5
+ This repo is structured around five top-level modules:
6
+
7
+ - `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
8
+ - `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
9
+ - `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
10
+ - `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
11
+ - `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
12
+
13
+ Current bootstrap priorities:
14
+
15
+ 1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
16
+ 2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
17
+ 3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
18
+
19
+ Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
20
+
21
+ ## RLBench env A
22
+
23
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
24
+
25
+ Bring it up with:
26
+
27
+ ```bash
28
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
29
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
30
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
31
+ ```
32
+
33
+ Verify GPU GL on the headless display:
34
+
35
+ ```bash
36
+ DISPLAY=:99 glxinfo -B
37
+ ```
38
+
39
+ Run the RLBench launch/reset/step smoke test:
40
+
41
+ ```bash
42
+ env \
43
+ DISPLAY=:99 \
44
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
45
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
46
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
47
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
48
+ /workspace/.tools/micromamba/bin/micromamba run \
49
+ -r /workspace/.micromamba \
50
+ -p /workspace/envs/rlbench \
51
+ python -m sim_rlbench.launch_smoke --headless
52
+ ```
53
+
54
+ The working benchmark interface is fixed to three cameras only:
55
+
56
+ - `front`
57
+ - `wrist_left`
58
+ - `wrist_right`
59
+
60
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
61
+
62
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
63
+
64
+ ```bash
65
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
66
+ python -m sim_rlbench.smoke_test --print-train-command
67
+ ```
68
+
69
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
70
+
71
+ ```bash
72
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
73
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
74
+ ```
75
+
76
+ If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
77
+
78
+ ```bash
79
+ apt-get install -y squashfs-tools
80
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
81
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
82
+ ```
code/reveal_vla_bimanual/docs/upstream_pins.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Upstream Pins
2
+
3
+ Pinned on 2026-03-22 in `/workspace/third_party`.
4
+
5
+ Mandatory benchmark stack:
6
+
7
+ - `peract_bimanual`: `bb0232a6ba3fe116566e9568f0c7af980ed6703d`
8
+ - `RLBench`: `8af748c51287989294e00c9c670e3330a0e35ed5`
9
+ - `PyRep`: `b8bd1d7a3182adcd570d001649c0849047ebf197`
10
+ - `YARR`: `6822ff78602c77878b27d4cfe759ce029c67bffb`
11
+
12
+ Optional published baseline:
13
+
14
+ - `AnyBimanual`: `76024e48b0e9489101459e85bc909c126ec581b4`
15
+
16
+ Reveal-proxy stack candidate:
17
+
18
+ - `IsaacLab`: `v2.3.1` was cloned for inspection, but it targets Python 3.11 and Isaac Sim 5.x.
19
+ - For the frozen project scope of Python 3.10 on Ubuntu 22.04, env B should stay on an Isaac Sim 4.5-compatible Isaac Lab release instead of the latest branch.
20
+
21
+ Notes:
22
+
23
+ - `peract_bimanual` defaults to 6 cameras and older Python/Torch pins. This repo overrides camera selection and environment creation rather than running the upstream install scripts unchanged.
24
+ - RLBench headless execution on this RunPod host will require an X server setup because the base image does not currently ship `X`, `xvfb`, or `nvidia-xconfig`.
code/reveal_vla_bimanual/docs/xorg.rtx6000.conf ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Section "ServerLayout"
2
+ Identifier "Layout0"
3
+ Screen 0 "Screen0"
4
+ EndSection
5
+
6
+ Section "Monitor"
7
+ Identifier "Monitor0"
8
+ VendorName "Unknown"
9
+ ModelName "Unknown"
10
+ Option "DPMS"
11
+ EndSection
12
+
13
+ Section "Device"
14
+ Identifier "Device0"
15
+ Driver "nvidia"
16
+ VendorName "NVIDIA Corporation"
17
+ BusID "PCI:65:0:0"
18
+ Option "AllowEmptyInitialConfiguration" "True"
19
+ Option "UseDisplayDevice" "None"
20
+ Option "ProbeAllGpus" "False"
21
+ EndSection
22
+
23
+ Section "Screen"
24
+ Identifier "Screen0"
25
+ Device "Device0"
26
+ Monitor "Monitor0"
27
+ DefaultDepth 24
28
+ Option "AllowEmptyInitialConfiguration" "True"
29
+ SubSection "Display"
30
+ Depth 24
31
+ Virtual 1280 1024
32
+ EndSubSection
33
+ EndSection
code/reveal_vla_bimanual/envs/reveal310.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: reveal310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - ninja
12
+ - make
13
+ - gxx_linux-64
14
+ - pkg-config
15
+ - numpy=1.26.*
16
+ - pandas=2.2.*
17
+ - scipy=1.13.*
18
+ - matplotlib=3.8.*
19
+ - pyyaml=6.*
20
+ - imageio
21
+ - trimesh
22
+ - networkx
23
+ - psutil
24
+ - tqdm
25
+ - pytorch=2.3.1
26
+ - torchvision=0.18.1
27
+ - torchaudio=2.3.1
28
+ - pytorch-cuda=12.1
29
+ - pip:
30
+ - accelerate==0.31.0
31
+ - einops==0.8.0
32
+ - hydra-core==1.3.2
33
+ - omegaconf==2.3.0
34
+ - safetensors==0.4.3
35
+ - tensorboard==2.16.2
36
+ - timm==1.0.7
37
+ - transformers==4.41.2
38
+ - wandb==0.18.0
code/reveal_vla_bimanual/envs/rlbench310.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: rlbench310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - cffi
12
+ - ninja
13
+ - make
14
+ - gxx_linux-64
15
+ - pkg-config
16
+ - numpy=1.26.*
17
+ - pandas=2.2.*
18
+ - scipy=1.13.*
19
+ - matplotlib=3.8.*
20
+ - pyyaml=6.*
21
+ - h5py
22
+ - imageio
23
+ - pillow
24
+ - psutil
25
+ - tqdm
26
+ - trimesh
27
+ - pytorch=2.3.1
28
+ - torchvision=0.18.1
29
+ - torchaudio=2.3.1
30
+ - pytorch-cuda=12.1
31
+ - pip:
32
+ - accelerate==0.31.0
33
+ - absl-py==2.1.0
34
+ - clip @ git+https://github.com/openai/CLIP.git
35
+ - einops==0.8.0
36
+ - ftfy==6.2.0
37
+ - gym==0.26.2
38
+ - hydra-core==1.3.2
39
+ - natsort==8.4.0
40
+ - omegaconf==2.3.0
41
+ - perceiver-pytorch==0.8.8
42
+ - pyrender==0.1.45
43
+ - pytorch-lamb==1.0.0
44
+ - regex==2024.5.15
45
+ - rich==13.9.4
46
+ - rich-click==1.8.9
47
+ - safetensors==0.4.3
48
+ - tensorboard==2.16.2
49
+ - transformers==4.41.2
50
+ - wandb==0.18.0
code/reveal_vla_bimanual/eval/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from eval.metrics import BenchmarkMetrics
2
+
3
+ __all__ = ["BenchmarkMetrics"]
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (215 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (249 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc ADDED
Binary file (344 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc ADDED
Binary file (408 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc ADDED
Binary file (2.03 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc ADDED
Binary file (3.58 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc ADDED
Binary file (1.71 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc ADDED
Binary file (3.29 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc ADDED
Binary file (2.12 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc ADDED
Binary file (3.77 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc ADDED
Binary file (7.33 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc ADDED
Binary file (14.2 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc ADDED
Binary file (5.96 kB). View file
 
code/reveal_vla_bimanual/eval/ablations.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ MANDATORY_ABLATIONS: tuple[str, ...] = (
2
+ "no_reveal_state_head",
3
+ "no_world_model",
4
+ "no_planner_reranking",
5
+ "no_support_mode_conditioning",
6
+ "no_wrist_cameras",
7
+ "no_global_camera",
8
+ )
code/reveal_vla_bimanual/eval/metrics.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import numpy as np
6
+
7
+
8
+ @dataclass
9
+ class BenchmarkMetrics:
10
+ per_task_success: dict[str, float]
11
+ mean_success: float
12
+ visibility_integral: float | None = None
13
+ corridor_availability: float | None = None
14
+ reocclusion_rate: float | None = None
15
+ persistence_horizon_mae: float | None = None
16
+ disturbance_cost: float | None = None
17
+
18
+
19
+ def mean_success(per_task_success: dict[str, float]) -> float:
20
+ if not per_task_success:
21
+ return 0.0
22
+ return float(np.mean(list(per_task_success.values())))
23
+
24
+
25
+ def visibility_integral(curve: np.ndarray) -> float:
26
+ curve = np.asarray(curve, dtype=np.float32)
27
+ return float(curve.sum())
28
+
29
+
30
+ def corridor_availability(corridor_open: np.ndarray) -> float:
31
+ corridor_open = np.asarray(corridor_open, dtype=np.float32)
32
+ return float(corridor_open.mean())
33
+
34
+
35
+ def reocclusion_rate(corridor_open: np.ndarray) -> float:
36
+ corridor_open = np.asarray(corridor_open, dtype=np.float32)
37
+ if corridor_open.size < 2:
38
+ return 0.0
39
+ return float(np.logical_and(corridor_open[:-1] > 0.5, corridor_open[1:] <= 0.5).mean())
40
+
41
+
42
+ def persistence_horizon_mae(prediction: np.ndarray, target: np.ndarray) -> float:
43
+ prediction = np.asarray(prediction, dtype=np.float32)
44
+ target = np.asarray(target, dtype=np.float32)
45
+ return float(np.abs(prediction - target).mean())
46
+
47
+
48
+ def mean_disturbance_cost(values: np.ndarray) -> float:
49
+ values = np.asarray(values, dtype=np.float32)
50
+ if values.size == 0:
51
+ return 0.0
52
+ return float(values.mean())
code/reveal_vla_bimanual/eval/report.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from eval.metrics import BenchmarkMetrics
6
+
7
+
8
+ def write_markdown_report(
9
+ output_path: Path,
10
+ title: str,
11
+ metrics: BenchmarkMetrics,
12
+ hardware: str,
13
+ training_settings: dict[str, str],
14
+ published_reference: dict[str, float] | None = None,
15
+ ) -> None:
16
+ lines = [f"# {title}", "", f"- Hardware: {hardware}"]
17
+ for key, value in training_settings.items():
18
+ lines.append(f"- {key}: {value}")
19
+
20
+ lines.extend(["", "## Success"])
21
+ for task, score in metrics.per_task_success.items():
22
+ lines.append(f"- {task}: {score:.3f}")
23
+ lines.append(f"- mean_success: {metrics.mean_success:.3f}")
24
+
25
+ if published_reference:
26
+ lines.extend(["", "## Published Reference"])
27
+ for task, score in published_reference.items():
28
+ lines.append(f"- {task}: {score:.3f}")
29
+
30
+ output_path.parent.mkdir(parents=True, exist_ok=True)
31
+ output_path.write_text("\n".join(lines), encoding="utf-8")
32
+
33
+
34
+ def write_comparison_report(
35
+ output_path: Path,
36
+ title: str,
37
+ sections: dict[str, dict[str, float | str]],
38
+ ) -> None:
39
+ lines = [f"# {title}", ""]
40
+ for section_name, values in sections.items():
41
+ lines.append(f"## {section_name}")
42
+ for key, value in values.items():
43
+ if isinstance(value, float):
44
+ lines.append(f"- {key}: {value:.3f}")
45
+ else:
46
+ lines.append(f"- {key}: {value}")
47
+ lines.append("")
48
+
49
+ output_path.parent.mkdir(parents=True, exist_ok=True)
50
+ output_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
code/reveal_vla_bimanual/eval/run_ablations.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from eval.ablations import MANDATORY_ABLATIONS
8
+ from eval.report import write_comparison_report
9
+ from eval.run_reveal_benchmark import evaluate_model, load_model
10
+ from sim_reveal import available_proxy_names
11
+
12
+ import torch
13
+
14
+
15
+ def main() -> None:
16
+ parser = argparse.ArgumentParser()
17
+ parser.add_argument("--checkpoint", required=True)
18
+ parser.add_argument("--episodes", type=int, default=24)
19
+ parser.add_argument("--resolution", type=int, default=None)
20
+ parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
21
+ parser.add_argument("--proxies", nargs="*", default=None)
22
+ args = parser.parse_args()
23
+
24
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
+ model, checkpoint = load_model(args.checkpoint, device=device)
26
+ resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
27
+ proxies = list(args.proxies or available_proxy_names())
28
+ output_root = Path(args.output_root)
29
+ output_root.mkdir(parents=True, exist_ok=True)
30
+
31
+ sections = {}
32
+ raw = {}
33
+ for ablation in (None, *MANDATORY_ABLATIONS):
34
+ label = "full_model" if ablation is None else ablation
35
+ metrics = evaluate_model(
36
+ model=model,
37
+ device=device,
38
+ proxies=proxies,
39
+ episodes=args.episodes,
40
+ resolution=resolution,
41
+ ablation=ablation,
42
+ )
43
+ raw[label] = {
44
+ "per_task_success": metrics.per_task_success,
45
+ "mean_success": metrics.mean_success,
46
+ "visibility_integral": metrics.visibility_integral,
47
+ "corridor_availability": metrics.corridor_availability,
48
+ "reocclusion_rate": metrics.reocclusion_rate,
49
+ "persistence_horizon_mae": metrics.persistence_horizon_mae,
50
+ "disturbance_cost": metrics.disturbance_cost,
51
+ }
52
+ sections[label] = {
53
+ "mean_success": metrics.mean_success,
54
+ "visibility_integral": metrics.visibility_integral or 0.0,
55
+ "corridor_availability": metrics.corridor_availability or 0.0,
56
+ "reocclusion_rate": metrics.reocclusion_rate or 0.0,
57
+ "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
58
+ "disturbance_cost": metrics.disturbance_cost or 0.0,
59
+ }
60
+
61
+ json_path = output_root / "ablations.json"
62
+ json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
63
+ write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
64
+ print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
65
+
66
+
67
+ if __name__ == "__main__":
68
+ main()
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import torch
11
+ from torch import Tensor
12
+
13
+ from eval.metrics import (
14
+ BenchmarkMetrics,
15
+ corridor_availability,
16
+ mean_disturbance_cost,
17
+ mean_success,
18
+ persistence_horizon_mae,
19
+ reocclusion_rate,
20
+ visibility_integral,
21
+ )
22
+ from eval.report import write_comparison_report
23
+ from models.action_decoder import ChunkDecoderConfig
24
+ from models.backbones import FrozenVLBackboneConfig
25
+ from models.multiview_fusion import MultiViewFusionConfig
26
+ from models.planner import PlannerConfig
27
+ from models.policy import PolicyConfig
28
+ from models.reveal_head import RevealHeadConfig
29
+ from models.world_model import RevealWMConfig
30
+ from sim_reveal import available_proxy_names, make_proxy_env
31
+ from train.trainer import TrainerConfig, build_policy
32
+
33
+
34
def _policy_config_from_dict(cfg: dict[str, Any]) -> PolicyConfig:
    """Reconstruct a ``PolicyConfig`` from the nested dicts stored in a checkpoint.

    Each sub-config section of *cfg* is expanded into its matching dataclass;
    raises ``KeyError`` if a section is missing.
    """
    section_types = {
        "backbone": FrozenVLBackboneConfig,
        "fusion": MultiViewFusionConfig,
        "decoder": ChunkDecoderConfig,
        "reveal_head": RevealHeadConfig,
        "world_model": RevealWMConfig,
        "planner": PlannerConfig,
    }
    kwargs = {name: ctor(**cfg[name]) for name, ctor in section_types.items()}
    return PolicyConfig(**kwargs)
43
+
44
+
45
def _trainer_config_from_dict(cfg: dict[str, Any]) -> TrainerConfig:
    """Rebuild the ``TrainerConfig`` from its checkpointed dict form."""
    fields = dict(cfg)
    return TrainerConfig(**fields)
47
+
48
+
49
def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch.nn.Module, dict[str, Any]]:
    """Restore a trained policy from *checkpoint_path*.

    Args:
        checkpoint_path: Path to a ``.pt`` file containing ``policy_config``,
            ``trainer_config`` and ``state_dict`` entries.
        device: Device the rebuilt model is moved to.

    Returns:
        Tuple of (model on *device* in eval mode, raw checkpoint dict so
        callers can read metadata such as ``data_resolution``).
    """
    # weights_only=False keeps pre-torch-2.6 behavior: this checkpoint stores
    # plain-Python config dicts alongside the tensors, which the weights-only
    # loader rejects. Only load checkpoints from trusted sources.
    checkpoint = torch.load(Path(checkpoint_path), map_location="cpu", weights_only=False)
    policy_config = _policy_config_from_dict(checkpoint["policy_config"])
    trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
    model = build_policy(policy_config, trainer_config).to(device)
    model.load_state_dict(checkpoint["state_dict"])
    model.eval()
    return model, checkpoint
57
+
58
+
59
+ def _prepare_batch(observation: dict[str, Any], device: torch.device) -> dict[str, Any]:
60
+ images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
61
+ proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
62
+ return {
63
+ "images": images.to(device),
64
+ "proprio": proprio.to(device),
65
+ "texts": [observation["text"]],
66
+ }
67
+
68
+
69
+ def _apply_camera_ablation(images: Tensor, ablation: str | None) -> Tensor:
70
+ images = images.clone()
71
+ if ablation == "no_wrist_cameras":
72
+ images[:, 1:] = 0.0
73
+ if ablation == "no_global_camera":
74
+ images[:, 0] = 0.0
75
+ return images
76
+
77
+
78
def select_chunk(
    model: torch.nn.Module,
    batch: dict[str, Any],
    ablation: str | None = None,
) -> tuple[Tensor, dict[str, Tensor]]:
    """Run one forward pass and choose the action chunk to execute.

    The *ablation* label steers both the forward call (camera blanking,
    plan on/off, support-mode conditioning) and which output tensor is
    returned. Returns ``(chunk, raw_outputs)``.
    """
    forward_kwargs = {
        "images": _apply_camera_ablation(batch["images"], ablation),
        "proprio": batch["proprio"],
        "texts": batch["texts"],
    }
    if not hasattr(model, "reveal_head"):
        # Plain baseline policy without the reveal machinery.
        outputs = model(**forward_kwargs)
        return outputs["action_mean"], outputs
    if ablation == "no_world_model":
        # Skip the world-model planning pass entirely.
        outputs = model(**forward_kwargs, plan=False)
        return outputs["action_mean"], outputs
    outputs = model(
        **forward_kwargs,
        plan=True,
        support_mode_conditioning=(ablation != "no_support_mode_conditioning"),
    )
    if ablation == "no_planner_reranking":
        # Take the first candidate without letting the planner re-rank.
        return outputs["candidate_chunks"][:, 0], outputs
    if "planned_chunk" in outputs and ablation != "no_reveal_state_head":
        return outputs["planned_chunk"], outputs
    return outputs["action_mean"], outputs
105
+
106
+
107
def evaluate_model(
    model: torch.nn.Module,
    device: torch.device,
    proxies: list[str],
    episodes: int,
    resolution: int,
    ablation: str | None = None,
) -> BenchmarkMetrics:
    """Roll out *model* on every proxy task and aggregate benchmark metrics.

    Args:
        model: Policy queried through ``select_chunk`` each step.
        device: Device observation batches are moved to.
        proxies: Proxy environment names understood by ``make_proxy_env``.
        episodes: Number of episodes per proxy task.
        resolution: Image resolution passed to the proxy env.
        ablation: Optional ablation label forwarded to ``select_chunk``.

    Returns:
        ``BenchmarkMetrics`` with per-task success rates plus mean
        visibility/corridor/re-occlusion/persistence/disturbance scores;
        a field is ``None`` when no samples were collected for it.
    """
    per_task_success: dict[str, float] = {}
    visibility_scores: list[float] = []
    corridor_scores: list[float] = []
    reocclusion_scores: list[float] = []
    persistence_errors: list[float] = []
    disturbance_scores: list[float] = []

    for proxy_offset, proxy_name in enumerate(proxies):
        successes: list[float] = []
        for episode_idx in range(episodes):
            # 10_000-wide seed stride keeps per-task seed ranges disjoint and
            # evaluation deterministic across runs.
            env = make_proxy_env(
                proxy_name=proxy_name,
                resolution=resolution,
                seed=proxy_offset * 10_000 + episode_idx,
            )
            # reset() yields (observation, privileged simulator state); the
            # privileged dict carries ground-truth visibility/corridor info.
            observation, privileged_state = env.reset(seed=proxy_offset * 10_000 + episode_idx)
            episode_visibility = [float(privileged_state["visibility"])]
            # Corridor is "available" if any cell is feasible for the current support mode.
            episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
            episode_disturbance = [float(privileged_state["disturbance_cost"])]
            done = False
            while not done:
                batch = _prepare_batch(observation, device=device)
                with torch.no_grad():
                    chunk, outputs = select_chunk(model, batch, ablation=ablation)
                # Receding horizon: execute only the first action of the chunk.
                action = chunk[0, 0].detach().cpu().numpy()
                # step() returns (obs, reward, terminated, truncated, privileged_state).
                observation, _, terminated, truncated, privileged_state = env.step(action)
                episode_visibility.append(float(privileged_state["visibility"]))
                episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
                episode_disturbance.append(float(privileged_state["disturbance_cost"]))
                # Persistence-horizon error is only measurable when the model
                # emits a reveal state and the head is not ablated away.
                if "reveal_state" in outputs and ablation != "no_reveal_state_head":
                    persistence_errors.append(
                        persistence_horizon_mae(
                            outputs["reveal_state"]["persistence_horizon"][0].detach().cpu().numpy(),
                            privileged_state["persistence_horizon"],
                        )
                    )
                done = bool(terminated or truncated)
            successes.append(float(privileged_state["retrieval_success"]))
            visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
            corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
            reocclusion_scores.append(reocclusion_rate(np.asarray(episode_corridor)))
            disturbance_scores.append(mean_disturbance_cost(np.asarray(episode_disturbance)))
        per_task_success[proxy_name] = float(np.mean(successes))

    return BenchmarkMetrics(
        per_task_success=per_task_success,
        mean_success=mean_success(per_task_success),
        visibility_integral=float(np.mean(visibility_scores)) if visibility_scores else None,
        corridor_availability=float(np.mean(corridor_scores)) if corridor_scores else None,
        reocclusion_rate=float(np.mean(reocclusion_scores)) if reocclusion_scores else None,
        persistence_horizon_mae=float(np.mean(persistence_errors)) if persistence_errors else None,
        disturbance_cost=float(np.mean(disturbance_scores)) if disturbance_scores else None,
    )
168
+
169
+
170
+ def _metrics_to_dict(metrics: BenchmarkMetrics) -> dict[str, float | dict[str, float]]:
171
+ return {
172
+ "per_task_success": metrics.per_task_success,
173
+ "mean_success": metrics.mean_success,
174
+ "visibility_integral": metrics.visibility_integral or 0.0,
175
+ "corridor_availability": metrics.corridor_availability or 0.0,
176
+ "reocclusion_rate": metrics.reocclusion_rate or 0.0,
177
+ "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
178
+ "disturbance_cost": metrics.disturbance_cost or 0.0,
179
+ }
180
+
181
+
182
def main() -> None:
    """CLI entry point: evaluate checkpoints on the reveal proxy benchmark.

    Each ``--model`` argument is a ``label=/abs/path/checkpoint.pt`` pair; all
    models are evaluated on the same proxy suite and the results are written
    as one JSON payload plus a markdown comparison report under
    ``--output-root``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", action="append", required=True, help="label=/abs/path/checkpoint.pt")
    parser.add_argument("--episodes", type=int, default=24)
    parser.add_argument("--resolution", type=int, default=None)
    parser.add_argument("--ablation", default=None)
    parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
    parser.add_argument("--proxies", nargs="*", default=None)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    proxies = list(args.proxies or available_proxy_names())
    output_root = Path(args.output_root)
    output_root.mkdir(parents=True, exist_ok=True)

    sections: dict[str, dict[str, float | str]] = {}
    raw_metrics: dict[str, dict[str, float | dict[str, float]]] = {}
    for item in args.model:
        # Fail fast with a usage message instead of an opaque ValueError when
        # the "label=" separator is missing.
        if "=" not in item:
            parser.error(f"--model expects label=/abs/path/checkpoint.pt, got {item!r}")
        label, checkpoint_path = item.split("=", maxsplit=1)
        model, checkpoint = load_model(checkpoint_path, device=device)
        # Fall back to the resolution the checkpoint was trained at (default 96).
        resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
        metrics = evaluate_model(
            model=model,
            device=device,
            proxies=proxies,
            episodes=args.episodes,
            resolution=resolution,
            ablation=args.ablation,
        )
        raw_metrics[label] = _metrics_to_dict(metrics)
        sections[label] = {
            "checkpoint": checkpoint_path,
            "mean_success": metrics.mean_success,
            "visibility_integral": metrics.visibility_integral or 0.0,
            "corridor_availability": metrics.corridor_availability or 0.0,
            "reocclusion_rate": metrics.reocclusion_rate or 0.0,
            "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
            "disturbance_cost": metrics.disturbance_cost or 0.0,
        }
        for task_name, score in metrics.per_task_success.items():
            sections[label][f"{task_name}_success"] = score

    json_path = output_root / "reveal_benchmark.json"
    json_path.write_text(json.dumps(raw_metrics, indent=2), encoding="utf-8")
    write_comparison_report(output_root / "reveal_benchmark.md", "Reveal Proxy Benchmark", sections)
    print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
228
+
229
+
230
# Script entry point for the reveal proxy benchmark runner.
if __name__ == "__main__":
    main()