lsnu commited on
Commit
c6cdf9b
·
verified ·
1 Parent(s): 10471c5

Add files using upload-large-folder tool

Browse files
Files changed (43) hide show
  1. artifacts/logs/system/rlbench_launch_smoke.txt +84 -0
  2. artifacts/logs/system/x99.conf +33 -0
  3. artifacts/logs/system/x99.log +16 -0
  4. artifacts/logs/system/x99.pid +1 -0
  5. artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml +15 -8
  6. artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json +80 -32
  7. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml +15 -8
  8. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json +40 -16
  9. artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml +18 -9
  10. artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json +208 -128
  11. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml +18 -9
  12. artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json +93 -0
  13. artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md +57 -0
  14. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json +93 -0
  15. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md +57 -0
  16. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json +156 -0
  17. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json +93 -0
  18. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md +57 -0
  19. artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json +93 -0
  20. artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md +57 -0
  21. artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml +98 -0
  22. artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json +40 -0
  23. artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml +98 -0
  24. artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json +68 -0
  25. artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json +93 -0
  26. artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md +57 -0
  27. artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json +28 -0
  28. artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md +25 -0
  29. code/reveal_vla_bimanual/eval/run_ablations.py +31 -2
  30. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +42 -2
  31. code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py +59 -5
  32. code/reveal_vla_bimanual/models/action_decoder.py +110 -19
  33. code/reveal_vla_bimanual/models/observation_memory.py +56 -0
  34. code/reveal_vla_bimanual/models/planner.py +60 -32
  35. code/reveal_vla_bimanual/models/policy.py +89 -10
  36. code/reveal_vla_bimanual/models/reveal_head.py +88 -25
  37. code/reveal_vla_bimanual/models/world_model.py +10 -0
  38. code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO +67 -2
  39. code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt +15 -0
  40. code/reveal_vla_bimanual/scripts/start_rlbench_x.sh +42 -1
  41. code/reveal_vla_bimanual/sim_reveal/dataset.py +102 -3
  42. code/reveal_vla_bimanual/sim_reveal/generate_dataset.py +4 -0
  43. code/reveal_vla_bimanual/sim_reveal/procedural_envs.py +189 -59
artifacts/logs/system/rlbench_launch_smoke.txt ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ qt.qpa.xcb: QXcbConnection: XCB error: 148 (Unknown), sequence: 181, resource id: 0, major code: 140 (Unknown), minor code: 20
2
+ WARNING:root:not sure how _robot_shapes are used is used.
3
+ {
4
+ "display": ":99",
5
+ "headless": true,
6
+ "task": "bimanual_lift_ball",
7
+ "description": "Lift the ball",
8
+ "rgb_shapes": {
9
+ "front": [
10
+ 224,
11
+ 224,
12
+ 3
13
+ ],
14
+ "wrist_left": [
15
+ 224,
16
+ 224,
17
+ 3
18
+ ],
19
+ "wrist_right": [
20
+ 224,
21
+ 224,
22
+ 3
23
+ ]
24
+ },
25
+ "intrinsic_shapes": {
26
+ "front": [
27
+ 3,
28
+ 3
29
+ ],
30
+ "wrist_left": [
31
+ 3,
32
+ 3
33
+ ],
34
+ "wrist_right": [
35
+ 3,
36
+ 3
37
+ ]
38
+ },
39
+ "extrinsic_shapes": {
40
+ "front": [
41
+ 4,
42
+ 4
43
+ ],
44
+ "wrist_left": [
45
+ 4,
46
+ 4
47
+ ],
48
+ "wrist_right": [
49
+ 4,
50
+ 4
51
+ ]
52
+ },
53
+ "point_cloud_shapes": {
54
+ "front": [
55
+ 224,
56
+ 224,
57
+ 3
58
+ ],
59
+ "wrist_left": [
60
+ 224,
61
+ 224,
62
+ 3
63
+ ],
64
+ "wrist_right": [
65
+ 224,
66
+ 224,
67
+ 3
68
+ ]
69
+ },
70
+ "proprio_shape": [
71
+ 6
72
+ ],
73
+ "action_shape": [
74
+ 16
75
+ ],
76
+ "reward": 0.0,
77
+ "done": false,
78
+ "front_rgb_shape_after_step": [
79
+ 224,
80
+ 224,
81
+ 3
82
+ ]
83
+ }
84
+ [CoppeliaSim:loadinfo] done.
artifacts/logs/system/x99.conf ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Section "ServerLayout"
2
+ Identifier "Layout0"
3
+ Screen 0 "Screen0"
4
+ EndSection
5
+
6
+ Section "Monitor"
7
+ Identifier "Monitor0"
8
+ VendorName "Unknown"
9
+ ModelName "Unknown"
10
+ Option "DPMS"
11
+ EndSection
12
+
13
+ Section "Device"
14
+ Identifier "Device0"
15
+ Driver "nvidia"
16
+ VendorName "NVIDIA Corporation"
17
+ BusID "PCI:1:0:0"
18
+ Option "AllowEmptyInitialConfiguration" "True"
19
+ Option "UseDisplayDevice" "None"
20
+ Option "ProbeAllGpus" "False"
21
+ EndSection
22
+
23
+ Section "Screen"
24
+ Identifier "Screen0"
25
+ Device "Device0"
26
+ Monitor "Monitor0"
27
+ DefaultDepth 24
28
+ Option "AllowEmptyInitialConfiguration" "True"
29
+ SubSection "Display"
30
+ Depth 24
31
+ Virtual 1280 1024
32
+ EndSubSection
33
+ EndSection
artifacts/logs/system/x99.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ X.Org X Server 1.21.1.4
3
+ X Protocol Version 11, Revision 0
4
+ Current Operating System: Linux c36959bce5da 6.8.0-52-generic #53~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Jan 15 19:18:46 UTC 2 x86_64
5
+ Kernel command line: BOOT_IMAGE=/boot/vmlinuz-6.8.0-52-generic root=UUID=81695786-7953-442a-89df-662fe6d16003 ro systemd.unified_cgroup_hierarchy=false
6
+ xorg-server 2:21.1.4-2ubuntu1.7~22.04.16 (For technical support please see http://www.ubuntu.com/support)
7
+ Current version of pixman: 0.40.0
8
+ Before reporting problems, check http://wiki.x.org
9
+ to make sure that you have the latest version.
10
+ Markers: (--) probed, (**) from config file, (==) default setting,
11
+ (++) from command line, (!!) notice, (II) informational,
12
+ (WW) warning, (EE) error, (NI) not implemented, (??) unknown.
13
+ (==) Log file: "/var/log/Xorg.99.log", Time: Mon Mar 23 15:30:14 2026
14
+ (++) Using config file: "/workspace/logs/x99.conf"
15
+ (==) Using config directory: "/etc/X11/xorg.conf.d"
16
+ (==) Using system config directory "/usr/share/X11/xorg.conf.d"
artifacts/logs/system/x99.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 9762
artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 8
@@ -45,6 +47,11 @@ policy:
45
  dropout: 0.1
46
  proprio_dim: 32
47
  proprio_tokens: 1
 
 
 
 
 
48
  decoder:
49
  hidden_dim: 128
50
  num_heads: 4
@@ -68,13 +75,10 @@ policy:
68
  num_approach_templates: 32
69
  rollout_horizon: 5
70
  planner:
 
71
  num_candidates: 8
72
- corridor_weight: 1.0
73
- persistence_weight: 0.5
74
- proposal_weight: 0.5
75
- disturbance_weight: 0.75
76
- reocclusion_weight: 0.5
77
- visibility_weight: 0.25
78
  loss_weights:
79
  action: 1.0
80
  support_mode: 0.1
@@ -83,3 +87,6 @@ loss_weights:
83
  disturbance: 0.05
84
  world_model: 0.1
85
  belief: 0.05
 
 
 
 
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 8
 
47
  dropout: 0.1
48
  proprio_dim: 32
49
  proprio_tokens: 1
50
+ memory:
51
+ hidden_dim: 128
52
+ history_steps: 2
53
+ num_layers: 1
54
+ dropout: 0.1
55
  decoder:
56
  hidden_dim: 128
57
  num_heads: 4
 
75
  num_approach_templates: 32
76
  rollout_horizon: 5
77
  planner:
78
+ hidden_dim: 128
79
  num_candidates: 8
80
+ action_dim: 14
81
+ utility_margin: 0.1
 
 
 
 
82
  loss_weights:
83
  action: 1.0
84
  support_mode: 0.1
 
87
  disturbance: 0.05
88
  world_model: 0.1
89
  belief: 0.05
90
+ planner_success: 0.0
91
+ planner_risk: 0.0
92
+ planner_ranking: 0.0
artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json CHANGED
@@ -2,104 +2,152 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.06700062464612226,
6
- "total": 0.06700062464612226,
 
 
 
7
  "world_model": 0.0
8
  },
9
  "val": {
10
- "action": 0.02209080010652542,
11
- "total": 0.02209080010652542,
 
 
 
12
  "world_model": 0.0
13
  }
14
  },
15
  {
16
  "epoch": 1,
17
  "train": {
18
- "action": 0.02441179845482111,
19
- "total": 0.02441179845482111,
 
 
 
20
  "world_model": 0.0
21
  },
22
  "val": {
23
- "action": 0.01861108955927193,
24
- "total": 0.01861108955927193,
 
 
 
25
  "world_model": 0.0
26
  }
27
  },
28
  {
29
  "epoch": 2,
30
  "train": {
31
- "action": 0.020652000947544973,
32
- "total": 0.020652000947544973,
 
 
 
33
  "world_model": 0.0
34
  },
35
  "val": {
36
- "action": 0.01581601658836007,
37
- "total": 0.01581601658836007,
 
 
 
38
  "world_model": 0.0
39
  }
40
  },
41
  {
42
  "epoch": 3,
43
  "train": {
44
- "action": 0.01735153196689983,
45
- "total": 0.01735153196689983,
 
 
 
46
  "world_model": 0.0
47
  },
48
  "val": {
49
- "action": 0.01413003564812243,
50
- "total": 0.01413003564812243,
 
 
 
51
  "world_model": 0.0
52
  }
53
  },
54
  {
55
  "epoch": 4,
56
  "train": {
57
- "action": 0.015502698409060637,
58
- "total": 0.015502698409060637,
 
 
 
59
  "world_model": 0.0
60
  },
61
  "val": {
62
- "action": 0.012679400155320764,
63
- "total": 0.012679400155320764,
 
 
 
64
  "world_model": 0.0
65
  }
66
  },
67
  {
68
  "epoch": 5,
69
  "train": {
70
- "action": 0.015521424783704182,
71
- "total": 0.015521424783704182,
 
 
 
72
  "world_model": 0.0
73
  },
74
  "val": {
75
- "action": 0.011973066837526858,
76
- "total": 0.011973066837526858,
 
 
 
77
  "world_model": 0.0
78
  }
79
  },
80
  {
81
  "epoch": 6,
82
  "train": {
83
- "action": 0.014476912096142769,
84
- "total": 0.014476912096142769,
 
 
 
85
  "world_model": 0.0
86
  },
87
  "val": {
88
- "action": 0.011093099834397435,
89
- "total": 0.011093099834397435,
 
 
 
90
  "world_model": 0.0
91
  }
92
  },
93
  {
94
  "epoch": 7,
95
  "train": {
96
- "action": 0.012226066280466815,
97
- "total": 0.012226066280466815,
 
 
 
98
  "world_model": 0.0
99
  },
100
  "val": {
101
- "action": 0.012411019764840603,
102
- "total": 0.012411019764840603,
 
 
 
103
  "world_model": 0.0
104
  }
105
  }
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.07641935829694073,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.07641935829694073,
10
  "world_model": 0.0
11
  },
12
  "val": {
13
+ "action": 0.023501936811953783,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.023501936811953783,
18
  "world_model": 0.0
19
  }
20
  },
21
  {
22
  "epoch": 1,
23
  "train": {
24
+ "action": 0.025264446934064228,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.025264446934064228,
29
  "world_model": 0.0
30
  },
31
  "val": {
32
+ "action": 0.020145865622907877,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.020145865622907877,
37
  "world_model": 0.0
38
  }
39
  },
40
  {
41
  "epoch": 2,
42
  "train": {
43
+ "action": 0.02363461550946037,
44
+ "planner_ranking": 0.0,
45
+ "planner_risk": 0.0,
46
+ "planner_success": 0.0,
47
+ "total": 0.02363461550946037,
48
  "world_model": 0.0
49
  },
50
  "val": {
51
+ "action": 0.019843176240101457,
52
+ "planner_ranking": 0.0,
53
+ "planner_risk": 0.0,
54
+ "planner_success": 0.0,
55
+ "total": 0.019843176240101457,
56
  "world_model": 0.0
57
  }
58
  },
59
  {
60
  "epoch": 3,
61
  "train": {
62
+ "action": 0.022404288329804938,
63
+ "planner_ranking": 0.0,
64
+ "planner_risk": 0.0,
65
+ "planner_success": 0.0,
66
+ "total": 0.022404288329804938,
67
  "world_model": 0.0
68
  },
69
  "val": {
70
+ "action": 0.02007088577374816,
71
+ "planner_ranking": 0.0,
72
+ "planner_risk": 0.0,
73
+ "planner_success": 0.0,
74
+ "total": 0.02007088577374816,
75
  "world_model": 0.0
76
  }
77
  },
78
  {
79
  "epoch": 4,
80
  "train": {
81
+ "action": 0.022064159469058115,
82
+ "planner_ranking": 0.0,
83
+ "planner_risk": 0.0,
84
+ "planner_success": 0.0,
85
+ "total": 0.022064159469058115,
86
  "world_model": 0.0
87
  },
88
  "val": {
89
+ "action": 0.020531073212623596,
90
+ "planner_ranking": 0.0,
91
+ "planner_risk": 0.0,
92
+ "planner_success": 0.0,
93
+ "total": 0.020531073212623596,
94
  "world_model": 0.0
95
  }
96
  },
97
  {
98
  "epoch": 5,
99
  "train": {
100
+ "action": 0.022056781298791368,
101
+ "planner_ranking": 0.0,
102
+ "planner_risk": 0.0,
103
+ "planner_success": 0.0,
104
+ "total": 0.022056781298791368,
105
  "world_model": 0.0
106
  },
107
  "val": {
108
+ "action": 0.02022958523593843,
109
+ "planner_ranking": 0.0,
110
+ "planner_risk": 0.0,
111
+ "planner_success": 0.0,
112
+ "total": 0.02022958523593843,
113
  "world_model": 0.0
114
  }
115
  },
116
  {
117
  "epoch": 6,
118
  "train": {
119
+ "action": 0.02186405410369237,
120
+ "planner_ranking": 0.0,
121
+ "planner_risk": 0.0,
122
+ "planner_success": 0.0,
123
+ "total": 0.02186405410369237,
124
  "world_model": 0.0
125
  },
126
  "val": {
127
+ "action": 0.02032211748883128,
128
+ "planner_ranking": 0.0,
129
+ "planner_risk": 0.0,
130
+ "planner_success": 0.0,
131
+ "total": 0.02032211748883128,
132
  "world_model": 0.0
133
  }
134
  },
135
  {
136
  "epoch": 7,
137
  "train": {
138
+ "action": 0.0213407213644435,
139
+ "planner_ranking": 0.0,
140
+ "planner_risk": 0.0,
141
+ "planner_success": 0.0,
142
+ "total": 0.0213407213644435,
143
  "world_model": 0.0
144
  },
145
  "val": {
146
+ "action": 0.019940752536058426,
147
+ "planner_ranking": 0.0,
148
+ "planner_risk": 0.0,
149
+ "planner_success": 0.0,
150
+ "total": 0.019940752536058426,
151
  "world_model": 0.0
152
  }
153
  }
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 4
@@ -48,6 +50,11 @@ policy:
48
  dropout: 0.1
49
  proprio_dim: 32
50
  proprio_tokens: 1
 
 
 
 
 
51
  decoder:
52
  hidden_dim: 512
53
  num_heads: 8
@@ -71,13 +78,10 @@ policy:
71
  num_approach_templates: 32
72
  rollout_horizon: 5
73
  planner:
 
74
  num_candidates: 8
75
- corridor_weight: 1.0
76
- persistence_weight: 0.5
77
- proposal_weight: 0.5
78
- disturbance_weight: 0.75
79
- reocclusion_weight: 0.5
80
- visibility_weight: 0.25
81
  loss_weights:
82
  action: 1.0
83
  support_mode: 0.1
@@ -86,3 +90,6 @@ loss_weights:
86
  disturbance: 0.05
87
  world_model: 0.1
88
  belief: 0.05
 
 
 
 
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 4
 
50
  dropout: 0.1
51
  proprio_dim: 32
52
  proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 512
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
  decoder:
59
  hidden_dim: 512
60
  num_heads: 8
 
78
  num_approach_templates: 32
79
  rollout_horizon: 5
80
  planner:
81
+ hidden_dim: 512
82
  num_candidates: 8
83
+ action_dim: 14
84
+ utility_margin: 0.1
 
 
 
 
85
  loss_weights:
86
  action: 1.0
87
  support_mode: 0.1
 
90
  disturbance: 0.05
91
  world_model: 0.1
92
  belief: 0.05
93
+ planner_success: 0.0
94
+ planner_risk: 0.0
95
+ planner_ranking: 0.0
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json CHANGED
@@ -2,52 +2,76 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.14342915779711063,
6
- "total": 0.14342915779711063,
 
 
 
7
  "world_model": 0.0
8
  },
9
  "val": {
10
- "action": 0.026520084648851364,
11
- "total": 0.026520084648851364,
 
 
 
12
  "world_model": 0.0
13
  }
14
  },
15
  {
16
  "epoch": 1,
17
  "train": {
18
- "action": 0.01376689436079944,
19
- "total": 0.01376689436079944,
 
 
 
20
  "world_model": 0.0
21
  },
22
  "val": {
23
- "action": 0.00792281218390498,
24
- "total": 0.00792281218390498,
 
 
 
25
  "world_model": 0.0
26
  }
27
  },
28
  {
29
  "epoch": 2,
30
  "train": {
31
- "action": 0.009396829446095057,
32
- "total": 0.009396829446095057,
 
 
 
33
  "world_model": 0.0
34
  },
35
  "val": {
36
- "action": 0.006728713663058385,
37
- "total": 0.006728713663058385,
 
 
 
38
  "world_model": 0.0
39
  }
40
  },
41
  {
42
  "epoch": 3,
43
  "train": {
44
- "action": 0.007774835790102784,
45
- "total": 0.007774835790102784,
 
 
 
46
  "world_model": 0.0
47
  },
48
  "val": {
49
- "action": 0.005187951255634073,
50
- "total": 0.005187951255634073,
 
 
 
51
  "world_model": 0.0
52
  }
53
  }
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.22041595953453275,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.22041595953453275,
10
  "world_model": 0.0
11
  },
12
  "val": {
13
+ "action": 0.030633409138000202,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.030633409138000202,
18
  "world_model": 0.0
19
  }
20
  },
21
  {
22
  "epoch": 1,
23
  "train": {
24
+ "action": 0.02929408008144944,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.02929408008144944,
29
  "world_model": 0.0
30
  },
31
  "val": {
32
+ "action": 0.022482769120307194,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.022482769120307194,
37
  "world_model": 0.0
38
  }
39
  },
40
  {
41
  "epoch": 2,
42
  "train": {
43
+ "action": 0.023234238926106723,
44
+ "planner_ranking": 0.0,
45
+ "planner_risk": 0.0,
46
+ "planner_success": 0.0,
47
+ "total": 0.023234238926106723,
48
  "world_model": 0.0
49
  },
50
  "val": {
51
+ "action": 0.018214622157670203,
52
+ "planner_ranking": 0.0,
53
+ "planner_risk": 0.0,
54
+ "planner_success": 0.0,
55
+ "total": 0.018214622157670203,
56
  "world_model": 0.0
57
  }
58
  },
59
  {
60
  "epoch": 3,
61
  "train": {
62
+ "action": 0.017409040848602644,
63
+ "planner_ranking": 0.0,
64
+ "planner_risk": 0.0,
65
+ "planner_success": 0.0,
66
+ "total": 0.017409040848602644,
67
  "world_model": 0.0
68
  },
69
  "val": {
70
+ "action": 0.010200991117883296,
71
+ "planner_ranking": 0.0,
72
+ "planner_risk": 0.0,
73
+ "planner_success": 0.0,
74
+ "total": 0.010200991117883296,
75
  "world_model": 0.0
76
  }
77
  }
artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
- rebuild_dataset: false
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 8
@@ -45,6 +47,11 @@ policy:
45
  dropout: 0.1
46
  proprio_dim: 32
47
  proprio_tokens: 1
 
 
 
 
 
48
  decoder:
49
  hidden_dim: 128
50
  num_heads: 4
@@ -60,6 +67,8 @@ policy:
60
  num_approach_templates: 32
61
  rollout_horizon: 5
62
  belief_map_size: 32
 
 
63
  predict_belief_map: true
64
  world_model:
65
  hidden_dim: 128
@@ -68,13 +77,10 @@ policy:
68
  num_approach_templates: 32
69
  rollout_horizon: 5
70
  planner:
 
71
  num_candidates: 8
72
- corridor_weight: 1.0
73
- persistence_weight: 0.65
74
- proposal_weight: 0.35
75
- disturbance_weight: 0.8
76
- reocclusion_weight: 0.6
77
- visibility_weight: 0.35
78
  loss_weights:
79
  action: 1.0
80
  support_mode: 0.15
@@ -83,3 +89,6 @@ loss_weights:
83
  disturbance: 0.1
84
  world_model: 0.2
85
  belief: 0.05
 
 
 
 
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
15
+ rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 8
 
47
  dropout: 0.1
48
  proprio_dim: 32
49
  proprio_tokens: 1
50
+ memory:
51
+ hidden_dim: 128
52
+ history_steps: 2
53
+ num_layers: 1
54
+ dropout: 0.1
55
  decoder:
56
  hidden_dim: 128
57
  num_heads: 4
 
67
  num_approach_templates: 32
68
  rollout_horizon: 5
69
  belief_map_size: 32
70
+ field_size: 16
71
+ num_heads: 4
72
  predict_belief_map: true
73
  world_model:
74
  hidden_dim: 128
 
77
  num_approach_templates: 32
78
  rollout_horizon: 5
79
  planner:
80
+ hidden_dim: 128
81
  num_candidates: 8
82
+ action_dim: 14
83
+ utility_margin: 0.1
 
 
 
 
84
  loss_weights:
85
  action: 1.0
86
  support_mode: 0.15
 
89
  disturbance: 0.1
90
  world_model: 0.2
91
  belief: 0.05
92
+ planner_success: 0.2
93
+ planner_risk: 0.1
94
+ planner_ranking: 0.1
artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json CHANGED
@@ -2,185 +2,265 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.2602546961667637,
6
- "belief": 0.4802860766649246,
7
- "corridor": 0.6443073948224386,
8
- "disturbance": 0.006578955658672688,
9
- "persistence": 4.514919241269429,
10
- "support_mode": 0.8015391031901041,
11
- "total": 2.0875226110219955,
12
- "world_model": 5.510057131449382
 
 
 
 
 
13
  },
14
  "val": {
15
- "action": 0.04658499173820019,
16
- "belief": 0.280171237885952,
17
- "corridor": 0.5032978095114231,
18
- "disturbance": 0.003645064221927896,
19
- "persistence": 3.8178451359272003,
20
- "support_mode": 0.6714280992746353,
21
- "total": 1.012940600514412,
22
- "world_model": 1.8441212028265
 
 
 
 
 
23
  }
24
  },
25
  {
26
  "epoch": 1,
27
  "train": {
28
- "action": 0.03881739747400085,
29
- "belief": 0.18641860752056041,
30
- "corridor": 0.3944183625280857,
31
- "disturbance": 0.030439561344489146,
32
- "persistence": 3.206294293204943,
33
- "support_mode": 0.5347911287099123,
34
- "total": 0.9082020496328672,
35
- "world_model": 1.8864398151636124
 
 
 
 
 
36
  },
37
  "val": {
38
- "action": 0.04213718790560961,
39
- "belief": 0.15712551027536392,
40
- "corridor": 0.3507457673549652,
41
- "disturbance": 0.006276358384639025,
42
- "persistence": 1.8078171163797379,
43
- "support_mode": 0.10970124043524265,
44
- "total": 0.6724201738834381,
45
- "world_model": 1.772064983844757
 
 
 
 
 
46
  }
47
  },
48
  {
49
  "epoch": 2,
50
  "train": {
51
- "action": 0.031200370130439598,
52
- "belief": 0.13828600694735846,
53
- "corridor": 0.31750819956262905,
54
- "disturbance": 0.011857866222271696,
55
- "persistence": 1.7015922193725903,
56
- "support_mode": 0.02674841312303518,
57
- "total": 0.6129550884167353,
58
- "world_model": 1.6799074759085972
 
 
 
 
 
59
  },
60
  "val": {
61
- "action": 0.019523032009601593,
62
- "belief": 0.09429990872740746,
63
- "corridor": 0.24884792044758797,
64
- "disturbance": 0.0043011417728848755,
65
- "persistence": 1.5114311277866364,
66
- "support_mode": 0.0060500025865621865,
67
- "total": 0.5359727554023266,
68
- "world_model": 1.5474220663309097
 
 
 
 
 
69
  }
70
  },
71
  {
72
  "epoch": 3,
73
  "train": {
74
- "action": 0.022356805779660743,
75
- "belief": 0.09125891048461199,
76
- "corridor": 0.23351835707823435,
77
- "disturbance": 0.006718798467773013,
78
- "persistence": 1.6300043910741806,
79
- "support_mode": 0.004253969304651643,
80
- "total": 0.5548354809482893,
81
- "world_model": 1.5845081210136414
 
 
 
 
 
82
  },
83
  "val": {
84
- "action": 0.01580847823061049,
85
- "belief": 0.09042494650930166,
86
- "corridor": 0.22376472875475883,
87
- "disturbance": 0.018967560958117247,
88
- "persistence": 1.4363956600427628,
89
- "support_mode": 0.03418254409916699,
90
- "total": 0.5279115326702595,
91
- "world_model": 1.5608257874846458
 
 
 
 
 
92
  }
93
  },
94
  {
95
  "epoch": 4,
96
  "train": {
97
- "action": 0.019881066245337326,
98
- "belief": 0.08954659259567659,
99
- "corridor": 0.21636931287745634,
100
- "disturbance": 0.005539724506282558,
101
- "persistence": 1.592231921851635,
102
- "support_mode": 0.008331454223177085,
103
- "total": 0.5372808227936426,
104
- "world_model": 1.5431083713968594
 
 
 
 
 
105
  },
106
  "val": {
107
- "action": 0.015133287757635117,
108
- "belief": 0.08718204218894243,
109
- "corridor": 0.20481965504586697,
110
- "disturbance": 0.0031357303814729676,
111
- "persistence": 1.3192060887813568,
112
- "support_mode": 0.0030863596766721457,
113
- "total": 0.47997843474149704,
114
- "world_model": 1.4341248571872711
 
 
 
 
 
115
  }
116
  },
117
  {
118
  "epoch": 5,
119
  "train": {
120
- "action": 0.030778280459344387,
121
- "belief": 0.09159998937199514,
122
- "corridor": 0.21967005419234434,
123
- "disturbance": 0.005901901221174437,
124
- "persistence": 1.651158797244231,
125
- "support_mode": 0.0024410486221313477,
126
- "total": 0.5050872204204401,
127
- "world_model": 1.2986134762565296
 
 
 
 
 
128
  },
129
  "val": {
130
- "action": 0.03259791061282158,
131
- "belief": 0.08867455553263426,
132
- "corridor": 0.20528649538755417,
133
- "disturbance": 0.0037689711316488683,
134
- "persistence": 1.3772646486759186,
135
- "support_mode": 0.0007588127191411331,
136
- "total": 0.4101765304803848,
137
- "world_model": 0.9693519398570061
 
 
 
 
 
138
  }
139
  },
140
  {
141
  "epoch": 6,
142
  "train": {
143
- "action": 0.028416083427146077,
144
- "belief": 0.09289384291817744,
145
- "corridor": 0.22298985657592615,
146
- "disturbance": 0.0031898027373244986,
147
- "persistence": 1.2752377291520436,
148
- "support_mode": 0.04850278014297752,
149
- "total": 0.40898223718007404,
150
- "world_model": 0.9810265600681305
 
 
 
 
 
151
  },
152
  "val": {
153
- "action": 0.02159481483977288,
154
- "belief": 0.08797950763255358,
155
- "corridor": 0.20524934865534306,
156
- "disturbance": 0.0015436648827744648,
157
- "persistence": 1.286000706255436,
158
- "support_mode": 0.0010480962373549119,
159
- "total": 0.3605738691985607,
160
- "world_model": 0.8230927512049675
 
 
 
 
 
161
  }
162
  },
163
  {
164
  "epoch": 7,
165
  "train": {
166
- "action": 0.021424691736077268,
167
- "belief": 0.0899931692207853,
168
- "corridor": 0.21607277914881706,
169
- "disturbance": 0.0034827212220989168,
170
- "persistence": 0.9069182885189851,
171
- "support_mode": 0.00435957100125961,
172
- "total": 0.3383450036247571,
173
- "world_model": 0.8875602881113688
 
 
 
 
 
174
  },
175
  "val": {
176
- "action": 0.017686392879113555,
177
- "belief": 0.09035013243556023,
178
- "corridor": 0.21036655083298683,
179
- "disturbance": 0.004888073919573799,
180
- "persistence": 0.5709216743707657,
181
- "support_mode": 0.001884725206764415,
182
- "total": 0.31777225248515606,
183
- "world_model": 0.978156752884388
 
 
 
 
 
184
  }
185
  }
186
  ]
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.06191213273753723,
6
+ "belief": 0.3969618324190378,
7
+ "corridor": 0.4305709345887105,
8
+ "disturbance": 0.03469782391524253,
9
+ "persistence": 4.175889949003856,
10
+ "planner_ranking": 0.09875048324465752,
11
+ "planner_risk": 0.04163226300928121,
12
+ "planner_success": 0.6267699748277664,
13
+ "reocclusion": 0.6964956695834795,
14
+ "support_mode": 0.7304433186848959,
15
+ "total": 2.203073134024938,
16
+ "uncertainty": 0.3537220476816098,
17
+ "world_model": 6.738570133845012
18
  },
19
  "val": {
20
+ "action": 0.024067950202152133,
21
+ "belief": 0.12672400567680597,
22
+ "corridor": 0.24239582754671574,
23
+ "disturbance": 0.002230136582511477,
24
+ "persistence": 3.987179756164551,
25
+ "planner_ranking": 0.09912661369889975,
26
+ "planner_risk": 0.011295226053334773,
27
+ "planner_success": 0.6335860788822174,
28
+ "reocclusion": 0.6723387092351913,
29
+ "support_mode": 0.6936560198664665,
30
+ "total": 1.2380555346608162,
31
+ "uncertainty": 0.14553490467369556,
32
+ "world_model": 2.517606645822525
33
  }
34
  },
35
  {
36
  "epoch": 1,
37
  "train": {
38
+ "action": 0.02929696316520373,
39
+ "belief": 0.1461242881293098,
40
+ "corridor": 0.25149450699488324,
41
+ "disturbance": 0.004778304447730382,
42
+ "persistence": 4.120666732390721,
43
+ "planner_ranking": 0.09903711639344692,
44
+ "planner_risk": 0.0110635906457901,
45
+ "planner_success": 0.6181311855713526,
46
+ "reocclusion": 0.6676681761940321,
47
+ "support_mode": 0.6968543653686842,
48
+ "total": 1.2383184656500816,
49
+ "uncertainty": 0.07630281266756356,
50
+ "world_model": 2.42795492708683
51
  },
52
  "val": {
53
+ "action": 0.023099895333871245,
54
+ "belief": 0.12967702373862267,
55
+ "corridor": 0.24076062999665737,
56
+ "disturbance": 0.002764956690953113,
57
+ "persistence": 3.8617295920848846,
58
+ "planner_ranking": 0.09899506811052561,
59
+ "planner_risk": 0.011750921490602195,
60
+ "planner_success": 0.6350084543228149,
61
+ "reocclusion": 0.6680542901158333,
62
+ "support_mode": 0.6658758223056793,
63
+ "total": 1.1821558326482773,
64
+ "uncertainty": 0.03126319474540651,
65
+ "world_model": 2.3316954374313354
66
  }
67
  },
68
  {
69
  "epoch": 2,
70
  "train": {
71
+ "action": 0.02398723006869356,
72
+ "belief": 0.1364164650440216,
73
+ "corridor": 0.24753919864694277,
74
+ "disturbance": 0.0023582005330050984,
75
+ "persistence": 4.01392004887263,
76
+ "planner_ranking": 0.09883626519391935,
77
+ "planner_risk": 0.010473574026642988,
78
+ "planner_success": 0.6160491754611334,
79
+ "reocclusion": 0.6690979475776354,
80
+ "support_mode": 0.6750953321655592,
81
+ "total": 1.1870681991179783,
82
+ "uncertainty": 0.030905649531632662,
83
+ "world_model": 2.2801418056090674
84
  },
85
  "val": {
86
+ "action": 0.01999341929331422,
87
+ "belief": 0.12642040569335222,
88
+ "corridor": 0.2386692836880684,
89
+ "disturbance": 0.0017178563502966426,
90
+ "persistence": 3.911038339138031,
91
+ "planner_ranking": 0.09849496744573116,
92
+ "planner_risk": 0.010672552860341966,
93
+ "planner_success": 0.6324039027094841,
94
+ "reocclusion": 0.6649576723575592,
95
+ "support_mode": 0.6655856594443321,
96
+ "total": 1.1648448407649994,
97
+ "uncertainty": 0.02670970605686307,
98
+ "world_model": 2.2435964047908783
99
  }
100
  },
101
  {
102
  "epoch": 3,
103
  "train": {
104
+ "action": 0.023305251883963745,
105
+ "belief": 0.12743763532489538,
106
+ "corridor": 0.2423833180218935,
107
+ "disturbance": 0.002423852672412371,
108
+ "persistence": 4.037976682186127,
109
+ "planner_ranking": 0.09848632694532473,
110
+ "planner_risk": 0.010158603176629791,
111
+ "planner_success": 0.6135045563181242,
112
+ "reocclusion": 0.670435386399428,
113
+ "support_mode": 0.6804824098944664,
114
+ "total": 1.181638777256012,
115
+ "uncertainty": 0.020501127738195162,
116
+ "world_model": 2.250967080394427
117
  },
118
  "val": {
119
+ "action": 0.0216117303352803,
120
+ "belief": 0.11998547799885273,
121
+ "corridor": 0.23061690665781498,
122
+ "disturbance": 0.0017021069324982818,
123
+ "persistence": 3.871658682823181,
124
+ "planner_ranking": 0.0980530520901084,
125
+ "planner_risk": 0.010595057916361839,
126
+ "planner_success": 0.6308894380927086,
127
+ "reocclusion": 0.663639560341835,
128
+ "support_mode": 0.6656133309006691,
129
+ "total": 1.1477141454815865,
130
+ "uncertainty": 0.018870073137804866,
131
+ "world_model": 2.181487277150154
132
  }
133
  },
134
  {
135
  "epoch": 4,
136
  "train": {
137
+ "action": 0.022862333881979186,
138
+ "belief": 0.1157925771549344,
139
+ "corridor": 0.23798241962989172,
140
+ "disturbance": 0.0024595247232355177,
141
+ "persistence": 4.04269211490949,
142
+ "planner_ranking": 0.0980245132620136,
143
+ "planner_risk": 0.010186576827739676,
144
+ "planner_success": 0.6112014849980673,
145
+ "reocclusion": 0.6680525466799736,
146
+ "support_mode": 0.6751382003227869,
147
+ "total": 1.1686089982589085,
148
+ "uncertainty": 0.01871865172870457,
149
+ "world_model": 2.1998249938090644
150
  },
151
  "val": {
152
+ "action": 0.020634466782212257,
153
+ "belief": 0.11527534108608961,
154
+ "corridor": 0.23497656919062138,
155
+ "disturbance": 0.002371684633544646,
156
+ "persistence": 3.8176176249980927,
157
+ "planner_ranking": 0.09711439348757267,
158
+ "planner_risk": 0.010582514689303935,
159
+ "planner_success": 0.6349476724863052,
160
+ "reocclusion": 0.6623468473553658,
161
+ "support_mode": 0.6932553052902222,
162
+ "total": 1.1341337114572525,
163
+ "uncertainty": 0.024459586245939136,
164
+ "world_model": 2.11751089990139
165
  }
166
  },
167
  {
168
  "epoch": 5,
169
  "train": {
170
+ "action": 0.024459178171431024,
171
+ "belief": 0.14042565568039814,
172
+ "corridor": 0.25979805178940296,
173
+ "disturbance": 0.010113566526949095,
174
+ "persistence": 4.210421055555344,
175
+ "planner_ranking": 0.0971421217545867,
176
+ "planner_risk": 0.01027063278403754,
177
+ "planner_success": 0.6094371701280276,
178
+ "reocclusion": 0.6682968338330587,
179
+ "support_mode": 0.6893241529663404,
180
+ "total": 1.1891141335169475,
181
+ "uncertainty": 0.05712907208362594,
182
+ "world_model": 2.1682801693677902
183
  },
184
  "val": {
185
+ "action": 0.022404357325285673,
186
+ "belief": 0.1317315762862563,
187
+ "corridor": 0.248648414388299,
188
+ "disturbance": 0.001959386427188292,
189
+ "persistence": 3.8251605927944183,
190
+ "planner_ranking": 0.09636734332889318,
191
+ "planner_risk": 0.010734643263276666,
192
+ "planner_success": 0.6269454136490822,
193
+ "reocclusion": 0.6585175022482872,
194
+ "support_mode": 0.687481202185154,
195
+ "total": 1.129515826702118,
196
+ "uncertainty": 0.022852399852126837,
197
+ "world_model": 2.0773144513368607
198
  }
199
  },
200
  {
201
  "epoch": 6,
202
  "train": {
203
+ "action": 0.023649626101056736,
204
+ "belief": 0.10499086945007245,
205
+ "corridor": 0.23988350170354047,
206
+ "disturbance": 0.0025371607140793153,
207
+ "persistence": 4.0185394287109375,
208
+ "planner_ranking": 0.09640810824930668,
209
+ "planner_risk": 0.010166237130761147,
210
+ "planner_success": 0.6055587517718474,
211
+ "reocclusion": 0.6686983207861582,
212
+ "support_mode": 0.6687941774725914,
213
+ "total": 1.1488103543718655,
214
+ "uncertainty": 0.011886686998574683,
215
+ "world_model": 2.1212283273537955
216
  },
217
  "val": {
218
+ "action": 0.02155012753792107,
219
+ "belief": 0.10505348909646273,
220
+ "corridor": 0.22914408333599567,
221
+ "disturbance": 0.0021536786225624382,
222
+ "persistence": 3.736493021249771,
223
+ "planner_ranking": 0.09544396214187145,
224
+ "planner_risk": 0.010955312522128224,
225
+ "planner_success": 0.6418561860918999,
226
+ "reocclusion": 0.6577628254890442,
227
+ "support_mode": 0.665456235408783,
228
+ "total": 1.0746963024139404,
229
+ "uncertainty": 0.013590520713478327,
230
+ "world_model": 1.8803961426019669
231
  }
232
  },
233
  {
234
  "epoch": 7,
235
  "train": {
236
+ "action": 0.024166353822996218,
237
+ "belief": 0.1225533156345288,
238
+ "corridor": 0.24451578905185065,
239
+ "disturbance": 0.0034010016097454354,
240
+ "persistence": 3.975986421108246,
241
+ "planner_ranking": 0.09560706652700901,
242
+ "planner_risk": 0.010631242844586572,
243
+ "planner_success": 0.6020257460574309,
244
+ "reocclusion": 0.6639501129587492,
245
+ "support_mode": 0.6688573931654295,
246
+ "total": 1.0815023109316826,
247
+ "uncertainty": 0.01951570008532144,
248
+ "world_model": 1.7976730863253276
249
  },
250
  "val": {
251
+ "action": 0.020741463406011462,
252
+ "belief": 0.09398300107568502,
253
+ "corridor": 0.22893342934548855,
254
+ "disturbance": 0.0030847050511511043,
255
+ "persistence": 4.020223438739777,
256
+ "planner_ranking": 0.09519834443926811,
257
+ "planner_risk": 0.011243910586927086,
258
+ "planner_success": 0.630848728120327,
259
+ "reocclusion": 0.6507743820548058,
260
+ "support_mode": 0.6755735874176025,
261
+ "total": 1.0336408764123917,
262
+ "uncertainty": 0.004630188253941014,
263
+ "world_model": 1.544354408979416
264
  }
265
  }
266
  ]
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
- rebuild_dataset: false
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 4
@@ -48,6 +50,11 @@ policy:
48
  dropout: 0.1
49
  proprio_dim: 32
50
  proprio_tokens: 1
 
 
 
 
 
51
  decoder:
52
  hidden_dim: 512
53
  num_heads: 8
@@ -63,6 +70,8 @@ policy:
63
  num_approach_templates: 32
64
  rollout_horizon: 5
65
  belief_map_size: 32
 
 
66
  predict_belief_map: true
67
  world_model:
68
  hidden_dim: 512
@@ -71,13 +80,10 @@ policy:
71
  num_approach_templates: 32
72
  rollout_horizon: 5
73
  planner:
 
74
  num_candidates: 8
75
- corridor_weight: 1.0
76
- persistence_weight: 0.65
77
- proposal_weight: 0.35
78
- disturbance_weight: 0.8
79
- reocclusion_weight: 0.6
80
- visibility_weight: 0.35
81
  loss_weights:
82
  action: 1.0
83
  support_mode: 0.15
@@ -86,3 +92,6 @@ loss_weights:
86
  disturbance: 0.1
87
  world_model: 0.2
88
  belief: 0.05
 
 
 
 
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
15
+ rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 4
 
50
  dropout: 0.1
51
  proprio_dim: 32
52
  proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 512
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
  decoder:
59
  hidden_dim: 512
60
  num_heads: 8
 
70
  num_approach_templates: 32
71
  rollout_horizon: 5
72
  belief_map_size: 32
73
+ field_size: 16
74
+ num_heads: 4
75
  predict_belief_map: true
76
  world_model:
77
  hidden_dim: 512
 
80
  num_approach_templates: 32
81
  rollout_horizon: 5
82
  planner:
83
+ hidden_dim: 512
84
  num_candidates: 8
85
+ action_dim: 14
86
+ utility_margin: 0.1
 
 
 
 
87
  loss_weights:
88
  action: 1.0
89
  support_mode: 0.15
 
92
  disturbance: 0.1
93
  world_model: 0.2
94
  belief: 0.05
95
+ planner_success: 0.2
96
+ planner_risk: 0.1
97
+ planner_ranking: 0.1
artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.16666666666666666,
5
+ "bag_proxy": 0.25,
6
+ "cloth_proxy": 0.5
7
+ },
8
+ "mean_success": 0.3055555555555555,
9
+ "visibility_integral": 45.800796369711556,
10
+ "corridor_availability": 0.9180314590533575,
11
+ "reocclusion_rate": 0.027777777777777776,
12
+ "persistence_horizon_mae": 2.3963313409379188,
13
+ "disturbance_cost": 0.7033085679221485
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.73463360468546,
23
+ "corridor_availability": 0.9864540547132492,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8931084167626169
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.73463360468546,
36
+ "corridor_availability": 0.9864540547132492,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.419941000816309,
39
+ "disturbance_cost": 0.8931084167626169
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.73463360468546,
49
+ "corridor_availability": 0.9864540547132492,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.419941000816309,
52
+ "disturbance_cost": 0.8931084167626169
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.375,
57
+ "bag_proxy": 0.25,
58
+ "cloth_proxy": 0.5416666666666666
59
+ },
60
+ "mean_success": 0.38888888888888884,
61
+ "visibility_integral": 41.20910889903704,
62
+ "corridor_availability": 0.9009349540703826,
63
+ "reocclusion_rate": 0.027437974833808165,
64
+ "persistence_horizon_mae": 2.406442765584018,
65
+ "disturbance_cost": 0.6425008794499768
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.25,
70
+ "bag_proxy": 0.2916666666666667,
71
+ "cloth_proxy": 0.5416666666666666
72
+ },
73
+ "mean_success": 0.36111111111111116,
74
+ "visibility_integral": 42.94617295927472,
75
+ "corridor_availability": 0.911839393277963,
76
+ "reocclusion_rate": 0.026319995590828923,
77
+ "persistence_horizon_mae": 2.5709509358907683,
78
+ "disturbance_cost": 0.6674723047763109
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.16666666666666666,
83
+ "bag_proxy": 0.3333333333333333,
84
+ "cloth_proxy": 0.3333333333333333
85
+ },
86
+ "mean_success": 0.27777777777777773,
87
+ "visibility_integral": 47.384350614415276,
88
+ "corridor_availability": 0.9166230356527699,
89
+ "reocclusion_rate": 0.025818452380952383,
90
+ "persistence_horizon_mae": 2.957454740526246,
91
+ "disturbance_cost": 0.7210023639102777
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.306
5
+ - visibility_integral: 45.801
6
+ - corridor_availability: 0.918
7
+ - reocclusion_rate: 0.028
8
+ - persistence_horizon_mae: 2.396
9
+ - disturbance_cost: 0.703
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.735
14
+ - corridor_availability: 0.986
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.893
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.735
22
+ - corridor_availability: 0.986
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.420
25
+ - disturbance_cost: 0.893
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.735
30
+ - corridor_availability: 0.986
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.420
33
+ - disturbance_cost: 0.893
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.389
37
+ - visibility_integral: 41.209
38
+ - corridor_availability: 0.901
39
+ - reocclusion_rate: 0.027
40
+ - persistence_horizon_mae: 2.406
41
+ - disturbance_cost: 0.643
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.361
45
+ - visibility_integral: 42.946
46
+ - corridor_availability: 0.912
47
+ - reocclusion_rate: 0.026
48
+ - persistence_horizon_mae: 2.571
49
+ - disturbance_cost: 0.667
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.278
53
+ - visibility_integral: 47.384
54
+ - corridor_availability: 0.917
55
+ - reocclusion_rate: 0.026
56
+ - persistence_horizon_mae: 2.957
57
+ - disturbance_cost: 0.721
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.3333333333333333,
5
+ "bag_proxy": 0.3888888888888889,
6
+ "cloth_proxy": 0.3333333333333333
7
+ },
8
+ "mean_success": 0.35185185185185186,
9
+ "visibility_integral": 41.31481176614761,
10
+ "corridor_availability": 0.8930041271227377,
11
+ "reocclusion_rate": 0.002777777777777778,
12
+ "persistence_horizon_mae": 0.9662547600565393,
13
+ "disturbance_cost": 0.6302865350411998
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.3333333333333333,
18
+ "bag_proxy": 0.3888888888888889,
19
+ "cloth_proxy": 0.2777777777777778
20
+ },
21
+ "mean_success": 0.3333333333333333,
22
+ "visibility_integral": 6.436306021831654,
23
+ "corridor_availability": 0.33413351644520406,
24
+ "reocclusion_rate": 0.008333333333333335,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.5424560326393004
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.3333333333333333,
31
+ "bag_proxy": 0.3888888888888889,
32
+ "cloth_proxy": 0.2777777777777778
33
+ },
34
+ "mean_success": 0.3333333333333333,
35
+ "visibility_integral": 6.436306021831654,
36
+ "corridor_availability": 0.33413351644520406,
37
+ "reocclusion_rate": 0.008333333333333335,
38
+ "persistence_horizon_mae": 4.395576057914128,
39
+ "disturbance_cost": 0.5424560326393004
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.3333333333333333,
44
+ "bag_proxy": 0.3888888888888889,
45
+ "cloth_proxy": 0.2777777777777778
46
+ },
47
+ "mean_success": 0.3333333333333333,
48
+ "visibility_integral": 6.436306021831654,
49
+ "corridor_availability": 0.33413351644520406,
50
+ "reocclusion_rate": 0.008333333333333335,
51
+ "persistence_horizon_mae": 4.395576057914128,
52
+ "disturbance_cost": 0.5424560326393004
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.3333333333333333,
57
+ "bag_proxy": 0.3888888888888889,
58
+ "cloth_proxy": 0.3333333333333333
59
+ },
60
+ "mean_success": 0.35185185185185186,
61
+ "visibility_integral": 41.31481176614761,
62
+ "corridor_availability": 0.8930041271227377,
63
+ "reocclusion_rate": 0.002777777777777778,
64
+ "persistence_horizon_mae": 0.9662547600565393,
65
+ "disturbance_cost": 0.6302865350411998
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.3333333333333333,
70
+ "bag_proxy": 0.3888888888888889,
71
+ "cloth_proxy": 0.3333333333333333
72
+ },
73
+ "mean_success": 0.35185185185185186,
74
+ "visibility_integral": 41.34216132428911,
75
+ "corridor_availability": 0.8971193510073202,
76
+ "reocclusion_rate": 0.0011574074074074073,
77
+ "persistence_horizon_mae": 0.9659118890357264,
78
+ "disturbance_cost": 0.6302977896950863
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.3333333333333333,
83
+ "bag_proxy": 0.3888888888888889,
84
+ "cloth_proxy": 0.3333333333333333
85
+ },
86
+ "mean_success": 0.35185185185185186,
87
+ "visibility_integral": 41.33038121020353,
88
+ "corridor_availability": 0.8943758684175985,
89
+ "reocclusion_rate": 0.002777777777777778,
90
+ "persistence_horizon_mae": 0.9659084288095618,
91
+ "disturbance_cost": 0.6303076523321646
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.352
5
+ - visibility_integral: 41.315
6
+ - corridor_availability: 0.893
7
+ - reocclusion_rate: 0.003
8
+ - persistence_horizon_mae: 0.966
9
+ - disturbance_cost: 0.630
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.333
13
+ - visibility_integral: 6.436
14
+ - corridor_availability: 0.334
15
+ - reocclusion_rate: 0.008
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.542
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.333
21
+ - visibility_integral: 6.436
22
+ - corridor_availability: 0.334
23
+ - reocclusion_rate: 0.008
24
+ - persistence_horizon_mae: 4.396
25
+ - disturbance_cost: 0.542
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.333
29
+ - visibility_integral: 6.436
30
+ - corridor_availability: 0.334
31
+ - reocclusion_rate: 0.008
32
+ - persistence_horizon_mae: 4.396
33
+ - disturbance_cost: 0.542
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.352
37
+ - visibility_integral: 41.315
38
+ - corridor_availability: 0.893
39
+ - reocclusion_rate: 0.003
40
+ - persistence_horizon_mae: 0.966
41
+ - disturbance_cost: 0.630
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.352
45
+ - visibility_integral: 41.342
46
+ - corridor_availability: 0.897
47
+ - reocclusion_rate: 0.001
48
+ - persistence_horizon_mae: 0.966
49
+ - disturbance_cost: 0.630
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.352
53
+ - visibility_integral: 41.330
54
+ - corridor_availability: 0.894
55
+ - reocclusion_rate: 0.003
56
+ - persistence_horizon_mae: 0.966
57
+ - disturbance_cost: 0.630
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt",
3
+ "episodes": 18,
4
+ "sections": {
5
+ "full_model": {
6
+ "mean_success": 0.35185185185185186,
7
+ "visibility_integral": 41.31481176614761,
8
+ "corridor_availability": 0.8930041271227377,
9
+ "reocclusion_rate": 0.002777777777777778,
10
+ "persistence_horizon_mae": 0.9662547600565393,
11
+ "disturbance_cost": 0.6302865350411998
12
+ },
13
+ "no_reveal_state_head": {
14
+ "mean_success": 0.3333333333333333,
15
+ "visibility_integral": 6.436306021831654,
16
+ "corridor_availability": 0.33413351644520406,
17
+ "reocclusion_rate": 0.008333333333333335,
18
+ "persistence_horizon_mae": 0.0,
19
+ "disturbance_cost": 0.5424560326393004
20
+ },
21
+ "no_world_model": {
22
+ "mean_success": 0.3333333333333333,
23
+ "visibility_integral": 6.436306021831654,
24
+ "corridor_availability": 0.33413351644520406,
25
+ "reocclusion_rate": 0.008333333333333335,
26
+ "persistence_horizon_mae": 4.395576057914128,
27
+ "disturbance_cost": 0.5424560326393004
28
+ },
29
+ "no_planner_reranking": {
30
+ "mean_success": 0.3333333333333333,
31
+ "visibility_integral": 6.436306021831654,
32
+ "corridor_availability": 0.33413351644520406,
33
+ "reocclusion_rate": 0.008333333333333335,
34
+ "persistence_horizon_mae": 4.395576057914128,
35
+ "disturbance_cost": 0.5424560326393004
36
+ },
37
+ "no_support_mode_conditioning": {
38
+ "mean_success": 0.35185185185185186,
39
+ "visibility_integral": 41.31481176614761,
40
+ "corridor_availability": 0.8930041271227377,
41
+ "reocclusion_rate": 0.002777777777777778,
42
+ "persistence_horizon_mae": 0.9662547600565393,
43
+ "disturbance_cost": 0.6302865350411998
44
+ },
45
+ "no_wrist_cameras": {
46
+ "mean_success": 0.35185185185185186,
47
+ "visibility_integral": 41.34216132428911,
48
+ "corridor_availability": 0.8971193510073202,
49
+ "reocclusion_rate": 0.0011574074074074073,
50
+ "persistence_horizon_mae": 0.9659118890357264,
51
+ "disturbance_cost": 0.6302977896950863
52
+ },
53
+ "no_global_camera": {
54
+ "mean_success": 0.35185185185185186,
55
+ "visibility_integral": 41.33038121020353,
56
+ "corridor_availability": 0.8943758684175985,
57
+ "reocclusion_rate": 0.002777777777777778,
58
+ "persistence_horizon_mae": 0.9659084288095618,
59
+ "disturbance_cost": 0.6303076523321646
60
+ }
61
+ },
62
+ "raw": {
63
+ "full_model": {
64
+ "per_task_success": {
65
+ "foliage_proxy": 0.3333333333333333,
66
+ "bag_proxy": 0.3888888888888889,
67
+ "cloth_proxy": 0.3333333333333333
68
+ },
69
+ "mean_success": 0.35185185185185186,
70
+ "visibility_integral": 41.31481176614761,
71
+ "corridor_availability": 0.8930041271227377,
72
+ "reocclusion_rate": 0.002777777777777778,
73
+ "persistence_horizon_mae": 0.9662547600565393,
74
+ "disturbance_cost": 0.6302865350411998
75
+ },
76
+ "no_reveal_state_head": {
77
+ "per_task_success": {
78
+ "foliage_proxy": 0.3333333333333333,
79
+ "bag_proxy": 0.3888888888888889,
80
+ "cloth_proxy": 0.2777777777777778
81
+ },
82
+ "mean_success": 0.3333333333333333,
83
+ "visibility_integral": 6.436306021831654,
84
+ "corridor_availability": 0.33413351644520406,
85
+ "reocclusion_rate": 0.008333333333333335,
86
+ "persistence_horizon_mae": null,
87
+ "disturbance_cost": 0.5424560326393004
88
+ },
89
+ "no_world_model": {
90
+ "per_task_success": {
91
+ "foliage_proxy": 0.3333333333333333,
92
+ "bag_proxy": 0.3888888888888889,
93
+ "cloth_proxy": 0.2777777777777778
94
+ },
95
+ "mean_success": 0.3333333333333333,
96
+ "visibility_integral": 6.436306021831654,
97
+ "corridor_availability": 0.33413351644520406,
98
+ "reocclusion_rate": 0.008333333333333335,
99
+ "persistence_horizon_mae": 4.395576057914128,
100
+ "disturbance_cost": 0.5424560326393004
101
+ },
102
+ "no_planner_reranking": {
103
+ "per_task_success": {
104
+ "foliage_proxy": 0.3333333333333333,
105
+ "bag_proxy": 0.3888888888888889,
106
+ "cloth_proxy": 0.2777777777777778
107
+ },
108
+ "mean_success": 0.3333333333333333,
109
+ "visibility_integral": 6.436306021831654,
110
+ "corridor_availability": 0.33413351644520406,
111
+ "reocclusion_rate": 0.008333333333333335,
112
+ "persistence_horizon_mae": 4.395576057914128,
113
+ "disturbance_cost": 0.5424560326393004
114
+ },
115
+ "no_support_mode_conditioning": {
116
+ "per_task_success": {
117
+ "foliage_proxy": 0.3333333333333333,
118
+ "bag_proxy": 0.3888888888888889,
119
+ "cloth_proxy": 0.3333333333333333
120
+ },
121
+ "mean_success": 0.35185185185185186,
122
+ "visibility_integral": 41.31481176614761,
123
+ "corridor_availability": 0.8930041271227377,
124
+ "reocclusion_rate": 0.002777777777777778,
125
+ "persistence_horizon_mae": 0.9662547600565393,
126
+ "disturbance_cost": 0.6302865350411998
127
+ },
128
+ "no_wrist_cameras": {
129
+ "per_task_success": {
130
+ "foliage_proxy": 0.3333333333333333,
131
+ "bag_proxy": 0.3888888888888889,
132
+ "cloth_proxy": 0.3333333333333333
133
+ },
134
+ "mean_success": 0.35185185185185186,
135
+ "visibility_integral": 41.34216132428911,
136
+ "corridor_availability": 0.8971193510073202,
137
+ "reocclusion_rate": 0.0011574074074074073,
138
+ "persistence_horizon_mae": 0.9659118890357264,
139
+ "disturbance_cost": 0.6302977896950863
140
+ },
141
+ "no_global_camera": {
142
+ "per_task_success": {
143
+ "foliage_proxy": 0.3333333333333333,
144
+ "bag_proxy": 0.3888888888888889,
145
+ "cloth_proxy": 0.3333333333333333
146
+ },
147
+ "mean_success": 0.35185185185185186,
148
+ "visibility_integral": 41.33038121020353,
149
+ "corridor_availability": 0.8943758684175985,
150
+ "reocclusion_rate": 0.002777777777777778,
151
+ "persistence_horizon_mae": 0.9659084288095618,
152
+ "disturbance_cost": 0.6303076523321646
153
+ }
154
+ },
155
+ "elapsed_seconds": 4835.7074475847185
156
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.5,
6
+ "cloth_proxy": 0.5
7
+ },
8
+ "mean_success": 0.3333333333333333,
9
+ "visibility_integral": 42.51341059803963,
10
+ "corridor_availability": 0.9005658576885859,
11
+ "reocclusion_rate": 0.004166666666666667,
12
+ "persistence_horizon_mae": 1.121226767712281,
13
+ "disturbance_cost": 0.6488037866850694
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.5,
19
+ "cloth_proxy": 0.25
20
+ },
21
+ "mean_success": 0.25,
22
+ "visibility_integral": 7.045467118422191,
23
+ "corridor_availability": 0.29356995907922584,
24
+ "reocclusion_rate": 0.009375,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.6037605715294679
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.5,
32
+ "cloth_proxy": 0.25
33
+ },
34
+ "mean_success": 0.25,
35
+ "visibility_integral": 7.045467118422191,
36
+ "corridor_availability": 0.29356995907922584,
37
+ "reocclusion_rate": 0.009375,
38
+ "persistence_horizon_mae": 4.413126634701692,
39
+ "disturbance_cost": 0.6037605715294679
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.5,
45
+ "cloth_proxy": 0.25
46
+ },
47
+ "mean_success": 0.25,
48
+ "visibility_integral": 7.045467118422191,
49
+ "corridor_availability": 0.29356995907922584,
50
+ "reocclusion_rate": 0.009375,
51
+ "persistence_horizon_mae": 4.413126634701692,
52
+ "disturbance_cost": 0.6037605715294679
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.5,
58
+ "cloth_proxy": 0.5
59
+ },
60
+ "mean_success": 0.3333333333333333,
61
+ "visibility_integral": 42.51341059803963,
62
+ "corridor_availability": 0.9005658576885859,
63
+ "reocclusion_rate": 0.004166666666666667,
64
+ "persistence_horizon_mae": 1.121226767712281,
65
+ "disturbance_cost": 0.6488037866850694
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.5,
71
+ "cloth_proxy": 0.5
72
+ },
73
+ "mean_success": 0.3333333333333333,
74
+ "visibility_integral": 42.57222665349642,
75
+ "corridor_availability": 0.9067386935154597,
76
+ "reocclusion_rate": 0.0020833333333333333,
77
+ "persistence_horizon_mae": 1.1191915943883144,
78
+ "disturbance_cost": 0.6488144403944412
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.5,
84
+ "cloth_proxy": 0.5
85
+ },
86
+ "mean_success": 0.3333333333333333,
87
+ "visibility_integral": 42.558002611001335,
88
+ "corridor_availability": 0.9036522756020228,
89
+ "reocclusion_rate": 0.004166666666666667,
90
+ "persistence_horizon_mae": 1.1191876937919583,
91
+ "disturbance_cost": 0.6488229470948378
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.333
5
+ - visibility_integral: 42.513
6
+ - corridor_availability: 0.901
7
+ - reocclusion_rate: 0.004
8
+ - persistence_horizon_mae: 1.121
9
+ - disturbance_cost: 0.649
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.250
13
+ - visibility_integral: 7.045
14
+ - corridor_availability: 0.294
15
+ - reocclusion_rate: 0.009
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.604
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.250
21
+ - visibility_integral: 7.045
22
+ - corridor_availability: 0.294
23
+ - reocclusion_rate: 0.009
24
+ - persistence_horizon_mae: 4.413
25
+ - disturbance_cost: 0.604
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.250
29
+ - visibility_integral: 7.045
30
+ - corridor_availability: 0.294
31
+ - reocclusion_rate: 0.009
32
+ - persistence_horizon_mae: 4.413
33
+ - disturbance_cost: 0.604
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.333
37
+ - visibility_integral: 42.513
38
+ - corridor_availability: 0.901
39
+ - reocclusion_rate: 0.004
40
+ - persistence_horizon_mae: 1.121
41
+ - disturbance_cost: 0.649
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.333
45
+ - visibility_integral: 42.572
46
+ - corridor_availability: 0.907
47
+ - reocclusion_rate: 0.002
48
+ - persistence_horizon_mae: 1.119
49
+ - disturbance_cost: 0.649
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.333
53
+ - visibility_integral: 42.558
54
+ - corridor_availability: 0.904
55
+ - reocclusion_rate: 0.004
56
+ - persistence_horizon_mae: 1.119
57
+ - disturbance_cost: 0.649
artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.73463360468546,
10
+ "corridor_availability": 0.9864540547132492,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.419941000816309,
13
+ "disturbance_cost": 0.8931084167626169
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.73463360468546,
23
+ "corridor_availability": 0.9864540547132492,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8931084167626169
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.73463360468546,
36
+ "corridor_availability": 0.9864540547132492,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.419941000816309,
39
+ "disturbance_cost": 0.8931084167626169
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.73463360468546,
49
+ "corridor_availability": 0.9864540547132492,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.419941000816309,
52
+ "disturbance_cost": 0.8931084167626169
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.0,
58
+ "cloth_proxy": 0.0
59
+ },
60
+ "mean_success": 0.0,
61
+ "visibility_integral": 62.73463360468546,
62
+ "corridor_availability": 0.9864540547132492,
63
+ "reocclusion_rate": 0.0,
64
+ "persistence_horizon_mae": 2.419941000816309,
65
+ "disturbance_cost": 0.8931084167626169
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.0,
71
+ "cloth_proxy": 0.0
72
+ },
73
+ "mean_success": 0.0,
74
+ "visibility_integral": 62.738075998094345,
75
+ "corridor_availability": 0.9864540547132492,
76
+ "reocclusion_rate": 0.0,
77
+ "persistence_horizon_mae": 2.602942177767141,
78
+ "disturbance_cost": 0.8930594937668906
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.0,
84
+ "cloth_proxy": 0.0
85
+ },
86
+ "mean_success": 0.0,
87
+ "visibility_integral": 62.777364783816864,
88
+ "corridor_availability": 0.9864540547132492,
89
+ "reocclusion_rate": 0.0,
90
+ "persistence_horizon_mae": 3.000429857770602,
91
+ "disturbance_cost": 0.8916233273016082
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.000
5
+ - visibility_integral: 62.735
6
+ - corridor_availability: 0.986
7
+ - reocclusion_rate: 0.000
8
+ - persistence_horizon_mae: 2.420
9
+ - disturbance_cost: 0.893
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.735
14
+ - corridor_availability: 0.986
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.893
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.735
22
+ - corridor_availability: 0.986
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.420
25
+ - disturbance_cost: 0.893
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.735
30
+ - corridor_availability: 0.986
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.420
33
+ - disturbance_cost: 0.893
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.000
37
+ - visibility_integral: 62.735
38
+ - corridor_availability: 0.986
39
+ - reocclusion_rate: 0.000
40
+ - persistence_horizon_mae: 2.420
41
+ - disturbance_cost: 0.893
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.000
45
+ - visibility_integral: 62.738
46
+ - corridor_availability: 0.986
47
+ - reocclusion_rate: 0.000
48
+ - persistence_horizon_mae: 2.603
49
+ - disturbance_cost: 0.893
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.000
53
+ - visibility_integral: 62.777
54
+ - corridor_availability: 0.986
55
+ - reocclusion_rate: 0.000
56
+ - persistence_horizon_mae: 3.000
57
+ - disturbance_cost: 0.892
artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only_smoke
2
+ output_dir: /workspace/outputs/smoke
3
+ device: cuda
4
+ seed: 11
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 64
11
+ train_episodes_per_proxy: 6
12
+ val_episodes_per_proxy: 2
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 4
17
+ rollout_horizon: 3
18
+ history_steps: 2
19
+ planner_candidates: 4
20
+ seed: 11
21
+ optim:
22
+ epochs: 2
23
+ batch_size: 8
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: backbone_only
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: false
34
+ plan_during_eval: false
35
+ support_mode_conditioning: true
36
+ policy:
37
+ backbone:
38
+ model_name: openai/clip-vit-base-patch32
39
+ hidden_dim: 64
40
+ max_text_tokens: 32
41
+ freeze_backbone: true
42
+ gradient_checkpointing: false
43
+ use_dummy_backbone: true
44
+ fusion:
45
+ hidden_dim: 64
46
+ num_cameras: 3
47
+ num_layers: 2
48
+ num_heads: 4
49
+ ff_dim: 128
50
+ dropout: 0.1
51
+ proprio_dim: 32
52
+ proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 64
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
+ decoder:
59
+ hidden_dim: 64
60
+ num_heads: 4
61
+ num_layers: 2
62
+ ff_dim: 128
63
+ dropout: 0.1
64
+ chunk_size: 4
65
+ action_dim: 14
66
+ arm_action_dim: 7
67
+ num_candidates: 4
68
+ reveal_head:
69
+ hidden_dim: 64
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 3
73
+ belief_map_size: 32
74
+ field_size: 16
75
+ num_heads: 4
76
+ predict_belief_map: true
77
+ world_model:
78
+ hidden_dim: 64
79
+ action_dim: 14
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 3
83
+ planner:
84
+ hidden_dim: 64
85
+ num_candidates: 4
86
+ action_dim: 14
87
+ utility_margin: 0.1
88
+ loss_weights:
89
+ action: 1.0
90
+ support_mode: 0.0
91
+ corridor: 0.0
92
+ persistence: 0.0
93
+ disturbance: 0.0
94
+ world_model: 0.0
95
+ belief: 0.0
96
+ planner_success: 0.0
97
+ planner_risk: 0.0
98
+ planner_ranking: 0.0
artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.16355294627802713,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.16355294627802713,
10
+ "world_model": 0.0
11
+ },
12
+ "val": {
13
+ "action": 0.06914255395531654,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.06914255395531654,
18
+ "world_model": 0.0
19
+ }
20
+ },
21
+ {
22
+ "epoch": 1,
23
+ "train": {
24
+ "action": 0.07098196234021868,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.07098196234021868,
29
+ "world_model": 0.0
30
+ },
31
+ "val": {
32
+ "action": 0.05190564692020416,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.05190564692020416,
37
+ "world_model": 0.0
38
+ }
39
+ }
40
+ ]
artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state_smoke
2
+ output_dir: /workspace/outputs/smoke
3
+ device: cuda
4
+ seed: 11
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 64
11
+ train_episodes_per_proxy: 6
12
+ val_episodes_per_proxy: 2
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 4
17
+ rollout_horizon: 3
18
+ history_steps: 2
19
+ planner_candidates: 4
20
+ seed: 11
21
+ optim:
22
+ epochs: 2
23
+ batch_size: 8
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: reveal_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ policy:
37
+ backbone:
38
+ model_name: openai/clip-vit-base-patch32
39
+ hidden_dim: 64
40
+ max_text_tokens: 32
41
+ freeze_backbone: true
42
+ gradient_checkpointing: false
43
+ use_dummy_backbone: true
44
+ fusion:
45
+ hidden_dim: 64
46
+ num_cameras: 3
47
+ num_layers: 2
48
+ num_heads: 4
49
+ ff_dim: 128
50
+ dropout: 0.1
51
+ proprio_dim: 32
52
+ proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 64
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
+ decoder:
59
+ hidden_dim: 64
60
+ num_heads: 4
61
+ num_layers: 2
62
+ ff_dim: 128
63
+ dropout: 0.1
64
+ chunk_size: 4
65
+ action_dim: 14
66
+ arm_action_dim: 7
67
+ num_candidates: 4
68
+ reveal_head:
69
+ hidden_dim: 64
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 3
73
+ belief_map_size: 32
74
+ field_size: 16
75
+ num_heads: 4
76
+ predict_belief_map: true
77
+ world_model:
78
+ hidden_dim: 64
79
+ action_dim: 14
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 3
83
+ planner:
84
+ hidden_dim: 64
85
+ num_candidates: 4
86
+ action_dim: 14
87
+ utility_margin: 0.1
88
+ loss_weights:
89
+ action: 1.0
90
+ support_mode: 0.15
91
+ corridor: 0.2
92
+ persistence: 0.1
93
+ disturbance: 0.1
94
+ world_model: 0.2
95
+ belief: 0.05
96
+ planner_success: 0.2
97
+ planner_risk: 0.1
98
+ planner_ranking: 0.1
artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.1632463155048234,
6
+ "belief": 0.5288754488740649,
7
+ "corridor": 0.5060673143182483,
8
+ "disturbance": 0.08440807514957019,
9
+ "persistence": 1.7013718911579676,
10
+ "planner_ranking": 0.09609956932919365,
11
+ "planner_risk": 0.08803637006453105,
12
+ "planner_success": 0.6524881209645953,
13
+ "reocclusion": 0.7463519998959133,
14
+ "support_mode": 0.8362165178571429,
15
+ "total": 2.236373339380537,
16
+ "uncertainty": 0.572694114276341,
17
+ "world_model": 7.359470299312046
18
+ },
19
+ "val": {
20
+ "action": 0.07105841860175133,
21
+ "belief": 0.3773516118526459,
22
+ "corridor": 0.37264925241470337,
23
+ "disturbance": 0.03807383216917515,
24
+ "persistence": 1.6277075409889221,
25
+ "planner_ranking": 0.09990942850708961,
26
+ "planner_risk": 0.08137237653136253,
27
+ "planner_success": 0.6634204685688019,
28
+ "reocclusion": 0.7349686622619629,
29
+ "support_mode": 0.7270728349685669,
30
+ "total": 1.5740689039230347,
31
+ "uncertainty": 0.5246226787567139,
32
+ "world_model": 4.816080331802368
33
+ }
34
+ },
35
+ {
36
+ "epoch": 1,
37
+ "train": {
38
+ "action": 0.07130091797028269,
39
+ "belief": 0.31657502480915617,
40
+ "corridor": 0.35610165766307283,
41
+ "disturbance": 0.022400280194623128,
42
+ "persistence": 1.5677628857748849,
43
+ "planner_ranking": 0.09598331153392792,
44
+ "planner_risk": 0.07021375798753329,
45
+ "planner_success": 0.6178554126194545,
46
+ "reocclusion": 0.7833022390093122,
47
+ "support_mode": 0.7107979910714286,
48
+ "total": 1.2336589864322118,
49
+ "uncertainty": 0.40418908851487295,
50
+ "world_model": 3.2488711902073453
51
+ },
52
+ "val": {
53
+ "action": 0.0499270036816597,
54
+ "belief": 0.24123625457286835,
55
+ "corridor": 0.2881518602371216,
56
+ "disturbance": 0.009773310273885727,
57
+ "persistence": 1.7411235570907593,
58
+ "planner_ranking": 0.1002776250243187,
59
+ "planner_risk": 0.05218701809644699,
60
+ "planner_success": 0.6680125892162323,
61
+ "reocclusion": 0.7738973498344421,
62
+ "support_mode": 0.6901583671569824,
63
+ "total": 0.9521919786930084,
64
+ "uncertainty": 0.25171157717704773,
65
+ "world_model": 1.9355762600898743
66
+ }
67
+ }
68
+ ]
artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.62329292297363,
10
+ "corridor_availability": 0.9855967164039612,
11
+ "reocclusion_rate": 0.0020833333333333333,
12
+ "persistence_horizon_mae": 2.8106139592826365,
13
+ "disturbance_cost": 0.8984478116035461
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.666042963663735,
23
+ "corridor_availability": 0.9876543283462524,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8984478116035461
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.666042963663735,
36
+ "corridor_availability": 0.9876543283462524,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.8120017565786837,
39
+ "disturbance_cost": 0.8984478116035461
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.666042963663735,
49
+ "corridor_availability": 0.9876543283462524,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.8120017565786837,
52
+ "disturbance_cost": 0.8984478116035461
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.0,
58
+ "cloth_proxy": 0.0
59
+ },
60
+ "mean_success": 0.0,
61
+ "visibility_integral": 62.53779284159342,
62
+ "corridor_availability": 0.9855967164039612,
63
+ "reocclusion_rate": 0.0020833333333333333,
64
+ "persistence_horizon_mae": 2.808507453898589,
65
+ "disturbance_cost": 0.8984478116035461
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.0,
71
+ "cloth_proxy": 0.0
72
+ },
73
+ "mean_success": 0.0,
74
+ "visibility_integral": 62.67268816630045,
75
+ "corridor_availability": 0.9876543283462524,
76
+ "reocclusion_rate": 0.0,
77
+ "persistence_horizon_mae": 2.8158031940460204,
78
+ "disturbance_cost": 0.8982548316319784
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.0,
84
+ "cloth_proxy": 0.0
85
+ },
86
+ "mean_success": 0.0,
87
+ "visibility_integral": 62.668721516927086,
88
+ "corridor_availability": 0.9876543283462524,
89
+ "reocclusion_rate": 0.0,
90
+ "persistence_horizon_mae": 2.8175474738081294,
91
+ "disturbance_cost": 0.8983920911947886
92
+ }
93
+ }
artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.000
5
+ - visibility_integral: 62.623
6
+ - corridor_availability: 0.986
7
+ - reocclusion_rate: 0.002
8
+ - persistence_horizon_mae: 2.811
9
+ - disturbance_cost: 0.898
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.666
14
+ - corridor_availability: 0.988
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.898
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.666
22
+ - corridor_availability: 0.988
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.812
25
+ - disturbance_cost: 0.898
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.666
30
+ - corridor_availability: 0.988
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.812
33
+ - disturbance_cost: 0.898
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.000
37
+ - visibility_integral: 62.538
38
+ - corridor_availability: 0.986
39
+ - reocclusion_rate: 0.002
40
+ - persistence_horizon_mae: 2.809
41
+ - disturbance_cost: 0.898
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.000
45
+ - visibility_integral: 62.673
46
+ - corridor_availability: 0.988
47
+ - reocclusion_rate: 0.000
48
+ - persistence_horizon_mae: 2.816
49
+ - disturbance_cost: 0.898
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.000
53
+ - visibility_integral: 62.669
54
+ - corridor_availability: 0.988
55
+ - reocclusion_rate: 0.000
56
+ - persistence_horizon_mae: 2.818
57
+ - disturbance_cost: 0.898
artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.556946436564125,
10
+ "corridor_availability": 0.9876543283462524,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.9013666311899821
14
+ },
15
+ "reveal": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.6706740061442,
23
+ "corridor_availability": 0.9876543283462524,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": 2.812001740684112,
26
+ "disturbance_cost": 0.8983492453893026
27
+ }
28
+ }
artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone
4
+ - checkpoint: /workspace/outputs/smoke/proxy_backbone_only_smoke/checkpoint_best.pt
5
+ - mean_success: 0.000
6
+ - visibility_integral: 62.557
7
+ - corridor_availability: 0.988
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.901
11
+ - foliage_proxy_success: 0.000
12
+ - bag_proxy_success: 0.000
13
+ - cloth_proxy_success: 0.000
14
+
15
+ ## reveal
16
+ - checkpoint: /workspace/outputs/smoke/proxy_reveal_state_smoke/checkpoint_best.pt
17
+ - mean_success: 0.000
18
+ - visibility_integral: 62.671
19
+ - corridor_availability: 0.988
20
+ - reocclusion_rate: 0.000
21
+ - persistence_horizon_mae: 2.812
22
+ - disturbance_cost: 0.898
23
+ - foliage_proxy_success: 0.000
24
+ - bag_proxy_success: 0.000
25
+ - cloth_proxy_success: 0.000
code/reveal_vla_bimanual/eval/run_ablations.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
  import argparse
4
  import json
5
  from pathlib import Path
 
6
 
7
  from eval.ablations import MANDATORY_ABLATIONS
8
  from eval.report import write_comparison_report
@@ -19,6 +20,7 @@ def main() -> None:
19
  parser.add_argument("--resolution", type=int, default=None)
20
  parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
21
  parser.add_argument("--proxies", nargs="*", default=None)
 
22
  args = parser.parse_args()
23
 
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -28,10 +30,25 @@ def main() -> None:
28
  output_root = Path(args.output_root)
29
  output_root.mkdir(parents=True, exist_ok=True)
30
 
 
 
31
  sections = {}
32
  raw = {}
33
- for ablation in (None, *MANDATORY_ABLATIONS):
 
 
 
 
 
 
 
 
 
 
34
  label = "full_model" if ablation is None else ablation
 
 
 
35
  metrics = evaluate_model(
36
  model=model,
37
  device=device,
@@ -57,8 +74,20 @@ def main() -> None:
57
  "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
58
  "disturbance_cost": metrics.disturbance_cost or 0.0,
59
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- json_path = output_root / "ablations.json"
62
  json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
63
  write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
64
  print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
 
3
  import argparse
4
  import json
5
  from pathlib import Path
6
+ import time
7
 
8
  from eval.ablations import MANDATORY_ABLATIONS
9
  from eval.report import write_comparison_report
 
20
  parser.add_argument("--resolution", type=int, default=None)
21
  parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
22
  parser.add_argument("--proxies", nargs="*", default=None)
23
+ parser.add_argument("--resume", action="store_true")
24
  args = parser.parse_args()
25
 
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
30
  output_root = Path(args.output_root)
31
  output_root.mkdir(parents=True, exist_ok=True)
32
 
33
+ json_path = output_root / "ablations.json"
34
+ partial_path = output_root / "ablations.partial.json"
35
  sections = {}
36
  raw = {}
37
+ completed_labels: set[str] = set()
38
+ if args.resume and partial_path.exists():
39
+ partial = json.loads(partial_path.read_text(encoding="utf-8"))
40
+ raw = partial.get("raw", {})
41
+ sections = partial.get("sections", {})
42
+ completed_labels = set(raw)
43
+ print(json.dumps({"resume_from": str(partial_path), "completed": sorted(completed_labels)}, indent=2))
44
+
45
+ ablations = (None, *MANDATORY_ABLATIONS)
46
+ start_time = time.monotonic()
47
+ for index, ablation in enumerate(ablations, start=1):
48
  label = "full_model" if ablation is None else ablation
49
+ if label in completed_labels:
50
+ continue
51
+ print(json.dumps({"running": label, "index": index, "total": len(ablations)}, indent=2))
52
  metrics = evaluate_model(
53
  model=model,
54
  device=device,
 
74
  "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
75
  "disturbance_cost": metrics.disturbance_cost or 0.0,
76
  }
77
+ partial_path.write_text(
78
+ json.dumps(
79
+ {
80
+ "checkpoint": args.checkpoint,
81
+ "episodes": args.episodes,
82
+ "sections": sections,
83
+ "raw": raw,
84
+ "elapsed_seconds": time.monotonic() - start_time,
85
+ },
86
+ indent=2,
87
+ ),
88
+ encoding="utf-8",
89
+ )
90
 
 
91
  json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
92
  write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
93
  print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py CHANGED
@@ -23,6 +23,7 @@ from eval.report import write_comparison_report
23
  from models.action_decoder import ChunkDecoderConfig
24
  from models.backbones import FrozenVLBackboneConfig
25
  from models.multiview_fusion import MultiViewFusionConfig
 
26
  from models.planner import PlannerConfig
27
  from models.policy import PolicyConfig
28
  from models.reveal_head import RevealHeadConfig
@@ -35,6 +36,7 @@ def _policy_config_from_dict(cfg: dict[str, Any]) -> PolicyConfig:
35
  return PolicyConfig(
36
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
37
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
 
38
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
39
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
40
  world_model=RevealWMConfig(**cfg["world_model"]),
@@ -56,11 +58,33 @@ def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch
56
  return model, checkpoint
57
 
58
 
59
- def _prepare_batch(observation: dict[str, Any], device: torch.device) -> dict[str, Any]:
 
 
 
 
 
60
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
61
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return {
63
  "images": images.to(device),
 
 
64
  "proprio": proprio.to(device),
65
  "texts": [observation["text"]],
66
  }
@@ -83,6 +107,8 @@ def select_chunk(
83
  images = _apply_camera_ablation(batch["images"], ablation)
84
  forward_kwargs = {
85
  "images": images,
 
 
86
  "proprio": batch["proprio"],
87
  "texts": batch["texts"],
88
  }
@@ -118,6 +144,7 @@ def evaluate_model(
118
  reocclusion_scores = []
119
  persistence_errors = []
120
  disturbance_scores = []
 
121
 
122
  for proxy_offset, proxy_name in enumerate(proxies):
123
  successes = []
@@ -131,12 +158,25 @@ def evaluate_model(
131
  episode_visibility = [float(privileged_state["visibility"])]
132
  episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
133
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
 
 
134
  done = False
135
  while not done:
136
- batch = _prepare_batch(observation, device=device)
 
 
 
 
 
137
  with torch.no_grad():
138
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
139
  action = chunk[0, 0].detach().cpu().numpy()
 
 
 
 
 
 
140
  observation, _, terminated, truncated, privileged_state = env.step(action)
141
  episode_visibility.append(float(privileged_state["visibility"]))
142
  episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
 
23
  from models.action_decoder import ChunkDecoderConfig
24
  from models.backbones import FrozenVLBackboneConfig
25
  from models.multiview_fusion import MultiViewFusionConfig
26
+ from models.observation_memory import ObservationMemoryConfig
27
  from models.planner import PlannerConfig
28
  from models.policy import PolicyConfig
29
  from models.reveal_head import RevealHeadConfig
 
36
  return PolicyConfig(
37
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
38
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
39
+ memory=ObservationMemoryConfig(**cfg.get("memory", {})),
40
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
41
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
42
  world_model=RevealWMConfig(**cfg["world_model"]),
 
58
  return model, checkpoint
59
 
60
 
61
+ def _prepare_batch(
62
+ observation: dict[str, Any],
63
+ device: torch.device,
64
+ history_images: list[np.ndarray] | None = None,
65
+ history_proprio: list[np.ndarray] | None = None,
66
+ ) -> dict[str, Any]:
67
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
68
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
69
+ history_images = history_images or []
70
+ history_proprio = history_proprio or []
71
+ if history_images:
72
+ history_images_tensor = (
73
+ torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
74
+ )
75
+ else:
76
+ history_images_tensor = torch.zeros(
77
+ (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
78
+ dtype=torch.float32,
79
+ )
80
+ if history_proprio:
81
+ history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
82
+ else:
83
+ history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
84
  return {
85
  "images": images.to(device),
86
+ "history_images": history_images_tensor.to(device),
87
+ "history_proprio": history_proprio_tensor.to(device),
88
  "proprio": proprio.to(device),
89
  "texts": [observation["text"]],
90
  }
 
107
  images = _apply_camera_ablation(batch["images"], ablation)
108
  forward_kwargs = {
109
  "images": images,
110
+ "history_images": batch.get("history_images"),
111
+ "history_proprio": batch.get("history_proprio"),
112
  "proprio": batch["proprio"],
113
  "texts": batch["texts"],
114
  }
 
144
  reocclusion_scores = []
145
  persistence_errors = []
146
  disturbance_scores = []
147
+ history_steps = int(getattr(model.config.memory, "history_steps", 0)) if hasattr(model, "config") else 0
148
 
149
  for proxy_offset, proxy_name in enumerate(proxies):
150
  successes = []
 
158
  episode_visibility = [float(privileged_state["visibility"])]
159
  episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
160
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
161
+ history_images: list[np.ndarray] = []
162
+ history_proprio: list[np.ndarray] = []
163
  done = False
164
  while not done:
165
+ batch = _prepare_batch(
166
+ observation,
167
+ device=device,
168
+ history_images=history_images,
169
+ history_proprio=history_proprio,
170
+ )
171
  with torch.no_grad():
172
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
173
  action = chunk[0, 0].detach().cpu().numpy()
174
+ if history_steps > 0:
175
+ if len(history_images) >= history_steps:
176
+ history_images = history_images[-history_steps + 1 :]
177
+ history_proprio = history_proprio[-history_steps + 1 :]
178
+ history_images.append(observation["images"])
179
+ history_proprio.append(observation["proprio"])
180
  observation, _, terminated, truncated, privileged_state = env.step(action)
181
  episode_visibility.append(float(privileged_state["visibility"]))
182
  episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py CHANGED
@@ -18,6 +18,7 @@ from rlbench.environment import Environment
18
  from models.action_decoder import ChunkDecoderConfig
19
  from models.backbones import FrozenVLBackboneConfig
20
  from models.multiview_fusion import MultiViewFusionConfig
 
21
  from models.planner import PlannerConfig
22
  from models.policy import PolicyConfig
23
  from models.reveal_head import RevealHeadConfig
@@ -31,6 +32,7 @@ def _policy_config_from_checkpoint(checkpoint: dict[str, Any]) -> PolicyConfig:
31
  return PolicyConfig(
32
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
33
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
 
34
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
35
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
36
  world_model=RevealWMConfig(**cfg["world_model"]),
@@ -56,6 +58,7 @@ def main() -> None:
56
  parser.add_argument("--resolution", type=int, default=224)
57
  parser.add_argument("--device", default="cuda")
58
  parser.add_argument("--plan", action="store_true")
 
59
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
60
  parser.add_argument("--headless", action="store_true", default=True)
61
  args = parser.parse_args()
@@ -67,6 +70,18 @@ def main() -> None:
67
  model = build_policy(policy_config, trainer_config).to(device)
68
  model.load_state_dict(checkpoint["state_dict"], strict=True)
69
  model.eval()
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  obs_config = create_obs_config(
72
  ["front", "wrist_left", "wrist_right"],
@@ -87,13 +102,16 @@ def main() -> None:
87
 
88
  results: dict[str, Any] = {
89
  "checkpoint": str(Path(args.checkpoint).resolve()),
90
- "plan": bool(args.plan),
 
91
  "support_mode_conditioning": not args.disable_support_mode_conditioning,
92
  "episodes_per_task": args.episodes_per_task,
93
  "episode_length": args.episode_length,
94
  "resolution": args.resolution,
95
  "tasks": {},
96
  }
 
 
97
 
98
  try:
99
  env.launch()
@@ -107,6 +125,9 @@ def main() -> None:
107
  language_goal = _episode_language_goal(descriptions)
108
  total_reward = 0.0
109
  success = 0.0
 
 
 
110
  for timestep in range(args.episode_length):
111
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
112
  proprio = torch.from_numpy(
@@ -117,21 +138,53 @@ def main() -> None:
117
  target_dim=policy_config.fusion.proprio_dim,
118
  )
119
  ).unsqueeze(0).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  with torch.no_grad():
121
  if trainer_config.policy_type == "reveal_state":
122
  outputs = model(
123
  images=images,
124
  proprio=proprio,
125
  texts=[language_goal],
126
- plan=args.plan,
 
 
127
  support_mode_conditioning=not args.disable_support_mode_conditioning,
128
  )
129
  else:
130
- outputs = model(images=images, proprio=proprio, texts=[language_goal])
 
 
 
 
 
 
131
  chosen_chunk = outputs["action_mean"]
132
- if args.plan and "planned_chunk" in outputs:
133
  chosen_chunk = outputs["planned_chunk"]
134
  step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
 
 
 
 
 
 
135
  env_action = absolute_action_from_delta(obs, step_action, ignore_collisions=True)
136
  obs, reward, done = task.step(env_action)
137
  total_reward += float(reward)
@@ -160,7 +213,8 @@ def main() -> None:
160
  "# RLBench Rollout Eval",
161
  "",
162
  f"- Checkpoint: `{results['checkpoint']}`",
163
- f"- Plan enabled: `{results['plan']}`",
 
164
  f"- Support-mode conditioning: `{results['support_mode_conditioning']}`",
165
  f"- Mean success: `{results['mean_success']:.3f}`",
166
  "",
 
18
  from models.action_decoder import ChunkDecoderConfig
19
  from models.backbones import FrozenVLBackboneConfig
20
  from models.multiview_fusion import MultiViewFusionConfig
21
+ from models.observation_memory import ObservationMemoryConfig
22
  from models.planner import PlannerConfig
23
  from models.policy import PolicyConfig
24
  from models.reveal_head import RevealHeadConfig
 
32
  return PolicyConfig(
33
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
34
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
35
+ memory=ObservationMemoryConfig(**cfg.get("memory", {})),
36
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
37
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
38
  world_model=RevealWMConfig(**cfg["world_model"]),
 
58
  parser.add_argument("--resolution", type=int, default=224)
59
  parser.add_argument("--device", default="cuda")
60
  parser.add_argument("--plan", action="store_true")
61
+ parser.add_argument("--allow-unsupervised-planning", action="store_true")
62
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
63
  parser.add_argument("--headless", action="store_true", default=True)
64
  args = parser.parse_args()
 
70
  model = build_policy(policy_config, trainer_config).to(device)
71
  model.load_state_dict(checkpoint["state_dict"], strict=True)
72
  model.eval()
73
+ plan_requested = bool(args.plan)
74
+ plan_applied = plan_requested and trainer_config.policy_type == "reveal_state"
75
+ planning_note = None
76
+ if plan_requested and trainer_config.policy_type != "reveal_state":
77
+ plan_applied = False
78
+ planning_note = "Planner requested for a backbone-only checkpoint; evaluating the backbone policy only."
79
+ elif plan_requested and trainer_config.policy_type == "reveal_state" and not args.allow_unsupervised_planning:
80
+ plan_applied = False
81
+ planning_note = (
82
+ "RLBench batches do not provide reveal supervision. Unsupervised reveal planning was disabled; "
83
+ "use --allow-unsupervised-planning to override."
84
+ )
85
 
86
  obs_config = create_obs_config(
87
  ["front", "wrist_left", "wrist_right"],
 
102
 
103
  results: dict[str, Any] = {
104
  "checkpoint": str(Path(args.checkpoint).resolve()),
105
+ "plan_requested": plan_requested,
106
+ "plan_applied": plan_applied,
107
  "support_mode_conditioning": not args.disable_support_mode_conditioning,
108
  "episodes_per_task": args.episodes_per_task,
109
  "episode_length": args.episode_length,
110
  "resolution": args.resolution,
111
  "tasks": {},
112
  }
113
+ if planning_note is not None:
114
+ results["planning_note"] = planning_note
115
 
116
  try:
117
  env.launch()
 
125
  language_goal = _episode_language_goal(descriptions)
126
  total_reward = 0.0
127
  success = 0.0
128
+ history_images: list[np.ndarray] = []
129
+ history_proprio: list[np.ndarray] = []
130
+ history_steps = int(getattr(policy_config.memory, "history_steps", 0))
131
  for timestep in range(args.episode_length):
132
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
133
  proprio = torch.from_numpy(
 
138
  target_dim=policy_config.fusion.proprio_dim,
139
  )
140
  ).unsqueeze(0).to(device)
141
+ if history_images:
142
+ history_images_tensor = (
143
+ torch.from_numpy(np.stack(history_images, axis=0)).unsqueeze(0).to(device)
144
+ )
145
+ history_proprio_tensor = (
146
+ torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
147
+ )
148
+ else:
149
+ history_images_tensor = torch.zeros(
150
+ (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
151
+ device=device,
152
+ dtype=images.dtype,
153
+ )
154
+ history_proprio_tensor = torch.zeros(
155
+ (1, 0, proprio.shape[-1]),
156
+ device=device,
157
+ dtype=proprio.dtype,
158
+ )
159
  with torch.no_grad():
160
  if trainer_config.policy_type == "reveal_state":
161
  outputs = model(
162
  images=images,
163
  proprio=proprio,
164
  texts=[language_goal],
165
+ history_images=history_images_tensor,
166
+ history_proprio=history_proprio_tensor,
167
+ plan=plan_applied,
168
  support_mode_conditioning=not args.disable_support_mode_conditioning,
169
  )
170
  else:
171
+ outputs = model(
172
+ images=images,
173
+ proprio=proprio,
174
+ texts=[language_goal],
175
+ history_images=history_images_tensor,
176
+ history_proprio=history_proprio_tensor,
177
+ )
178
  chosen_chunk = outputs["action_mean"]
179
+ if plan_applied and "planned_chunk" in outputs:
180
  chosen_chunk = outputs["planned_chunk"]
181
  step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
182
+ if history_steps > 0:
183
+ if len(history_images) >= history_steps:
184
+ history_images = history_images[-history_steps + 1 :]
185
+ history_proprio = history_proprio[-history_steps + 1 :]
186
+ history_images.append(images[0].detach().cpu().numpy())
187
+ history_proprio.append(proprio[0].detach().cpu().numpy())
188
  env_action = absolute_action_from_delta(obs, step_action, ignore_collisions=True)
189
  obs, reward, done = task.step(env_action)
190
  total_reward += float(reward)
 
213
  "# RLBench Rollout Eval",
214
  "",
215
  f"- Checkpoint: `{results['checkpoint']}`",
216
+ f"- Plan requested: `{results['plan_requested']}`",
217
+ f"- Plan applied: `{results['plan_applied']}`",
218
  f"- Support-mode conditioning: `{results['support_mode_conditioning']}`",
219
  f"- Mean success: `{results['mean_success']:.3f}`",
220
  "",
code/reveal_vla_bimanual/models/action_decoder.py CHANGED
@@ -15,6 +15,7 @@ class ChunkDecoderConfig:
15
  dropout: float = 0.1
16
  chunk_size: int = 8
17
  action_dim: int = 14
 
18
  num_candidates: int = 8
19
 
20
 
@@ -30,24 +31,111 @@ class ACTBimanualChunkDecoder(nn.Module):
30
  batch_first=True,
31
  norm_first=True,
32
  )
33
- self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
 
 
 
 
 
 
 
 
 
34
  self.query_embed = nn.Embedding(config.chunk_size, config.hidden_dim)
35
- self.action_mean = nn.Linear(config.hidden_dim, config.action_dim)
36
- self.action_log_std = nn.Linear(config.hidden_dim, config.action_dim)
 
 
 
 
 
 
 
 
 
37
  self.proposal_score = nn.Sequential(
38
- nn.LayerNorm(config.hidden_dim),
39
- nn.Linear(config.hidden_dim, 1),
40
  )
41
 
42
- def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  batch_size = scene_tokens.shape[0]
44
  query = self.query_embed.weight.unsqueeze(0).expand(batch_size, -1, -1)
45
- decoded = self.decoder(query, scene_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  return {
47
- "decoded_tokens": decoded,
48
- "action_mean": self.action_mean(decoded),
49
- "action_log_std": self.action_log_std(decoded).clamp(min=-5.0, max=2.0),
50
- "proposal_score": self.proposal_score(decoded.mean(dim=1)).squeeze(-1),
 
 
 
51
  }
52
 
53
  def sample_candidates(self, action_mean: Tensor, action_log_std: Tensor, num_candidates: int | None = None) -> Tensor:
@@ -55,14 +143,17 @@ class ACTBimanualChunkDecoder(nn.Module):
55
  if num_candidates <= 1:
56
  return action_mean.unsqueeze(1)
57
  std = action_log_std.exp()
58
- noise = torch.randn(
59
- action_mean.size(0),
60
- num_candidates,
61
- action_mean.size(1),
62
- action_mean.size(2),
63
- device=action_mean.device,
64
- dtype=action_mean.dtype,
65
- )
 
 
 
66
  candidates = action_mean.unsqueeze(1) + noise * std.unsqueeze(1)
67
  candidates[:, 0] = action_mean
68
  return candidates
 
15
  dropout: float = 0.1
16
  chunk_size: int = 8
17
  action_dim: int = 14
18
+ arm_action_dim: int = 7
19
  num_candidates: int = 8
20
 
21
 
 
31
  batch_first=True,
32
  norm_first=True,
33
  )
34
+ self.revealer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
35
+ actor_layer = nn.TransformerDecoderLayer(
36
+ d_model=config.hidden_dim,
37
+ nhead=config.num_heads,
38
+ dim_feedforward=config.ff_dim,
39
+ dropout=config.dropout,
40
+ batch_first=True,
41
+ norm_first=True,
42
+ )
43
+ self.actor_decoder = nn.TransformerDecoder(actor_layer, num_layers=config.num_layers)
44
  self.query_embed = nn.Embedding(config.chunk_size, config.hidden_dim)
45
+ self.actor_role_bias = nn.Parameter(torch.zeros(1, config.chunk_size, config.hidden_dim))
46
+ self.revealer_mean = nn.Linear(config.hidden_dim, config.arm_action_dim)
47
+ self.revealer_log_std = nn.Linear(config.hidden_dim, config.arm_action_dim)
48
+ self.actor_mean = nn.Linear(config.hidden_dim, config.action_dim - config.arm_action_dim)
49
+ self.actor_log_std = nn.Linear(config.hidden_dim, config.action_dim - config.arm_action_dim)
50
+ self.coordination = nn.Sequential(
51
+ nn.LayerNorm(config.hidden_dim * 3),
52
+ nn.Linear(config.hidden_dim * 3, config.hidden_dim),
53
+ nn.GELU(),
54
+ nn.Linear(config.hidden_dim, config.hidden_dim),
55
+ )
56
  self.proposal_score = nn.Sequential(
57
+ nn.LayerNorm(config.hidden_dim * 3),
58
+ nn.Linear(config.hidden_dim * 3, 1),
59
  )
60
 
61
+ def _deterministic_candidate_noise(
62
+ self,
63
+ action_mean: Tensor,
64
+ num_candidates: int,
65
+ ) -> Tensor:
66
+ batch_size, chunk_size, action_dim = action_mean.shape
67
+ noise = torch.zeros(
68
+ batch_size,
69
+ num_candidates,
70
+ chunk_size,
71
+ action_dim,
72
+ device=action_mean.device,
73
+ dtype=action_mean.dtype,
74
+ )
75
+ if num_candidates <= 1:
76
+ return noise
77
+
78
+ candidate_index = torch.arange(1, num_candidates, device=action_mean.device, dtype=action_mean.dtype).view(
79
+ num_candidates - 1, 1, 1
80
+ )
81
+ step_index = torch.arange(chunk_size, device=action_mean.device, dtype=action_mean.dtype).view(1, chunk_size, 1)
82
+ dim_index = torch.arange(action_dim, device=action_mean.device, dtype=action_mean.dtype).view(1, 1, action_dim)
83
+
84
+ base = torch.sin(candidate_index * 0.73 + step_index * 0.37 + dim_index * 0.19)
85
+ base = base + torch.cos(candidate_index * 1.11 + step_index * 0.17 + dim_index * 0.41)
86
+ base = base / base.square().mean(dim=(1, 2), keepdim=True).sqrt().clamp_min(1e-6)
87
+ noise[:, 1:] = base.unsqueeze(0).expand(batch_size, -1, -1, -1)
88
+ return noise
89
+
90
+ def forward(
91
+ self,
92
+ scene_tokens: Tensor,
93
+ reveal_tokens: Tensor | None = None,
94
+ memory_token: Tensor | None = None,
95
+ ) -> dict[str, Tensor]:
96
  batch_size = scene_tokens.shape[0]
97
  query = self.query_embed.weight.unsqueeze(0).expand(batch_size, -1, -1)
98
+ decoder_memory = scene_tokens
99
+ if reveal_tokens is not None:
100
+ decoder_memory = torch.cat([decoder_memory, reveal_tokens], dim=1)
101
+ if memory_token is not None:
102
+ decoder_memory = torch.cat([decoder_memory, memory_token], dim=1)
103
+
104
+ revealer_tokens = self.revealer_decoder(query, decoder_memory)
105
+ actor_query = query + self.actor_role_bias
106
+ actor_tokens = self.actor_decoder(actor_query, torch.cat([decoder_memory, revealer_tokens], dim=1))
107
+ if reveal_tokens is not None:
108
+ reveal_context = reveal_tokens.mean(dim=1, keepdim=True).expand(-1, self.config.chunk_size, -1)
109
+ else:
110
+ reveal_context = scene_tokens.mean(dim=1, keepdim=True).expand(-1, self.config.chunk_size, -1)
111
+ coordination_input = torch.cat([revealer_tokens, actor_tokens, reveal_context], dim=-1)
112
+ coordination = torch.tanh(self.coordination(coordination_input))
113
+ revealer_tokens = revealer_tokens + coordination
114
+ actor_tokens = actor_tokens + coordination
115
+ action_mean = torch.cat([self.revealer_mean(revealer_tokens), self.actor_mean(actor_tokens)], dim=-1)
116
+ action_log_std = torch.cat(
117
+ [
118
+ self.revealer_log_std(revealer_tokens),
119
+ self.actor_log_std(actor_tokens),
120
+ ],
121
+ dim=-1,
122
+ ).clamp(min=-5.0, max=2.0)
123
+ proposal_features = torch.cat(
124
+ [
125
+ revealer_tokens.mean(dim=1),
126
+ actor_tokens.mean(dim=1),
127
+ coordination.mean(dim=1),
128
+ ],
129
+ dim=-1,
130
+ )
131
  return {
132
+ "decoded_tokens": torch.cat([revealer_tokens, actor_tokens], dim=-1),
133
+ "revealer_tokens": revealer_tokens,
134
+ "actor_tokens": actor_tokens,
135
+ "coordination_tokens": coordination,
136
+ "action_mean": action_mean,
137
+ "action_log_std": action_log_std,
138
+ "proposal_score": self.proposal_score(proposal_features).squeeze(-1),
139
  }
140
 
141
  def sample_candidates(self, action_mean: Tensor, action_log_std: Tensor, num_candidates: int | None = None) -> Tensor:
 
143
  if num_candidates <= 1:
144
  return action_mean.unsqueeze(1)
145
  std = action_log_std.exp()
146
+ if self.training:
147
+ noise = torch.randn(
148
+ action_mean.size(0),
149
+ num_candidates,
150
+ action_mean.size(1),
151
+ action_mean.size(2),
152
+ device=action_mean.device,
153
+ dtype=action_mean.dtype,
154
+ )
155
+ else:
156
+ noise = self._deterministic_candidate_noise(action_mean, num_candidates)
157
  candidates = action_mean.unsqueeze(1) + noise * std.unsqueeze(1)
158
  candidates[:, 0] = action_mean
159
  return candidates
code/reveal_vla_bimanual/models/observation_memory.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
+ @dataclass
10
+ class ObservationMemoryConfig:
11
+ hidden_dim: int = 512
12
+ history_steps: int = 2
13
+ num_layers: int = 1
14
+ dropout: float = 0.1
15
+
16
+
17
+ class ObservationMemory(nn.Module):
18
+ def __init__(self, config: ObservationMemoryConfig) -> None:
19
+ super().__init__()
20
+ self.config = config
21
+ self.gru = nn.GRU(
22
+ input_size=config.hidden_dim,
23
+ hidden_size=config.hidden_dim,
24
+ num_layers=config.num_layers,
25
+ batch_first=True,
26
+ dropout=config.dropout if config.num_layers > 1 else 0.0,
27
+ )
28
+ self.token_proj = nn.Sequential(
29
+ nn.LayerNorm(config.hidden_dim),
30
+ nn.Linear(config.hidden_dim, config.hidden_dim),
31
+ nn.GELU(),
32
+ )
33
+ self.uncertainty_head = nn.Sequential(
34
+ nn.LayerNorm(config.hidden_dim),
35
+ nn.Linear(config.hidden_dim, 1),
36
+ )
37
+
38
+ def forward(
39
+ self,
40
+ scene_tokens: Tensor,
41
+ history_scene_tokens: Tensor | None = None,
42
+ ) -> dict[str, Tensor]:
43
+ pooled_current = scene_tokens.mean(dim=1, keepdim=True)
44
+ if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
45
+ history_pooled = history_scene_tokens.mean(dim=2)
46
+ sequence = torch.cat([history_pooled, pooled_current], dim=1)
47
+ else:
48
+ sequence = pooled_current
49
+ memory_sequence, hidden = self.gru(sequence)
50
+ final_state = hidden[-1]
51
+ return {
52
+ "memory_sequence": memory_sequence,
53
+ "memory_state": final_state,
54
+ "memory_token": self.token_proj(final_state).unsqueeze(1),
55
+ "memory_uncertainty": torch.nn.functional.softplus(self.uncertainty_head(final_state)).squeeze(-1),
56
+ }
code/reveal_vla_bimanual/models/planner.py CHANGED
@@ -3,12 +3,15 @@ from __future__ import annotations
3
  from dataclasses import dataclass
4
 
5
  import torch
6
- from torch import Tensor
7
 
8
 
9
  @dataclass
10
  class PlannerConfig:
 
11
  num_candidates: int = 8
 
 
12
  corridor_weight: float = 1.0
13
  persistence_weight: float = 0.5
14
  proposal_weight: float = 0.5
@@ -18,44 +21,69 @@ class PlannerConfig:
18
  visibility_weight: float = 0.25
19
 
20
 
21
- class RevealPlanner:
22
  def __init__(self, config: PlannerConfig) -> None:
 
23
  self.config = config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def score_rollouts(
26
- self,
27
- rollout_state: dict[str, Tensor],
28
- proposal_scores: Tensor,
29
- candidate_chunks: Tensor | None = None,
30
- belief_gain: Tensor | None = None,
31
- ) -> Tensor:
32
- corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=(-1, -2))
33
- persistence = rollout_state["persistence_horizon"].mean(dim=(-1, -2))
34
- disturbance = rollout_state["disturbance_cost"].mean(dim=-1)
35
- reocclusion_penalty = torch.relu(1.0 - rollout_state["corridor_logits"].sigmoid().amax(dim=-1)).mean(dim=(-1, -2))
36
- task_progress = proposal_scores.new_zeros(proposal_scores.shape)
37
- if candidate_chunks is not None:
38
- actor_reach = torch.tanh(candidate_chunks[..., 8]).mean(dim=-1)
39
- actor_retrieve = torch.tanh(candidate_chunks[..., 13]).amax(dim=-1)
40
- task_progress = 0.5 * (actor_reach + 1.0) * 0.5 + 0.5 * (actor_retrieve + 1.0) * 0.5
41
- score = (
42
- self.config.corridor_weight * corridor_prob
43
- + self.config.persistence_weight * persistence
44
- + self.config.proposal_weight * proposal_scores
45
- + self.config.task_progress_weight * task_progress
46
- - self.config.disturbance_weight * disturbance
47
- - self.config.reocclusion_weight * reocclusion_penalty
48
  )
49
- if belief_gain is not None:
50
- score = score + self.config.visibility_weight * belief_gain
51
- return score
52
 
53
- def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor], proposal_scores: Tensor) -> dict[str, Tensor]:
54
- scores = self.score_rollouts(rollout_state, proposal_scores, candidate_chunks=candidate_chunks)
55
- best_idx = scores.argmax(dim=-1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
57
  return {
58
- "scores": scores,
59
  "best_indices": best_idx,
60
  "best_chunk": candidate_chunks[batch_indices, best_idx],
61
  }
 
3
  from dataclasses import dataclass
4
 
5
  import torch
6
+ from torch import Tensor, nn
7
 
8
 
9
  @dataclass
10
  class PlannerConfig:
11
+ hidden_dim: int = 512
12
  num_candidates: int = 8
13
+ action_dim: int = 14
14
+ utility_margin: float = 0.1
15
  corridor_weight: float = 1.0
16
  persistence_weight: float = 0.5
17
  proposal_weight: float = 0.5
 
21
  visibility_weight: float = 0.25
22
 
23
 
24
+ class RevealPlanner(nn.Module):
25
  def __init__(self, config: PlannerConfig) -> None:
26
+ super().__init__()
27
  self.config = config
28
+ summary_dim = (
29
+ config.action_dim * 2
30
+ + 3
31
+ + 3
32
+ + 1
33
+ + 3
34
+ + 1
35
+ )
36
+ self.trunk = nn.Sequential(
37
+ nn.LayerNorm(summary_dim),
38
+ nn.Linear(summary_dim, config.hidden_dim),
39
+ nn.GELU(),
40
+ nn.Linear(config.hidden_dim, config.hidden_dim),
41
+ nn.GELU(),
42
+ )
43
+ self.success_head = nn.Linear(config.hidden_dim, 1)
44
+ self.risk_head = nn.Linear(config.hidden_dim, 1)
45
 
46
+ def summarize_candidates(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
47
+ candidate_mean = candidate_chunks.mean(dim=2)
48
+ candidate_terminal = candidate_chunks[:, :, -1]
49
+ corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=-2)
50
+ persistence = rollout_state["persistence_horizon"].mean(dim=-2)
51
+ disturbance = rollout_state["disturbance_cost"].mean(dim=-1, keepdim=True)
52
+ reocclusion = rollout_state["reocclusion_logit"].sigmoid().mean(dim=-2)
53
+ uncertainty = rollout_state["uncertainty"].mean(dim=-1, keepdim=True)
54
+ return torch.cat(
55
+ [
56
+ candidate_mean,
57
+ candidate_terminal,
58
+ corridor_prob,
59
+ persistence,
60
+ disturbance,
61
+ reocclusion,
62
+ uncertainty,
63
+ ],
64
+ dim=-1,
 
 
 
 
65
  )
 
 
 
66
 
67
+ def score_rollouts(self, rollout_state: dict[str, Tensor], candidate_chunks: Tensor) -> dict[str, Tensor]:
68
+ features = self.summarize_candidates(candidate_chunks, rollout_state)
69
+ hidden = self.trunk(features)
70
+ success_logits = self.success_head(hidden).squeeze(-1)
71
+ risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
72
+ utility_scores = success_logits.sigmoid() - risk_values
73
+ return {
74
+ "planner_features": features,
75
+ "planner_hidden": hidden,
76
+ "success_logits": success_logits,
77
+ "risk_values": risk_values,
78
+ "utility_scores": utility_scores,
79
+ }
80
+
81
+ def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> dict[str, Tensor]:
82
+ outputs = self.score_rollouts(rollout_state=rollout_state, candidate_chunks=candidate_chunks)
83
+ best_idx = outputs["utility_scores"].argmax(dim=-1)
84
  batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
85
  return {
86
+ **outputs,
87
  "best_indices": best_idx,
88
  "best_chunk": candidate_chunks[batch_indices, best_idx],
89
  }
code/reveal_vla_bimanual/models/policy.py CHANGED
@@ -9,6 +9,7 @@ from torch import Tensor, nn
9
  from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
10
  from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
11
  from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
 
12
  from models.planner import PlannerConfig, RevealPlanner
13
  from models.reveal_head import RevealHeadConfig, RevealStateHead
14
  from models.world_model import RevealWM, RevealWMConfig
@@ -18,6 +19,7 @@ from models.world_model import RevealWM, RevealWMConfig
18
  class PolicyConfig:
19
  backbone: FrozenVLBackboneConfig = field(default_factory=FrozenVLBackboneConfig)
20
  fusion: MultiViewFusionConfig = field(default_factory=MultiViewFusionConfig)
 
21
  decoder: ChunkDecoderConfig = field(default_factory=ChunkDecoderConfig)
22
  reveal_head: RevealHeadConfig = field(default_factory=RevealHeadConfig)
23
  world_model: RevealWMConfig = field(default_factory=RevealWMConfig)
@@ -30,6 +32,7 @@ class BackboneOnlyPolicy(nn.Module):
30
  self.config = config
31
  self.backbone = FrozenVLBackbone(config.backbone)
32
  self.fusion = MultiViewFusion(config.fusion)
 
33
  self.decoder = ACTBimanualChunkDecoder(config.decoder)
34
 
35
  def _encode_language(
@@ -58,16 +61,69 @@ class BackboneOnlyPolicy(nn.Module):
58
  text_tokens = self._encode_language(images, texts=texts, language_tokens=language_tokens)
59
  return self.fusion(image_tokens=image_tokens, proprio=proprio, language_tokens=text_tokens)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def forward(
62
  self,
63
  images: Tensor,
64
  proprio: Tensor,
65
  texts: Sequence[str] | None = None,
66
  language_tokens: dict[str, Tensor] | None = None,
 
 
67
  ) -> dict[str, Tensor]:
68
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
69
- decoded = self.decoder(scene_tokens)
 
 
 
 
 
 
 
70
  decoded["scene_tokens"] = scene_tokens
 
 
71
  return decoded
72
 
73
 
@@ -84,21 +140,43 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
84
  proprio: Tensor,
85
  texts: Sequence[str] | None = None,
86
  language_tokens: dict[str, Tensor] | None = None,
 
 
87
  plan: bool = True,
88
  support_mode_conditioning: bool = True,
 
89
  ) -> dict[str, Tensor]:
90
- outputs = super().forward(images, proprio, texts=texts, language_tokens=language_tokens)
91
- reveal_state = self.reveal_head(outputs["scene_tokens"])
 
 
 
 
 
 
 
 
 
 
92
  outputs["reveal_state"] = reveal_state
 
93
 
94
- candidate_chunks = self.decoder.sample_candidates(
95
- outputs["action_mean"],
96
- outputs["action_log_std"],
97
- num_candidates=self.config.decoder.num_candidates,
98
  )
99
- outputs["candidate_chunks"] = candidate_chunks
100
 
101
  if plan:
 
 
 
 
 
 
 
 
102
  batch_size, num_candidates, chunk_size, action_dim = candidate_chunks.shape
103
  flat_chunks = candidate_chunks.view(batch_size * num_candidates, chunk_size, action_dim)
104
  tiled_scene = outputs["scene_tokens"].unsqueeze(1).expand(-1, num_candidates, -1, -1)
@@ -118,10 +196,11 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
118
  selected = self.planner.select_best(
119
  candidate_chunks=candidate_chunks,
120
  rollout_state=reshaped_rollout,
121
- proposal_scores=outputs["proposal_score"].unsqueeze(-1).expand(-1, num_candidates),
122
  )
123
  outputs["planned_rollout"] = reshaped_rollout
124
  outputs["planned_chunk"] = selected["best_chunk"]
125
- outputs["planner_scores"] = selected["scores"]
 
 
126
  outputs["best_candidate_indices"] = selected["best_indices"]
127
  return outputs
 
9
  from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
10
  from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
11
  from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
12
+ from models.observation_memory import ObservationMemory, ObservationMemoryConfig
13
  from models.planner import PlannerConfig, RevealPlanner
14
  from models.reveal_head import RevealHeadConfig, RevealStateHead
15
  from models.world_model import RevealWM, RevealWMConfig
 
19
  class PolicyConfig:
20
  backbone: FrozenVLBackboneConfig = field(default_factory=FrozenVLBackboneConfig)
21
  fusion: MultiViewFusionConfig = field(default_factory=MultiViewFusionConfig)
22
+ memory: ObservationMemoryConfig = field(default_factory=ObservationMemoryConfig)
23
  decoder: ChunkDecoderConfig = field(default_factory=ChunkDecoderConfig)
24
  reveal_head: RevealHeadConfig = field(default_factory=RevealHeadConfig)
25
  world_model: RevealWMConfig = field(default_factory=RevealWMConfig)
 
32
  self.config = config
33
  self.backbone = FrozenVLBackbone(config.backbone)
34
  self.fusion = MultiViewFusion(config.fusion)
35
+ self.memory = ObservationMemory(config.memory)
36
  self.decoder = ACTBimanualChunkDecoder(config.decoder)
37
 
38
  def _encode_language(
 
61
  text_tokens = self._encode_language(images, texts=texts, language_tokens=language_tokens)
62
  return self.fusion(image_tokens=image_tokens, proprio=proprio, language_tokens=text_tokens)
63
 
64
+ def _expand_language_tokens_for_history(
65
+ self,
66
+ language_tokens: dict[str, Tensor] | None,
67
+ history_steps: int,
68
+ ) -> dict[str, Tensor] | None:
69
+ if language_tokens is None:
70
+ return None
71
+ return {
72
+ key: value.unsqueeze(1).expand(-1, history_steps, *value.shape[1:]).reshape(
73
+ value.shape[0] * history_steps, *value.shape[1:]
74
+ )
75
+ for key, value in language_tokens.items()
76
+ }
77
+
78
+ def encode_history(
79
+ self,
80
+ history_images: Tensor | None,
81
+ history_proprio: Tensor | None,
82
+ texts: Sequence[str] | None = None,
83
+ language_tokens: dict[str, Tensor] | None = None,
84
+ ) -> Tensor | None:
85
+ if history_images is None or history_proprio is None or history_images.numel() == 0:
86
+ return None
87
+ batch_size, history_steps = history_images.shape[:2]
88
+ flat_images = history_images.reshape(batch_size * history_steps, *history_images.shape[2:])
89
+ flat_proprio = history_proprio.reshape(batch_size * history_steps, history_proprio.shape[-1])
90
+ if language_tokens is None:
91
+ if texts is None:
92
+ raise ValueError("Either texts or language_tokens must be provided.")
93
+ flat_texts = [text for text in texts for _ in range(history_steps)]
94
+ flat_language_tokens = None
95
+ else:
96
+ flat_texts = None
97
+ flat_language_tokens = self._expand_language_tokens_for_history(language_tokens, history_steps)
98
+ history_scene = self.encode_scene(
99
+ flat_images,
100
+ flat_proprio,
101
+ texts=flat_texts,
102
+ language_tokens=flat_language_tokens,
103
+ )
104
+ return history_scene.view(batch_size, history_steps, history_scene.shape[1], history_scene.shape[2])
105
+
106
  def forward(
107
  self,
108
  images: Tensor,
109
  proprio: Tensor,
110
  texts: Sequence[str] | None = None,
111
  language_tokens: dict[str, Tensor] | None = None,
112
+ history_images: Tensor | None = None,
113
+ history_proprio: Tensor | None = None,
114
  ) -> dict[str, Tensor]:
115
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
116
+ history_scene_tokens = self.encode_history(
117
+ history_images,
118
+ history_proprio,
119
+ texts=texts,
120
+ language_tokens=language_tokens,
121
+ )
122
+ memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
123
+ decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
124
  decoded["scene_tokens"] = scene_tokens
125
+ decoded["history_scene_tokens"] = history_scene_tokens
126
+ decoded["memory_output"] = memory_output
127
  return decoded
128
 
129
 
 
140
  proprio: Tensor,
141
  texts: Sequence[str] | None = None,
142
  language_tokens: dict[str, Tensor] | None = None,
143
+ history_images: Tensor | None = None,
144
+ history_proprio: Tensor | None = None,
145
  plan: bool = True,
146
  support_mode_conditioning: bool = True,
147
+ candidate_chunks_override: Tensor | None = None,
148
  ) -> dict[str, Tensor]:
149
+ outputs = super().forward(
150
+ images,
151
+ proprio,
152
+ texts=texts,
153
+ language_tokens=language_tokens,
154
+ history_images=history_images,
155
+ history_proprio=history_proprio,
156
+ )
157
+ reveal_state = self.reveal_head(
158
+ outputs["scene_tokens"],
159
+ memory_token=outputs["memory_output"]["memory_token"],
160
+ )
161
  outputs["reveal_state"] = reveal_state
162
+ outputs["memory_uncertainty"] = outputs["memory_output"]["memory_uncertainty"]
163
 
164
+ decoded = self.decoder(
165
+ outputs["scene_tokens"],
166
+ reveal_tokens=reveal_state["field_tokens"],
167
+ memory_token=outputs["memory_output"]["memory_token"],
168
  )
169
+ outputs.update(decoded)
170
 
171
  if plan:
172
+ candidate_chunks = candidate_chunks_override
173
+ if candidate_chunks is None:
174
+ candidate_chunks = self.decoder.sample_candidates(
175
+ outputs["action_mean"],
176
+ outputs["action_log_std"],
177
+ num_candidates=self.config.decoder.num_candidates,
178
+ )
179
+ outputs["candidate_chunks"] = candidate_chunks
180
  batch_size, num_candidates, chunk_size, action_dim = candidate_chunks.shape
181
  flat_chunks = candidate_chunks.view(batch_size * num_candidates, chunk_size, action_dim)
182
  tiled_scene = outputs["scene_tokens"].unsqueeze(1).expand(-1, num_candidates, -1, -1)
 
196
  selected = self.planner.select_best(
197
  candidate_chunks=candidate_chunks,
198
  rollout_state=reshaped_rollout,
 
199
  )
200
  outputs["planned_rollout"] = reshaped_rollout
201
  outputs["planned_chunk"] = selected["best_chunk"]
202
+ outputs["planner_success_logits"] = selected["success_logits"]
203
+ outputs["planner_risk_values"] = selected["risk_values"]
204
+ outputs["planner_scores"] = selected["utility_scores"]
205
  outputs["best_candidate_indices"] = selected["best_indices"]
206
  return outputs
code/reveal_vla_bimanual/models/reveal_head.py CHANGED
@@ -2,6 +2,8 @@ from __future__ import annotations
2
 
3
  from dataclasses import dataclass
4
 
 
 
5
  from torch import Tensor, nn
6
 
7
 
@@ -12,6 +14,8 @@ class RevealHeadConfig:
12
  num_approach_templates: int = 32
13
  rollout_horizon: int = 5
14
  belief_map_size: int = 32
 
 
15
  predict_belief_map: bool = False
16
 
17
 
@@ -19,37 +23,96 @@ class RevealStateHead(nn.Module):
19
  def __init__(self, config: RevealHeadConfig) -> None:
20
  super().__init__()
21
  self.config = config
22
- self.trunk = nn.Sequential(
 
 
 
 
 
 
 
 
23
  nn.LayerNorm(config.hidden_dim),
24
  nn.Linear(config.hidden_dim, config.hidden_dim),
25
  nn.GELU(),
 
26
  )
27
- self.support_mode = nn.Linear(config.hidden_dim, config.num_support_modes)
28
- self.corridor = nn.Linear(
29
- config.hidden_dim,
30
- config.num_support_modes * config.num_approach_templates,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  )
32
- self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
33
- self.disturbance = nn.Linear(config.hidden_dim, 1)
34
- self.belief_map = None
35
- if config.predict_belief_map:
36
- map_side = config.belief_map_size
37
- self.belief_map = nn.Linear(config.hidden_dim, map_side * map_side)
38
 
39
- def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
40
- pooled = scene_tokens.mean(dim=1)
41
- hidden = self.trunk(pooled)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  output = {
43
- "support_mode_logits": self.support_mode(hidden),
44
- "corridor_logits": self.corridor(hidden).view(
45
- hidden.shape[0],
46
- self.config.num_support_modes,
47
- self.config.num_approach_templates,
48
- ),
49
- "persistence_horizon": self.persistence(hidden),
50
- "disturbance_cost": self.disturbance(hidden).squeeze(-1),
 
 
 
 
51
  }
52
- if self.belief_map is not None:
53
- side = self.config.belief_map_size
54
- output["belief_map"] = self.belief_map(hidden).view(hidden.shape[0], 1, side, side)
55
  return output
 
2
 
3
  from dataclasses import dataclass
4
 
5
+ import torch
6
+ import torch.nn.functional as F
7
  from torch import Tensor, nn
8
 
9
 
 
14
  num_approach_templates: int = 32
15
  rollout_horizon: int = 5
16
  belief_map_size: int = 32
17
+ field_size: int = 16
18
+ num_heads: int = 4
19
  predict_belief_map: bool = False
20
 
21
 
 
23
def __init__(self, config: RevealHeadConfig) -> None:
    """Build the attention-based reveal-field head described by *config*."""
    super().__init__()
    self.config = config
    hidden = config.hidden_dim
    num_cells = config.field_size * config.field_size
    # One learned query per spatial cell of the reveal field.
    self.field_queries = nn.Parameter(torch.randn(num_cells, hidden) * 0.02)
    self.field_attention = nn.MultiheadAttention(
        embed_dim=hidden,
        num_heads=config.num_heads,
        batch_first=True,
    )
    self.field_mlp = nn.Sequential(
        nn.LayerNorm(hidden),
        nn.Linear(hidden, hidden),
        nn.GELU(),
        nn.Linear(hidden, hidden),
    )
    # Global support-mode classifier over pooled [scene, field, memory] features.
    self.support_mode = nn.Sequential(
        nn.LayerNorm(hidden * 3),
        nn.Linear(hidden * 3, hidden),
        nn.GELU(),
        nn.Linear(hidden, config.num_support_modes),
    )
    # Per-cell 1x1-conv decoders applied to the field grid.
    self.access_field = nn.Conv2d(hidden, config.num_support_modes, kernel_size=1)
    self.persistence_field = nn.Conv2d(hidden, config.num_support_modes, kernel_size=1)
    self.disturbance_field = nn.Conv2d(hidden, 1, kernel_size=1)
    self.uncertainty_field = nn.Conv2d(hidden, 1, kernel_size=1)
    # Re-occlusion prediction from pooled field + memory features.
    self.reocclusion_head = nn.Sequential(
        nn.LayerNorm(hidden * 2),
        nn.Linear(hidden * 2, hidden),
        nn.GELU(),
        nn.Linear(hidden, config.num_support_modes),
    )
    self.latent_summary = nn.Sequential(
        nn.LayerNorm(hidden * 2),
        nn.Linear(hidden * 2, hidden),
        nn.GELU(),
    )
 
 
 
 
 
 
61
 
62
+ def forward(self, scene_tokens: Tensor, memory_token: Tensor | None = None) -> dict[str, Tensor]:
63
+ source_tokens = scene_tokens if memory_token is None else torch.cat([scene_tokens, memory_token], dim=1)
64
+ batch_size = source_tokens.shape[0]
65
+ field_queries = self.field_queries.unsqueeze(0).expand(batch_size, -1, -1)
66
+ field_tokens, _ = self.field_attention(field_queries, source_tokens, source_tokens)
67
+ field_tokens = field_tokens + self.field_mlp(field_tokens)
68
+ side = self.config.field_size
69
+ grid = field_tokens.transpose(1, 2).reshape(batch_size, self.config.hidden_dim, side, side)
70
+ pooled_scene = scene_tokens.mean(dim=1)
71
+ pooled_field = field_tokens.mean(dim=1)
72
+ if memory_token is not None:
73
+ pooled_memory = memory_token.squeeze(1)
74
+ else:
75
+ pooled_memory = pooled_scene.new_zeros(pooled_scene.shape)
76
+ support_input = torch.cat([pooled_scene, pooled_field, pooled_memory], dim=-1)
77
+ access_field = self.access_field(grid)
78
+ persistence_field = torch.sigmoid(self.persistence_field(grid))
79
+ disturbance_field = torch.sigmoid(self.disturbance_field(grid))
80
+ uncertainty_field = F.softplus(self.uncertainty_field(grid))
81
+ corridor_source = access_field.amax(dim=-2)
82
+ corridor_logits = F.interpolate(
83
+ corridor_source,
84
+ size=self.config.num_approach_templates,
85
+ mode="linear",
86
+ align_corners=False,
87
+ )
88
+ access_prob = torch.sigmoid(access_field)
89
+ weighted_persistence = (persistence_field * access_prob).sum(dim=(-1, -2))
90
+ access_mass = access_prob.sum(dim=(-1, -2)).clamp_min(1e-4)
91
+ persistence_horizon = self.config.rollout_horizon * weighted_persistence / access_mass
92
+ disturbance_cost = disturbance_field.mean(dim=(-1, -2)).squeeze(1)
93
+ belief_map = access_field.max(dim=1, keepdim=True).values
94
+ if belief_map.shape[-1] != self.config.belief_map_size:
95
+ belief_map = F.interpolate(
96
+ belief_map,
97
+ size=(self.config.belief_map_size, self.config.belief_map_size),
98
+ mode="bilinear",
99
+ align_corners=False,
100
+ )
101
+ latent_summary = self.latent_summary(torch.cat([pooled_scene, pooled_field], dim=-1))
102
  output = {
103
+ "support_mode_logits": self.support_mode(support_input),
104
+ "corridor_logits": corridor_logits,
105
+ "persistence_horizon": persistence_horizon,
106
+ "disturbance_cost": disturbance_cost,
107
+ "access_field": access_field,
108
+ "persistence_field": persistence_field,
109
+ "disturbance_field": disturbance_field,
110
+ "uncertainty_field": uncertainty_field,
111
+ "field_tokens": field_tokens,
112
+ "latent_summary": latent_summary,
113
+ "reocclusion_logit": self.reocclusion_head(torch.cat([pooled_field, pooled_memory], dim=-1)),
114
+ "persistence_uncertainty": uncertainty_field.mean(dim=(-1, -2)).squeeze(1),
115
  }
116
+ if self.config.predict_belief_map:
117
+ output["belief_map"] = belief_map
 
118
  return output
code/reveal_vla_bimanual/models/world_model.py CHANGED
@@ -24,6 +24,9 @@ class RevealWM(nn.Module):
24
  + config.num_support_modes * config.num_approach_templates
25
  + config.num_support_modes
26
  + 1
 
 
 
27
  )
28
  self.initial = nn.Sequential(
29
  nn.LayerNorm(config.hidden_dim + reveal_dim),
@@ -39,6 +42,8 @@ class RevealWM(nn.Module):
39
  )
40
  self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
41
  self.disturbance = nn.Linear(config.hidden_dim, 1)
 
 
42
 
43
  def _flatten_reveal(self, reveal_state: dict[str, Tensor]) -> Tensor:
44
  return torch.cat(
@@ -47,6 +52,9 @@ class RevealWM(nn.Module):
47
  reveal_state["corridor_logits"].flatten(start_dim=1),
48
  reveal_state["persistence_horizon"],
49
  reveal_state["disturbance_cost"].unsqueeze(-1),
 
 
 
50
  ],
51
  dim=-1,
52
  )
@@ -67,4 +75,6 @@ class RevealWM(nn.Module):
67
  ),
68
  "persistence_horizon": self.persistence(rollout),
69
  "disturbance_cost": self.disturbance(rollout).squeeze(-1),
 
 
70
  }
 
24
  + config.num_support_modes * config.num_approach_templates
25
  + config.num_support_modes
26
  + 1
27
+ + config.num_support_modes
28
+ + 1
29
+ + config.hidden_dim
30
  )
31
  self.initial = nn.Sequential(
32
  nn.LayerNorm(config.hidden_dim + reveal_dim),
 
42
  )
43
  self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
44
  self.disturbance = nn.Linear(config.hidden_dim, 1)
45
+ self.reocclusion = nn.Linear(config.hidden_dim, config.num_support_modes)
46
+ self.uncertainty = nn.Linear(config.hidden_dim, 1)
47
 
48
  def _flatten_reveal(self, reveal_state: dict[str, Tensor]) -> Tensor:
49
  return torch.cat(
 
52
  reveal_state["corridor_logits"].flatten(start_dim=1),
53
  reveal_state["persistence_horizon"],
54
  reveal_state["disturbance_cost"].unsqueeze(-1),
55
+ reveal_state["reocclusion_logit"],
56
+ reveal_state["persistence_uncertainty"].unsqueeze(-1),
57
+ reveal_state["latent_summary"],
58
  ],
59
  dim=-1,
60
  )
 
75
  ),
76
  "persistence_horizon": self.persistence(rollout),
77
  "disturbance_cost": self.disturbance(rollout).squeeze(-1),
78
+ "reocclusion_logit": self.reocclusion(rollout),
79
+ "uncertainty": torch.nn.functional.softplus(self.uncertainty(rollout)).squeeze(-1),
80
  }
code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO CHANGED
@@ -1,7 +1,7 @@
1
  Metadata-Version: 2.4
2
  Name: reveal-vla-bimanual
3
  Version: 0.1.0
4
- Summary: Simulation-first bimanual reveal-and-retrieve prototype
5
  Requires-Python: <3.11,>=3.10
6
  Description-Content-Type: text/markdown
7
  Requires-Dist: accelerate>=0.31.0
@@ -21,7 +21,9 @@ Requires-Dist: transformers>=4.41.0
21
 
22
  # reveal_vla_bimanual
23
 
24
- Simulation-first prototype for bimanual reveal-and-retrieve under elastic occlusion.
 
 
25
 
26
  This repo is structured around five top-level modules:
27
 
@@ -38,3 +40,66 @@ Current bootstrap priorities:
38
  3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
39
 
40
  Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Metadata-Version: 2.4
2
  Name: reveal-vla-bimanual
3
  Version: 0.1.0
4
+ Summary: Language-conditioned bimanual reveal-and-retrieve policy prototype
5
  Requires-Python: <3.11,>=3.10
6
  Description-Content-Type: text/markdown
7
  Requires-Dist: accelerate>=0.31.0
 
21
 
22
  # reveal_vla_bimanual
23
 
24
+ Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
25
+
26
+ This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
27
 
28
  This repo is structured around five top-level modules:
29
 
 
40
  3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
41
 
42
  Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
43
+
44
+ ## RLBench env A
45
+
46
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
47
+
48
+ Bring it up with:
49
+
50
+ ```bash
51
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
52
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
53
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
54
+ ```
55
+
56
+ Verify GPU GL on the headless display:
57
+
58
+ ```bash
59
+ DISPLAY=:99 glxinfo -B
60
+ ```
61
+
62
+ Run the RLBench launch/reset/step smoke test:
63
+
64
+ ```bash
65
+ env \
66
+ DISPLAY=:99 \
67
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
68
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
69
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
70
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
71
+ /workspace/.tools/micromamba/bin/micromamba run \
72
+ -r /workspace/.micromamba \
73
+ -p /workspace/envs/rlbench \
74
+ python -m sim_rlbench.launch_smoke --headless
75
+ ```
76
+
77
+ The working benchmark interface is fixed to three cameras only:
78
+
79
+ - `front`
80
+ - `wrist_left`
81
+ - `wrist_right`
82
+
83
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
84
+
85
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
86
+
87
+ ```bash
88
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
89
+ python -m sim_rlbench.smoke_test --print-train-command
90
+ ```
91
+
92
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
93
+
94
+ ```bash
95
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
96
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
97
+ ```
98
+
99
+ If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
100
+
101
+ ```bash
102
+ apt-get install -y squashfs-tools
103
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
104
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
105
+ ```
code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt CHANGED
@@ -4,10 +4,14 @@ eval/__init__.py
4
  eval/ablations.py
5
  eval/metrics.py
6
  eval/report.py
 
 
 
7
  models/__init__.py
8
  models/action_decoder.py
9
  models/backbones.py
10
  models/multiview_fusion.py
 
11
  models/planner.py
12
  models/policy.py
13
  models/reveal_head.py
@@ -21,15 +25,26 @@ reveal_vla_bimanual.egg-info/requires.txt
21
  reveal_vla_bimanual.egg-info/top_level.txt
22
  sim_reveal/__init__.py
23
  sim_reveal/base.py
 
 
 
 
24
  sim_reveal/labels.py
 
25
  sim_reveal/proxy_specs.py
26
  sim_reveal/teachers.py
27
  sim_rlbench/__init__.py
28
  sim_rlbench/camera_spec.py
 
 
 
 
29
  sim_rlbench/obs_adapter.py
30
  sim_rlbench/peract2_runner.py
31
  sim_rlbench/smoke_test.py
32
  sim_rlbench/task_splits.py
33
  train/__init__.py
34
  train/losses.py
 
 
35
  train/trainer.py
 
4
  eval/ablations.py
5
  eval/metrics.py
6
  eval/report.py
7
+ eval/run_ablations.py
8
+ eval/run_reveal_benchmark.py
9
+ eval/run_rlbench_rollout_eval.py
10
  models/__init__.py
11
  models/action_decoder.py
12
  models/backbones.py
13
  models/multiview_fusion.py
14
+ models/observation_memory.py
15
  models/planner.py
16
  models/policy.py
17
  models/reveal_head.py
 
25
  reveal_vla_bimanual.egg-info/top_level.txt
26
  sim_reveal/__init__.py
27
  sim_reveal/base.py
28
+ sim_reveal/dataset.py
29
+ sim_reveal/generate_dataset.py
30
+ sim_reveal/isaac_smoke.py
31
+ sim_reveal/isaac_wrapper.py
32
  sim_reveal/labels.py
33
+ sim_reveal/procedural_envs.py
34
  sim_reveal/proxy_specs.py
35
  sim_reveal/teachers.py
36
  sim_rlbench/__init__.py
37
  sim_rlbench/camera_spec.py
38
+ sim_rlbench/dataset.py
39
+ sim_rlbench/dataset_download.py
40
+ sim_rlbench/generate_smoke_dataset.py
41
+ sim_rlbench/launch_smoke.py
42
  sim_rlbench/obs_adapter.py
43
  sim_rlbench/peract2_runner.py
44
  sim_rlbench/smoke_test.py
45
  sim_rlbench/task_splits.py
46
  train/__init__.py
47
  train/losses.py
48
+ train/run_experiment.py
49
+ train/run_rlbench_experiment.py
50
  train/trainer.py
code/reveal_vla_bimanual/scripts/start_rlbench_x.sh CHANGED
@@ -8,11 +8,12 @@ DISPLAY_ID=":${DISPLAY_NUM}"
8
  LOG_DIR="${ROOT_DIR}/logs"
9
  LOG_FILE="${LOG_DIR}/x${DISPLAY_NUM}.log"
10
  PID_FILE="${LOG_DIR}/x${DISPLAY_NUM}.pid"
 
 
11
 
12
  DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1)}"
13
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
14
  SHIM_ROOT="${ROOT_DIR}/system_shims/nvidia${DRIVER_BRANCH}"
15
- XORG_CONF="${PROJECT_DIR}/docs/xorg.rtx6000.conf"
16
  MODULE_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg,/usr/lib/xorg/modules"
17
  SHIM_LD_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu:${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia"
18
  XORG_BIN="${XORG_BIN:-$(command -v Xorg || true)}"
@@ -22,6 +23,33 @@ fi
22
 
23
  mkdir -p "${LOG_DIR}"
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  find_x_pid() {
26
  ps -eo pid=,args= | awk -v display="${DISPLAY_ID}" -v conf="${XORG_CONF}" '
27
  $0 ~ display && $0 ~ conf && $0 ~ /(^|[[:space:]])([^[:space:]]*\/)?Xorg([[:space:]]|$)/ {
@@ -37,6 +65,19 @@ if [[ ! -f "${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidi
37
  exit 1
38
  fi
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  PID="$(find_x_pid || true)"
41
  if [[ -n "${PID}" ]]; then
42
  echo "X already running on ${DISPLAY_ID}"
 
8
  LOG_DIR="${ROOT_DIR}/logs"
9
  LOG_FILE="${LOG_DIR}/x${DISPLAY_NUM}.log"
10
  PID_FILE="${LOG_DIR}/x${DISPLAY_NUM}.pid"
11
+ XORG_TEMPLATE="${PROJECT_DIR}/docs/xorg.rtx6000.conf"
12
+ XORG_CONF="${LOG_DIR}/x${DISPLAY_NUM}.conf"
13
 
14
  DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1)}"
15
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
16
  SHIM_ROOT="${ROOT_DIR}/system_shims/nvidia${DRIVER_BRANCH}"
 
17
  MODULE_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg,/usr/lib/xorg/modules"
18
  SHIM_LD_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu:${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia"
19
  XORG_BIN="${XORG_BIN:-$(command -v Xorg || true)}"
 
23
 
24
  mkdir -p "${LOG_DIR}"
25
 
26
# Print the Xorg "PCI:bus:device:function" BusID for the primary NVIDIA GPU.
# Resolution order: explicit XORG_BUS_ID override, then XORG_BUS_ID_RAW or
# nvidia-smi output (hex "domain:bus:device.function") converted to decimal.
# Returns non-zero when no bus id can be determined.
derive_bus_id() {
  # Fix: honor the explicit override before invoking nvidia-smi, so an
  # override works even on hosts where nvidia-smi is absent or failing.
  if [[ -n "${XORG_BUS_ID:-}" ]]; then
    printf '%s\n' "${XORG_BUS_ID}"
    return 0
  fi
  local raw_bus_id="${XORG_BUS_ID_RAW:-$(nvidia-smi --query-gpu=pci.bus_id --format=csv,noheader | head -n1 | tr -d ' ')}"
  if [[ -z "${raw_bus_id}" ]]; then
    return 1
  fi
  # Xorg expects decimal fields; nvidia-smi reports hexadecimal.
  python3 - "${raw_bus_id}" <<'PY'
import sys

raw = sys.argv[1]
_, bus, device_func = raw.split(":")
device, function = device_func.split(".")
bus = int(bus, 16)
device = int(device, 16)
function = int(function, 16)
print(f"PCI:{bus}:{device}:{function}")
PY
}
47
+
48
# Render the Xorg template with the given BusID substituted in, writing the
# result to the per-display config path (XORG_CONF).
write_xorg_config() {
  local new_bus_id="$1"
  # Replace whatever BusID the template ships with by the detected one.
  sed -E "s/BusID[[:space:]]+\"[^\"]+\"/BusID \"${new_bus_id}\"/" "${XORG_TEMPLATE}" > "${XORG_CONF}"
}
52
+
53
  find_x_pid() {
54
  ps -eo pid=,args= | awk -v display="${DISPLAY_ID}" -v conf="${XORG_CONF}" '
55
  $0 ~ display && $0 ~ conf && $0 ~ /(^|[[:space:]])([^[:space:]]*\/)?Xorg([[:space:]]|$)/ {
 
65
  exit 1
66
  fi
67
 
68
+ if [[ ! -f "${XORG_TEMPLATE}" ]]; then
69
+ echo "missing Xorg template at ${XORG_TEMPLATE}" >&2
70
+ exit 1
71
+ fi
72
+
73
+ BUS_ID="$(derive_bus_id || true)"
74
+ if [[ -z "${BUS_ID}" ]]; then
75
+ echo "failed to determine NVIDIA BusID from nvidia-smi" >&2
76
+ exit 1
77
+ fi
78
+
79
+ write_xorg_config "${BUS_ID}"
80
+
81
  PID="$(find_x_pid || true)"
82
  if [[ -n "${PID}" ]]; then
83
  echo "X already running on ${DISPLAY_ID}"
code/reveal_vla_bimanual/sim_reveal/dataset.py CHANGED
@@ -7,8 +7,33 @@ import torch
7
  from torch import Tensor
8
  from torch.utils.data import Dataset
9
 
 
 
10
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def collect_teacher_dataset(
14
  proxy_names: Sequence[str] | None = None,
@@ -17,6 +42,8 @@ def collect_teacher_dataset(
17
  seed: int = 0,
18
  chunk_horizon: int = 8,
19
  rollout_horizon: int = 5,
 
 
20
  ) -> dict[str, Any]:
21
  proxy_names = tuple(proxy_names or available_proxy_names())
22
  samples: list[dict[str, Any]] = []
@@ -32,19 +59,41 @@ def collect_teacher_dataset(
32
  seed=seed + proxy_offset * 10_000 + episode_idx,
33
  rollout_horizon=rollout_horizon,
34
  )
35
- _, privileged_state = env.reset(seed=seed + proxy_offset * 10_000 + episode_idx)
 
36
  while True:
37
  action_chunk, rollout = env.teacher_chunk_and_rollout(
38
  chunk_horizon=chunk_horizon,
39
  rollout_horizon=rollout_horizon,
40
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  samples.append(
42
  {
 
43
  "proxy_name": proxy_name,
44
  "episode_id": episode_idx,
45
  "render_state": env.render_state(privileged_state),
46
- "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
47
- "language_goal": env.get_observation(privileged_state)["text"],
48
  "action_chunk": action_chunk.astype("float32"),
49
  "support_mode": int(privileged_state["support_mode"]),
50
  "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
@@ -55,10 +104,22 @@ def collect_teacher_dataset(
55
  "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
56
  "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
57
  "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
 
 
 
 
 
 
58
  }
59
  )
60
  proxy_samples += 1
61
  _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
 
 
 
 
 
 
62
  if terminated:
63
  proxy_success += 1
64
  if terminated or truncated:
@@ -69,9 +130,12 @@ def collect_teacher_dataset(
69
  "teacher_success": proxy_success / float(max(1, episodes_per_proxy)),
70
  }
71
  return {
 
72
  "resolution": resolution,
73
  "chunk_horizon": chunk_horizon,
74
  "rollout_horizon": rollout_horizon,
 
 
75
  "samples": samples,
76
  "summary": summary,
77
  }
@@ -98,11 +162,29 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
98
 
99
  def __getitem__(self, index: int) -> dict[str, Any]:
100
  sample = self.samples[index]
 
101
  images = render_views_from_state(
102
  proxy_name=sample["proxy_name"],
103
  render_state=sample["render_state"],
104
  resolution=self.resolution,
105
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  stacked = torch.from_numpy(
107
  torch.stack(
108
  [
@@ -113,8 +195,14 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
113
  dim=0,
114
  ).numpy()
115
  ).permute(0, 3, 1, 2).float() / 255.0
 
 
 
 
116
  return {
117
  "images": stacked,
 
 
118
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
119
  "texts": sample["language_goal"],
120
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
@@ -127,6 +215,17 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
127
  "rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
128
  "rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
129
  "rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
 
 
 
 
 
 
 
 
 
 
 
130
  "proxy_name": sample["proxy_name"],
131
  "episode_id": sample["episode_id"],
132
  }
 
7
  from torch import Tensor
8
  from torch.utils.data import Dataset
9
 
10
+ import numpy as np
11
+
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
+ NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v4_noleak_counterfactual"
15
+ LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
+ {
17
+ "target_template",
18
+ "support_mode",
19
+ "visibility",
20
+ "actor_template",
21
+ "actor_progress",
22
+ "corridor_current",
23
+ }
24
+ )
25
+
26
+
27
+ def _assert_noleak_sample(sample: dict[str, Any]) -> None:
28
+ render_state = sample.get("render_state", {})
29
+ leaked_keys = sorted(LEGACY_PRIVILEGED_RENDER_KEYS.intersection(render_state))
30
+ if leaked_keys:
31
+ joined = ", ".join(leaked_keys)
32
+ raise ValueError(
33
+ "Legacy leaked proxy sample detected. Rebuild the dataset with the current "
34
+ f"sim_reveal/procedural_envs.py. Privileged render keys found: {joined}"
35
+ )
36
+
37
 
38
  def collect_teacher_dataset(
39
  proxy_names: Sequence[str] | None = None,
 
42
  seed: int = 0,
43
  chunk_horizon: int = 8,
44
  rollout_horizon: int = 5,
45
+ history_steps: int = 2,
46
+ planner_candidates: int = 4,
47
  ) -> dict[str, Any]:
48
  proxy_names = tuple(proxy_names or available_proxy_names())
49
  samples: list[dict[str, Any]] = []
 
59
  seed=seed + proxy_offset * 10_000 + episode_idx,
60
  rollout_horizon=rollout_horizon,
61
  )
62
+ observation, privileged_state = env.reset(seed=seed + proxy_offset * 10_000 + episode_idx)
63
+ history_buffer: list[dict[str, Any]] = []
64
  while True:
65
  action_chunk, rollout = env.teacher_chunk_and_rollout(
66
  chunk_horizon=chunk_horizon,
67
  rollout_horizon=rollout_horizon,
68
  )
69
+ observation = env.get_observation(privileged_state)
70
+ candidate_action_chunks, candidate_outcomes = env.sample_candidate_action_chunks(
71
+ teacher_chunk=action_chunk,
72
+ num_candidates=planner_candidates,
73
+ rollout_horizon=rollout_horizon,
74
+ )
75
+ padded_history_render_states = []
76
+ padded_history_proprio = []
77
+ history_count = min(history_steps, len(history_buffer))
78
+ pad_count = history_steps - history_count
79
+ if history_count > 0:
80
+ recent_history = history_buffer[-history_count:]
81
+ else:
82
+ recent_history = []
83
+ for _ in range(pad_count):
84
+ padded_history_render_states.append(env.render_state(privileged_state))
85
+ padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
86
+ for item in recent_history:
87
+ padded_history_render_states.append(item["render_state"])
88
+ padded_history_proprio.append(item["proprio"])
89
  samples.append(
90
  {
91
+ "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
92
  "proxy_name": proxy_name,
93
  "episode_id": episode_idx,
94
  "render_state": env.render_state(privileged_state),
95
+ "proprio": observation["proprio"].astype("float32"),
96
+ "language_goal": observation["text"],
97
  "action_chunk": action_chunk.astype("float32"),
98
  "support_mode": int(privileged_state["support_mode"]),
99
  "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
 
104
  "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
105
  "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
106
  "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
107
+ "history_render_states": padded_history_render_states,
108
+ "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
109
+ if padded_history_proprio
110
+ else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
111
+ "candidate_action_chunks": candidate_action_chunks.astype("float32"),
112
+ **candidate_outcomes,
113
  }
114
  )
115
  proxy_samples += 1
116
  _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
117
+ history_buffer.append(
118
+ {
119
+ "render_state": env.render_state(privileged_state),
120
+ "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
121
+ }
122
+ )
123
  if terminated:
124
  proxy_success += 1
125
  if terminated or truncated:
 
130
  "teacher_success": proxy_success / float(max(1, episodes_per_proxy)),
131
  }
132
  return {
133
+ "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
134
  "resolution": resolution,
135
  "chunk_horizon": chunk_horizon,
136
  "rollout_horizon": rollout_horizon,
137
+ "history_steps": history_steps,
138
+ "planner_candidates": planner_candidates,
139
  "samples": samples,
140
  "summary": summary,
141
  }
 
162
 
163
  def __getitem__(self, index: int) -> dict[str, Any]:
164
  sample = self.samples[index]
165
+ _assert_noleak_sample(sample)
166
  images = render_views_from_state(
167
  proxy_name=sample["proxy_name"],
168
  render_state=sample["render_state"],
169
  resolution=self.resolution,
170
  )
171
+ history_images = []
172
+ for history_state in sample.get("history_render_states", []):
173
+ rendered = render_views_from_state(
174
+ proxy_name=sample["proxy_name"],
175
+ render_state=history_state,
176
+ resolution=self.resolution,
177
+ )
178
+ history_images.append(
179
+ torch.stack(
180
+ [
181
+ torch.from_numpy(rendered["front"]),
182
+ torch.from_numpy(rendered["wrist_left"]),
183
+ torch.from_numpy(rendered["wrist_right"]),
184
+ ],
185
+ dim=0,
186
+ )
187
+ )
188
  stacked = torch.from_numpy(
189
  torch.stack(
190
  [
 
195
  dim=0,
196
  ).numpy()
197
  ).permute(0, 3, 1, 2).float() / 255.0
198
+ if history_images:
199
+ history_stacked = torch.stack(history_images, dim=0).permute(0, 1, 4, 2, 3).float() / 255.0
200
+ else:
201
+ history_stacked = torch.zeros((0, 3, 3, self.resolution, self.resolution), dtype=torch.float32)
202
  return {
203
  "images": stacked,
204
+ "history_images": history_stacked,
205
+ "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
206
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
207
  "texts": sample["language_goal"],
208
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
 
215
  "rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
216
  "rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
217
  "rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
218
+ "candidate_action_chunks": torch.as_tensor(sample["candidate_action_chunks"], dtype=torch.float32),
219
+ "candidate_rollout_support_mode": torch.as_tensor(sample["candidate_rollout_support_mode"], dtype=torch.long),
220
+ "candidate_rollout_corridor_feasible": torch.as_tensor(sample["candidate_rollout_corridor_feasible"], dtype=torch.float32),
221
+ "candidate_rollout_persistence_horizon": torch.as_tensor(sample["candidate_rollout_persistence_horizon"], dtype=torch.float32),
222
+ "candidate_rollout_disturbance_cost": torch.as_tensor(sample["candidate_rollout_disturbance_cost"], dtype=torch.float32),
223
+ "candidate_retrieval_success": torch.as_tensor(sample["candidate_retrieval_success"], dtype=torch.float32),
224
+ "candidate_final_disturbance_cost": torch.as_tensor(sample["candidate_final_disturbance_cost"], dtype=torch.float32),
225
+ "candidate_reocclusion_rate": torch.as_tensor(sample["candidate_reocclusion_rate"], dtype=torch.float32),
226
+ "candidate_visibility_integral": torch.as_tensor(sample["candidate_visibility_integral"], dtype=torch.float32),
227
+ "candidate_risk": torch.as_tensor(sample["candidate_risk"], dtype=torch.float32),
228
+ "candidate_utility": torch.as_tensor(sample["candidate_utility"], dtype=torch.float32),
229
  "proxy_name": sample["proxy_name"],
230
  "episode_id": sample["episode_id"],
231
  }
code/reveal_vla_bimanual/sim_reveal/generate_dataset.py CHANGED
@@ -15,6 +15,8 @@ def main() -> None:
15
  parser.add_argument("--seed", type=int, default=0)
16
  parser.add_argument("--chunk-horizon", type=int, default=8)
17
  parser.add_argument("--rollout-horizon", type=int, default=5)
 
 
18
  parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
19
  args = parser.parse_args()
20
 
@@ -25,6 +27,8 @@ def main() -> None:
25
  seed=args.seed,
26
  chunk_horizon=args.chunk_horizon,
27
  rollout_horizon=args.rollout_horizon,
 
 
28
  )
29
  output_path = save_teacher_dataset(Path(args.output_path), dataset_bundle)
30
  payload = {
 
15
  parser.add_argument("--seed", type=int, default=0)
16
  parser.add_argument("--chunk-horizon", type=int, default=8)
17
  parser.add_argument("--rollout-horizon", type=int, default=5)
18
+ parser.add_argument("--history-steps", type=int, default=2)
19
+ parser.add_argument("--planner-candidates", type=int, default=4)
20
  parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
21
  args = parser.parse_args()
22
 
 
27
  seed=args.seed,
28
  chunk_horizon=args.chunk_horizon,
29
  rollout_horizon=args.rollout_horizon,
30
+ history_steps=args.history_steps,
31
+ planner_candidates=args.planner_candidates,
32
  )
33
  output_path = save_teacher_dataset(Path(args.output_path), dataset_bundle)
34
  payload = {
code/reveal_vla_bimanual/sim_reveal/procedural_envs.py CHANGED
@@ -136,6 +136,12 @@ class ProceduralRevealEnv:
136
  "disturbance": self.disturbance,
137
  "target_template": self.target_template,
138
  "target_depth": self.target_depth,
 
 
 
 
 
 
139
  "holding": self.holding,
140
  "transferred": self.transferred,
141
  "retrieved": self.retrieved,
@@ -151,6 +157,12 @@ class ProceduralRevealEnv:
151
  self.disturbance = float(state["disturbance"])
152
  self.target_template = int(state["target_template"])
153
  self.target_depth = float(state["target_depth"])
 
 
 
 
 
 
154
  self.holding = bool(state["holding"])
155
  self.transferred = bool(state["transferred"])
156
  self.retrieved = bool(state["retrieved"])
@@ -167,6 +179,13 @@ class ProceduralRevealEnv:
167
  self.disturbance = float(self.rng.uniform(0.02, 0.12))
168
  self.target_template = int(self.rng.integers(4, self.num_templates - 4))
169
  self.target_depth = float(self.rng.uniform(0.15, 0.45))
 
 
 
 
 
 
 
170
  self.holding = False
171
  self.transferred = False
172
  self.retrieved = False
@@ -287,34 +306,25 @@ class ProceduralRevealEnv:
287
  }
288
 
289
  def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
290
- privileged_state = privileged_state or self.get_privileged_state()
291
- current_mode = int(privileged_state["support_mode"])
292
  return {
293
  "opening": float(self.opening),
294
  "disturbance": float(self.disturbance),
295
- "target_template": int(self.target_template),
296
- "support_mode": current_mode,
297
- "visibility": float(privileged_state["visibility"]),
298
- "actor_template": int(self.last_actor_template),
299
- "actor_progress": float(self.actor_progress),
300
- "corridor_current": privileged_state["corridor_feasible"][current_mode].astype(np.float32),
 
301
  "step_fraction": float(self.step_count / max(1, self.max_steps)),
302
  }
303
 
304
  def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
305
- mode = privileged_state["support_mode"]
306
  features = np.zeros((32,), dtype=np.float32)
307
- features[0] = self.opening
308
- features[1] = self.disturbance
309
- features[2] = privileged_state["visibility"]
310
- features[3 + mode] = 1.0
311
- features[6] = self.target_template / float(self.num_templates - 1)
312
- features[7] = self.last_actor_template / float(self.num_templates - 1)
313
- features[8] = self.step_count / float(max(1, self.max_steps))
314
- features[9:12] = privileged_state["persistence_horizon"] / float(self.rollout_horizon)
315
- features[12] = float(privileged_state["corridor_feasible"][mode].any())
316
- features[13] = float(self.retrieved)
317
- features[14] = self.actor_progress
318
  return features
319
 
320
  def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
@@ -331,7 +341,6 @@ class ProceduralRevealEnv:
331
  "proprio": self._proprio(privileged_state),
332
  "text": PROXY_GOALS[self.proxy_name],
333
  "camera_names": self.camera_names,
334
- "render_state": render_state,
335
  }
336
 
337
  def teacher_action(self) -> np.ndarray:
@@ -402,6 +411,105 @@ class ProceduralRevealEnv:
402
  "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
403
  }
404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
406
  action = np.asarray(action, dtype=np.float32)
407
  mode = self._mode_from_action(action)
@@ -483,12 +591,13 @@ def render_views_from_state(
483
  dynamics = PROXY_DYNAMICS[proxy_name]
484
  opening = float(render_state["opening"])
485
  disturbance = float(render_state["disturbance"])
486
- target_template = int(render_state["target_template"])
487
- support_mode = int(render_state["support_mode"])
488
- visibility = float(render_state["visibility"])
489
- actor_template = int(render_state["actor_template"])
490
- actor_progress = float(render_state["actor_progress"])
491
- corridor_current = np.asarray(render_state["corridor_current"], dtype=np.float32)
 
492
  step_fraction = float(render_state["step_fraction"])
493
 
494
  height = width = resolution
@@ -498,44 +607,65 @@ def render_views_from_state(
498
  x = np.linspace(0.0, 1.0, width, dtype=np.float32)
499
  y = np.linspace(0.0, 1.0, height, dtype=np.float32)
500
  yy, xx = np.meshgrid(y, x, indexing="ij")
501
- center_x = target_template / float(max(1, num_templates - 1))
502
- gap_width = 0.04 + 0.18 * opening
503
- gap_mask = np.abs(xx - center_x) <= gap_width
504
- stripe_mask = (np.sin(xx * np.pi * 18.0) > 0.2).astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  front = base.copy()
507
- front[..., 1] += 0.22 * stripe_mask
508
- front[..., 0] += 0.07 * stripe_mask
509
- front[gap_mask, :] = np.clip(front[gap_mask, :] + np.asarray([0.18, 0.18, 0.18], dtype=np.float32), 0.0, 1.0)
510
- target_mask = ((xx - center_x) ** 2 + (yy - 0.76) ** 2) <= (0.03 + 0.015 * visibility) ** 2
511
- front[target_mask, 0] = np.clip(front[target_mask, 0] + 0.55 * visibility, 0.0, 1.0)
512
- front[target_mask, 1] *= 0.55
513
- front[..., 2] = np.clip(front[..., 2] + 0.18 * disturbance + 0.05 * step_fraction, 0.0, 1.0)
 
 
 
 
514
 
515
  wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
516
- open_rows = int(opening * height)
517
- wrist_left[height - open_rows :, : width // 3, 1] = 0.75
518
- wrist_left[height - int(disturbance * height) :, width // 3 : (2 * width) // 3, 0] = 0.85
519
- mode_colors = {
520
- SupportMode.HOLD: np.asarray([0.92, 0.82, 0.16], dtype=np.float32),
521
- SupportMode.TRANSFER: np.asarray([0.16, 0.78, 0.92], dtype=np.float32),
522
- SupportMode.PASSIVE: np.asarray([0.86, 0.86, 0.86], dtype=np.float32),
523
- }
524
- wrist_left[:, (2 * width) // 3 :, :] = mode_colors[SupportMode(support_mode)]
 
 
525
 
526
  wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
527
- template_edges = np.linspace(0, width, num_templates + 1, dtype=np.int32)
528
- for template_idx in range(num_templates):
529
- col_start = template_edges[template_idx]
530
- col_end = template_edges[template_idx + 1]
531
- if corridor_current[template_idx] > 0.5:
532
- wrist_right[:, col_start:col_end, 1] = 0.70
533
- if template_idx == target_template:
534
- wrist_right[:, col_start:col_end, 0] = 0.78
535
- if template_idx == actor_template:
536
- wrist_right[:, col_start:col_end, 2] = 0.90
537
- wrist_right[: max(1, int(visibility * height)), :, :] += 0.10
538
- wrist_right[height - max(1, int(actor_progress * height)) :, :, 2] += 0.12
 
 
539
  wrist_right = np.clip(wrist_right, 0.0, 1.0)
540
 
541
  return {
 
136
  "disturbance": self.disturbance,
137
  "target_template": self.target_template,
138
  "target_depth": self.target_depth,
139
+ "target_center": self.target_center,
140
+ "target_radius": self.target_radius,
141
+ "texture_phase": self.texture_phase,
142
+ "texture_scale": self.texture_scale,
143
+ "view_bias": self.view_bias,
144
+ "target_intensity": self.target_intensity,
145
  "holding": self.holding,
146
  "transferred": self.transferred,
147
  "retrieved": self.retrieved,
 
157
  self.disturbance = float(state["disturbance"])
158
  self.target_template = int(state["target_template"])
159
  self.target_depth = float(state["target_depth"])
160
+ self.target_center = float(state["target_center"])
161
+ self.target_radius = float(state["target_radius"])
162
+ self.texture_phase = float(state["texture_phase"])
163
+ self.texture_scale = float(state["texture_scale"])
164
+ self.view_bias = float(state["view_bias"])
165
+ self.target_intensity = float(state["target_intensity"])
166
  self.holding = bool(state["holding"])
167
  self.transferred = bool(state["transferred"])
168
  self.retrieved = bool(state["retrieved"])
 
179
  self.disturbance = float(self.rng.uniform(0.02, 0.12))
180
  self.target_template = int(self.rng.integers(4, self.num_templates - 4))
181
  self.target_depth = float(self.rng.uniform(0.15, 0.45))
182
+ base_center = self.target_template / float(max(1, self.num_templates - 1))
183
+ self.target_center = float(np.clip(base_center + self.rng.uniform(-0.01, 0.01), 0.06, 0.94))
184
+ self.target_radius = float(self.rng.uniform(0.022, 0.036))
185
+ self.texture_phase = float(self.rng.uniform(0.0, 2.0 * np.pi))
186
+ self.texture_scale = float(self.rng.uniform(0.85, 1.25))
187
+ self.view_bias = float(self.rng.uniform(-0.12, 0.12))
188
+ self.target_intensity = float(self.rng.uniform(0.45, 0.8))
189
  self.holding = False
190
  self.transferred = False
191
  self.retrieved = False
 
306
  }
307
 
308
  def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
 
 
309
  return {
310
  "opening": float(self.opening),
311
  "disturbance": float(self.disturbance),
312
+ "target_center": float(self.target_center),
313
+ "target_depth": float(self.target_depth),
314
+ "target_radius": float(self.target_radius),
315
+ "texture_phase": float(self.texture_phase),
316
+ "texture_scale": float(self.texture_scale),
317
+ "view_bias": float(self.view_bias),
318
+ "target_intensity": float(self.target_intensity),
319
  "step_fraction": float(self.step_count / max(1, self.max_steps)),
320
  }
321
 
322
  def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
 
323
  features = np.zeros((32,), dtype=np.float32)
324
+ step_fraction = self.step_count / float(max(1, self.max_steps))
325
+ features[0] = step_fraction
326
+ features[1] = np.sin(np.pi * step_fraction)
327
+ features[2] = np.cos(np.pi * step_fraction)
 
 
 
 
 
 
 
328
  return features
329
 
330
  def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
 
341
  "proprio": self._proprio(privileged_state),
342
  "text": PROXY_GOALS[self.proxy_name],
343
  "camera_names": self.camera_names,
 
344
  }
345
 
346
  def teacher_action(self) -> np.ndarray:
 
411
  "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
412
  }
413
 
414
+ def evaluate_action_chunk(
415
+ self,
416
+ action_chunk: np.ndarray,
417
+ rollout_horizon: int | None = None,
418
+ ) -> dict[str, np.ndarray | float]:
419
+ rollout_horizon = rollout_horizon or self.rollout_horizon
420
+ snapshot = self.clone_state()
421
+ rollout_support_mode: list[int] = []
422
+ rollout_corridor: list[np.ndarray] = []
423
+ rollout_persistence: list[np.ndarray] = []
424
+ rollout_disturbance: list[float] = []
425
+ corridor_open_trace = [float(self.get_privileged_state()["corridor_feasible"][self._current_support_mode()].any())]
426
+ visibility_trace = [float(self.get_privileged_state()["visibility"])]
427
+ terminated = False
428
+ truncated = False
429
+ privileged_state = self.get_privileged_state()
430
+ for step, action in enumerate(np.asarray(action_chunk, dtype=np.float32)):
431
+ _, _, terminated, truncated, privileged_state = self.step(action)
432
+ if step < rollout_horizon:
433
+ rollout_support_mode.append(int(privileged_state["support_mode"]))
434
+ rollout_corridor.append(privileged_state["corridor_feasible"].astype(np.float32))
435
+ rollout_persistence.append(privileged_state["persistence_horizon"].astype(np.float32))
436
+ rollout_disturbance.append(float(privileged_state["disturbance_cost"]))
437
+ corridor_open_trace.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
438
+ visibility_trace.append(float(privileged_state["visibility"]))
439
+ if terminated or truncated:
440
+ break
441
+ while len(rollout_support_mode) < rollout_horizon:
442
+ current = self.get_privileged_state()
443
+ rollout_support_mode.append(int(current["support_mode"]))
444
+ rollout_corridor.append(current["corridor_feasible"].astype(np.float32))
445
+ rollout_persistence.append(current["persistence_horizon"].astype(np.float32))
446
+ rollout_disturbance.append(float(current["disturbance_cost"]))
447
+ final_state = self.get_privileged_state()
448
+ reocclusion = float(
449
+ np.logical_and(
450
+ np.asarray(corridor_open_trace[:-1]) > 0.5,
451
+ np.asarray(corridor_open_trace[1:]) <= 0.5,
452
+ ).mean()
453
+ ) if len(corridor_open_trace) > 1 else 0.0
454
+ result: dict[str, np.ndarray | float] = {
455
+ "rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
456
+ "rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
457
+ "rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
458
+ "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
459
+ "retrieval_success": float(final_state["retrieval_success"]),
460
+ "final_disturbance_cost": float(final_state["disturbance_cost"]),
461
+ "reocclusion_rate": reocclusion,
462
+ "visibility_integral": float(np.sum(np.asarray(visibility_trace, dtype=np.float32))),
463
+ }
464
+ self.restore_state(snapshot)
465
+ return result
466
+
467
+ def sample_candidate_action_chunks(
468
+ self,
469
+ teacher_chunk: np.ndarray,
470
+ num_candidates: int = 4,
471
+ rollout_horizon: int | None = None,
472
+ ) -> tuple[np.ndarray, dict[str, np.ndarray]]:
473
+ rollout_horizon = rollout_horizon or self.rollout_horizon
474
+ teacher_chunk = np.asarray(teacher_chunk, dtype=np.float32)
475
+ candidates = [teacher_chunk.astype(np.float32)]
476
+ outcomes = [self.evaluate_action_chunk(teacher_chunk, rollout_horizon=rollout_horizon)]
477
+ for candidate_idx in range(1, num_candidates):
478
+ candidate = teacher_chunk.copy()
479
+ revealer_noise = self.rng.normal(loc=0.0, scale=0.20 + 0.03 * candidate_idx, size=candidate[:, :7].shape)
480
+ actor_noise = self.rng.normal(loc=0.0, scale=0.18 + 0.04 * candidate_idx, size=candidate[:, 7:].shape)
481
+ candidate[:, :7] = np.clip(candidate[:, :7] + revealer_noise.astype(np.float32), -1.0, 1.0)
482
+ candidate[:, 7:] = np.clip(candidate[:, 7:] + actor_noise.astype(np.float32), -1.0, 1.0)
483
+ candidates.append(candidate.astype(np.float32))
484
+ outcomes.append(self.evaluate_action_chunk(candidate, rollout_horizon=rollout_horizon))
485
+ stacked_outcomes = {
486
+ "candidate_rollout_support_mode": np.stack([item["rollout_support_mode"] for item in outcomes], axis=0).astype(np.int64),
487
+ "candidate_rollout_corridor_feasible": np.stack(
488
+ [item["rollout_corridor_feasible"] for item in outcomes], axis=0
489
+ ).astype(np.float32),
490
+ "candidate_rollout_persistence_horizon": np.stack(
491
+ [item["rollout_persistence_horizon"] for item in outcomes], axis=0
492
+ ).astype(np.float32),
493
+ "candidate_rollout_disturbance_cost": np.stack(
494
+ [item["rollout_disturbance_cost"] for item in outcomes], axis=0
495
+ ).astype(np.float32),
496
+ "candidate_retrieval_success": np.asarray([item["retrieval_success"] for item in outcomes], dtype=np.float32),
497
+ "candidate_final_disturbance_cost": np.asarray(
498
+ [item["final_disturbance_cost"] for item in outcomes], dtype=np.float32
499
+ ),
500
+ "candidate_reocclusion_rate": np.asarray([item["reocclusion_rate"] for item in outcomes], dtype=np.float32),
501
+ "candidate_visibility_integral": np.asarray([item["visibility_integral"] for item in outcomes], dtype=np.float32),
502
+ }
503
+ stacked_outcomes["candidate_risk"] = np.clip(
504
+ stacked_outcomes["candidate_final_disturbance_cost"] + stacked_outcomes["candidate_reocclusion_rate"],
505
+ 0.0,
506
+ 1.0,
507
+ ).astype(np.float32)
508
+ stacked_outcomes["candidate_utility"] = (
509
+ stacked_outcomes["candidate_retrieval_success"] - stacked_outcomes["candidate_risk"]
510
+ ).astype(np.float32)
511
+ return np.stack(candidates, axis=0).astype(np.float32), stacked_outcomes
512
+
513
  def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
514
  action = np.asarray(action, dtype=np.float32)
515
  mode = self._mode_from_action(action)
 
591
  dynamics = PROXY_DYNAMICS[proxy_name]
592
  opening = float(render_state["opening"])
593
  disturbance = float(render_state["disturbance"])
594
+ target_center = float(render_state["target_center"])
595
+ target_depth = float(render_state["target_depth"])
596
+ target_radius = float(render_state["target_radius"])
597
+ texture_phase = float(render_state["texture_phase"])
598
+ texture_scale = float(render_state["texture_scale"])
599
+ view_bias = float(render_state["view_bias"])
600
+ target_intensity = float(render_state["target_intensity"])
601
  step_fraction = float(render_state["step_fraction"])
602
 
603
  height = width = resolution
 
607
  x = np.linspace(0.0, 1.0, width, dtype=np.float32)
608
  y = np.linspace(0.0, 1.0, height, dtype=np.float32)
609
  yy, xx = np.meshgrid(y, x, indexing="ij")
610
+ visibility = np.clip(
611
+ 1.25 * opening - 0.68 * disturbance - 0.24 * target_depth + dynamics.visibility_bias,
612
+ 0.0,
613
+ 1.0,
614
+ )
615
+ target_y = 0.74 - 0.22 * target_depth
616
+ gap_width = np.clip(0.05 + 0.16 * opening - 0.08 * disturbance, 0.02, 0.24)
617
+ front_center = np.clip(target_center + 0.03 * view_bias, 0.06, 0.94)
618
+ left_center = np.clip(0.34 + 0.12 * (target_center - 0.5) - 0.05 * view_bias, 0.18, 0.52)
619
+ right_center = np.clip(0.66 + 0.18 * (target_center - 0.5) + 0.06 * view_bias, 0.42, 0.88)
620
+ surface_wave = 0.5 + 0.5 * np.sin((xx * (14.0 * texture_scale) + yy * 7.0) * np.pi + texture_phase)
621
+ weave_wave = 0.5 + 0.5 * np.cos((xx * 6.0 - yy * (10.0 + 2.0 * texture_scale)) * np.pi - 0.6 * texture_phase)
622
+ clutter = 0.65 * surface_wave + 0.35 * weave_wave
623
+ disturbance_map = disturbance * (
624
+ 0.55 + 0.45 * np.sin((xx * 9.0 + yy * (12.0 + texture_scale)) * np.pi + 1.3 * texture_phase)
625
+ )
626
+ target_mask = ((xx - front_center) ** 2 + ((yy - target_y) / 1.2) ** 2) <= target_radius**2
627
 
628
  front = base.copy()
629
+ front *= (0.82 + 0.24 * clutter[..., None]).astype(np.float32)
630
+ occluder_profile = np.abs(xx - front_center) / gap_width + 0.55 * np.abs(yy - (0.56 + 0.08 * view_bias))
631
+ gap_mask = occluder_profile <= (1.15 + 0.35 * opening)
632
+ front[gap_mask] = np.clip(front[gap_mask] + np.asarray([0.14, 0.16, 0.14], dtype=np.float32), 0.0, 1.0)
633
+ target_rgb = np.asarray([0.78, 0.74, 0.58], dtype=np.float32) * target_intensity
634
+ front[target_mask] = np.clip(
635
+ front[target_mask] * (1.0 - 0.45 * visibility) + target_rgb * (0.25 + 0.75 * visibility),
636
+ 0.0,
637
+ 1.0,
638
+ )
639
+ front[..., 2] = np.clip(front[..., 2] + 0.12 * disturbance_map + 0.04 * step_fraction, 0.0, 1.0)
640
 
641
  wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
642
+ wrist_left *= (0.8 + 0.18 * clutter[..., None]).astype(np.float32)
643
+ left_slit_width = np.clip(0.04 + 0.18 * opening - 0.10 * disturbance, 0.015, 0.22)
644
+ left_profile = ((xx - left_center) / left_slit_width) ** 2 + ((yy - 0.58) / (0.40 + 0.10 * opening)) ** 2
645
+ left_open = left_profile <= 1.0
646
+ wrist_left[left_open] = np.clip(wrist_left[left_open] + np.asarray([0.08, 0.22, 0.12], dtype=np.float32), 0.0, 1.0)
647
+ wrist_left[..., 0] = np.clip(wrist_left[..., 0] + 0.18 * disturbance_map, 0.0, 1.0)
648
+ wrist_left[target_mask] = np.clip(
649
+ wrist_left[target_mask] * (1.0 - 0.35 * visibility) + target_rgb * (0.18 + 0.52 * visibility),
650
+ 0.0,
651
+ 1.0,
652
+ )
653
 
654
  wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
655
+ wrist_right *= (0.78 + 0.22 * clutter[..., None]).astype(np.float32)
656
+ right_band = np.exp(-((xx - right_center) ** 2) / max(1e-4, (0.06 + gap_width) ** 2))
657
+ right_clear = np.exp(-((yy - (0.52 - 0.12 * target_depth)) ** 2) / max(1e-4, (0.12 + 0.18 * opening) ** 2))
658
+ wrist_right[..., 1] = np.clip(
659
+ wrist_right[..., 1] + 0.28 * visibility * right_band * right_clear - 0.10 * disturbance_map,
660
+ 0.0,
661
+ 1.0,
662
+ )
663
+ wrist_right[target_mask] = np.clip(
664
+ wrist_right[target_mask] * (1.0 - 0.40 * visibility) + target_rgb * (0.22 + 0.60 * visibility),
665
+ 0.0,
666
+ 1.0,
667
+ )
668
+ wrist_right[..., 2] = np.clip(wrist_right[..., 2] + 0.08 * step_fraction + 0.06 * right_band, 0.0, 1.0)
669
  wrist_right = np.clip(wrist_right, 0.0, 1.0)
670
 
671
  return {