diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..b09813554542b75455cc2a001025f4753394bb63
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.375,
+      "bag_proxy": 0.4583333333333333,
+      "cloth_proxy": 0.5833333333333334
+    },
+    "mean_success": 0.47222222222222215,
+    "visibility_integral": 37.36026926173104,
+    "corridor_availability": 0.8730104863643646,
+    "reocclusion_rate": 0.04405864197530864,
+    "persistence_horizon_mae": 1.033145775666108,
+    "disturbance_cost": 0.3228136783000082
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..9e6a9ef36c8d466deeea73d78155369b26115e71
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt
+- mean_success: 0.472
+- visibility_integral: 37.360
+- corridor_availability: 0.873
+- reocclusion_rate: 0.044
+- persistence_horizon_mae: 1.033
+- disturbance_cost: 0.323
+- foliage_proxy_success: 0.375
+- bag_proxy_success: 0.458
+- cloth_proxy_success: 0.583
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eff928d59cc5d589c638a4a1ee7b58917509734b
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage1_clip_seed7
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 7
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed7.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed7.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 7
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.2
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..02afb7ed5c23f00d1758269377baba5349de1002
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.25396825396825395,
+  "planner_regret": 0.024764427915215492,
+  "planner_score_utility_spearman": 0.1904761791229248,
+  "risk_calibration_mse": 0.010364258661866188,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.022177213802933693,
+  "left_right_equivariance_error": 0.0002942846322184778,
+  "belief_calibration_brier": 0.003581121563911438,
+  "reocclusion_calibration_brier": 0.23373088240623474,
+  "support_stability_mae": 0.022998232394456863,
+  "clearance_auc": 0.8989269585276155,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.41934600472450256,
+  "num_samples": 126
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..d16d4bc573f9087e2da1899ccba3528521fdbb9f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.025519870977400175,
+      "arm_role": 0.03451829462151253,
+      "belief": 0.11532339149432656,
+      "clearance": 0.09198410963122758,
+      "corridor": 0.27232400180664673,
+      "disturbance": 0.005858588227789626,
+      "grasp_affordance": 0.018751464233153464,
+      "occluder_contact": 0.21359099159065967,
+      "persistence": 5.231568055785678,
+      "phase": 0.7372311896678665,
+      "planner_ranking": 0.1646315749647481,
+      "planner_risk": 0.014348083711473067,
+      "planner_success": 0.6091769787029446,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.253575401780493,
+      "proposal_reconstruction": 0.067724266230904,
+      "proposal_success": 0.6851897648491785,
+      "reocclusion": 0.7031442959895309,
+      "role_swap_consistency": 0.00044027801038677857,
+      "support_mode": 0.7282283443430956,
+      "support_stability": 0.15459337279551627,
+      "total": 1.6319934494832424,
+      "uncertainty": 0.013496716971069097,
+      "visibility": 0.11563199924314833,
+      "world_model": 2.671503098223222
+    },
+    "val": {
+      "action": 0.020692157455616526,
+      "arm_role": 9.546122843554865e-05,
+      "belief": 0.09874132736807778,
+      "clearance": 0.08244451738539196,
+      "corridor": 0.2306106292775699,
+      "disturbance": 0.006118982125097694,
+      "grasp_affordance": 0.009981726739732992,
+      "occluder_contact": 0.19720953915800368,
+      "persistence": 3.8672617465730696,
+      "phase": 0.668701058815396,
+      "planner_ranking": 0.03794538755975072,
+      "planner_risk": 0.009814016923349026,
+      "planner_success": 0.5628143776030767,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1249213124078417,
+      "proposal_reconstruction": 0.06329423224642164,
+      "proposal_success": 0.6747160203873165,
+      "reocclusion": 0.692203164100647,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6680677216204386,
+      "support_stability": 0.1511912994411966,
+      "total": 1.358805573175824,
+      "uncertainty": 0.003482046378185115,
+      "visibility": 0.10417925601913816,
+      "world_model": 2.1376701915074907
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.02150821143575988,
+      "arm_role": 1.9482293054071397e-05,
+      "belief": 0.09863162136280725,
+      "clearance": 0.08064276829400924,
+      "corridor": 0.24359133383210416,
+      "disturbance": 0.002735878452234476,
+      "grasp_affordance": 0.009349104797184779,
+      "occluder_contact": 0.1937003313558888,
+      "persistence": 4.076787073262699,
+      "phase": 0.6966290698625655,
+      "planner_ranking": 0.04271617977273956,
+      "planner_risk": 0.010049402082938681,
+      "planner_success": 0.5399472568359674,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1569982820156357,
+      "proposal_reconstruction": 0.06389496966962414,
+      "proposal_success": 0.6711133328407847,
+      "reocclusion": 0.6940537130957498,
+      "role_swap_consistency": 0.00022550253765151655,
+      "support_mode": 0.6837139029777487,
+      "support_stability": 0.14029162690160474,
+      "total": 1.3837347957476271,
+      "uncertainty": 0.0016494125736687157,
+      "visibility": 0.09400421737922424,
+      "world_model": 2.175609592991974
+    },
+    "val": {
+      "action": 0.020051477757829523,
+      "arm_role": 2.626385377793451e-06,
+      "belief": 0.09183884199176516,
+      "clearance": 0.07657587877105153,
+      "corridor": 0.22728621321065084,
+      "disturbance": 0.0016498260886850951,
+      "grasp_affordance": 0.009590831518705403,
+      "occluder_contact": 0.1917984854607355,
+      "persistence": 3.699212070495363,
+      "phase": 0.6689459842348856,
+      "planner_ranking": 0.03331218510795715,
+      "planner_risk": 0.010092773325076061,
+      "planner_success": 0.5014436940352122,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1606994933552213,
+      "proposal_reconstruction": 0.062439400820978104,
+      "proposal_success": 0.675733851061927,
+      "reocclusion": 0.6921006942552234,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6564426545112853,
+      "support_stability": 0.14099458102432508,
+      "total": 1.313369631767273,
+      "uncertainty": 0.0024020517326240973,
+      "visibility": 0.08723713226971172,
+      "world_model": 2.0216772158940635
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.018980447901412845,
+      "arm_role": 2.3090714559505124e-05,
+      "belief": 0.1100015923263827,
+      "clearance": 0.0791148773262872,
+      "corridor": 0.23030528037001852,
+      "disturbance": 0.002447301701405857,
+      "grasp_affordance": 0.009001106255400087,
+      "occluder_contact": 0.21010415864552504,
+      "persistence": 2.0494745795430753,
+      "phase": 0.459073231482381,
+      "planner_ranking": 0.036845811475892686,
+      "planner_risk": 0.011261017404920885,
+      "planner_success": 0.5133467099741491,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1499755538570944,
+      "proposal_reconstruction": 0.062038555780318395,
+      "proposal_success": 0.6672172468370168,
+      "reocclusion": 0.41151915600825667,
+      "role_swap_consistency": 0.0007739521978125561,
+      "support_mode": 0.38595684411013936,
+      "support_stability": 0.1425538511912665,
+      "total": 1.1811942648513154,
+      "uncertainty": 0.000767841034371724,
+      "visibility": 0.10209987125315591,
+      "world_model": 2.070929214904446
+    },
+    "val": {
+      "action": 0.0138629823627453,
+      "arm_role": 0.002011558223822855,
+      "belief": 0.10340341582657799,
+      "clearance": 0.0855481999497565,
+      "corridor": 0.2235906974427284,
+      "disturbance": 0.0011637268657111797,
+      "grasp_affordance": 0.010592727485807642,
+      "occluder_contact": 0.20843842601965343,
+      "persistence": 1.1762515253254346,
+      "phase": 0.3442955078771486,
+      "planner_ranking": 0.03461442932137519,
+      "planner_risk": 0.01165175854065825,
+      "planner_success": 0.45808544967855724,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.3026971003365895,
+      "proposal_reconstruction": 0.05888378312663427,
+      "proposal_success": 0.7430036550476438,
+      "reocclusion": 0.2871374910076459,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.22473623181900215,
+      "support_stability": 0.1320991822414928,
+      "total": 1.1099917330439129,
+      "uncertainty": 0.0005805234163528352,
+      "visibility": 0.09557991185122067,
+      "world_model": 1.9994045325687952
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.014569098466314883,
+      "arm_role": 4.4951576212937916e-05,
+      "belief": 0.09620984569582015,
+      "clearance": 0.07538617284315106,
+      "corridor": 0.21248489566188775,
+      "disturbance": 0.0016758848629270635,
+      "grasp_affordance": 0.008272631588777167,
+      "occluder_contact": 0.19746327033529731,
+      "persistence": 1.1089699098374644,
+      "phase": 0.3716845961765469,
+      "planner_ranking": 0.03254403228879829,
+      "planner_risk": 0.010248634800575772,
+      "planner_success": 0.47941413580279074,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.153262345578658,
+      "proposal_reconstruction": 0.05860933205064055,
+      "proposal_success": 0.6466394141706496,
+      "reocclusion": 0.2566672772173989,
+      "role_swap_consistency": 0.0010398222479868085,
+      "support_mode": 0.21815690070546734,
+      "support_stability": 0.13650912478449145,
+      "total": 1.0633102330861914,
+      "uncertainty": 0.0002461711761398012,
+      "visibility": 0.09588275449984361,
+      "world_model": 1.9903733518111144
+    },
+    "val": {
+      "action": 0.01619998768474611,
+      "arm_role": 3.844006559777174e-06,
+      "belief": 0.09427393618084136,
+      "clearance": 0.07296533326780985,
+      "corridor": 0.2100035525148823,
+      "disturbance": 0.0013519242122204862,
+      "grasp_affordance": 0.007646961093303703,
+      "occluder_contact": 0.1950870676646157,
+      "persistence": 1.3894045449024628,
+      "phase": 0.6804814789192899,
+      "planner_ranking": 0.027768202883649677,
+      "planner_risk": 0.010219628483081044,
+      "planner_success": 0.4819766197885786,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1241777983922807,
+      "proposal_reconstruction": 0.060782825840370994,
+      "proposal_success": 0.6369421221907177,
+      "reocclusion": 0.27461627113913734,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.08716485598531093,
+      "support_stability": 0.13245442648610425,
+      "total": 1.0629130696493483,
+      "uncertainty": 8.45672577761145e-05,
+      "visibility": 0.1013997554306,
+      "world_model": 1.8573077273747278
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..794d761bcb9fe58941b4f435665e75eb6f536b98
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_clip_seed7",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt",
+  "final_train_total": 1.0633102330861914,
+  "final_val_total": 1.0629130696493483,
+  "train_time_sec": 174.85308933258057,
+  "peak_gpu_memory_mb": 1919.8251953125,
+  "num_train_samples": 382,
+  "num_val_samples": 126,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..78f463cae8a8270ef40d9dd9e7696812b95f2b69
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5555555555555555,
+    "visibility_integral": 31.92372977733612,
+    "corridor_availability": 0.8500884034567409,
+    "reocclusion_rate": 0.029287114566719827,
+    "persistence_horizon_mae": 0.894922278028389,
+    "disturbance_cost": 0.28616168903600836
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..707fc6e70e75c11e0e87ac7960db0a33969301db
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt
+- mean_success: 0.556
+- visibility_integral: 31.924
+- corridor_availability: 0.850
+- reocclusion_rate: 0.029
+- persistence_horizon_mae: 0.895
+- disturbance_cost: 0.286
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb7af2bd2871f10553592409954b935d914cf98d
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage1_clip_seed8
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 8
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed8.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed8.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 8
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.2
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..15a6d8212f84fcd886cfbf4336788174e2b49d33
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.25984251968503935,
+  "planner_regret": 0.024652592837810516,
+  "planner_score_utility_spearman": 0.15748029947280884,
+  "risk_calibration_mse": 0.010109159164130688,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.02039325051009655,
+  "left_right_equivariance_error": 8.317838273796951e-05,
+  "belief_calibration_brier": 0.0039802417159080505,
+  "reocclusion_calibration_brier": 0.2667863667011261,
+  "support_stability_mae": 0.023258011788129807,
+  "clearance_auc": 0.9407927438472715,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.5879086852073669,
+  "num_samples": 127
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..7307319477932bd083bfcbc36d02d39929231535
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.025799189747862168,
+      "arm_role": 0.027215735138398815,
+      "belief": 0.11522909954034222,
+      "clearance": 0.09597517975181809,
+      "corridor": 0.3045216482132673,
+      "disturbance": 0.006567074132739083,
+      "grasp_affordance": 0.02625927054055074,
+      "occluder_contact": 0.2161167692295544,
+      "persistence": 7.305491891831004,
+      "phase": 0.7473598300474477,
+      "planner_ranking": 0.14102927445574143,
+      "planner_risk": 0.014660530898254365,
+      "planner_success": 0.596433128830026,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.26868818193206,
+      "proposal_reconstruction": 0.06815405646387819,
+      "proposal_success": 0.6748700912710259,
+      "reocclusion": 0.7006335564308765,
+      "role_swap_consistency": 0.0005011227108655176,
+      "support_mode": 0.7077700629908377,
+      "support_stability": 0.1599257462645798,
+      "total": 1.733834327203441,
+      "uncertainty": 0.022427979406115357,
+      "visibility": 0.11316451830155562,
+      "world_model": 2.674901399312843
+    },
+    "val": {
+      "action": 0.02199536032276228,
+      "arm_role": 9.8040056428772e-06,
+      "belief": 0.0978035525768064,
+      "clearance": 0.07755720446584746,
+      "corridor": 0.24431297194678336,
+      "disturbance": 0.0019795258613157785,
+      "grasp_affordance": 0.008650467454572208,
+      "occluder_contact": 0.20205649081617594,
+      "persistence": 4.437129996716976,
+      "phase": 0.6695621414110065,
+      "planner_ranking": 0.04436381870164041,
+      "planner_risk": 0.010196975797498453,
+      "planner_success": 0.5646271030418575,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1638631131500006,
+      "proposal_reconstruction": 0.06484090705635026,
+      "proposal_success": 0.6649224627763033,
+      "reocclusion": 0.7438069470226765,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.673728191293776,
+      "support_stability": 0.13629821891663596,
+      "total": 1.4150245506316423,
+      "uncertainty": 0.002036258225416532,
+      "visibility": 0.09110353700816631,
+      "world_model": 2.210838695988059
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.02220674532499769,
+      "arm_role": 4.0168849585568094e-05,
+      "belief": 0.10375202887969491,
+      "clearance": 0.08468958432176663,
+      "corridor": 0.24882320250282114,
+      "disturbance": 0.002981857188692701,
+      "grasp_affordance": 0.00994103324857994,
+      "occluder_contact": 0.20824503820604054,
+      "persistence": 4.263324179262391,
+      "phase": 0.7222360341336714,
+      "planner_ranking": 0.044953017053952174,
+      "planner_risk": 0.010661984013600143,
+      "planner_success": 0.5370719069273684,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1506784087076236,
+      "proposal_reconstruction": 0.06470025059674422,
+      "proposal_success": 0.6748968515720667,
+      "reocclusion": 0.7042920837539652,
+      "role_swap_consistency": 0.00024932249915769023,
+      "support_mode": 0.6881518938154451,
+      "support_stability": 0.1487102357972979,
+      "total": 1.3995415040959862,
+      "uncertainty": 0.0019858729011069556,
+      "visibility": 0.09729615078156531,
+      "world_model": 2.178037493952906
+    },
+    "val": {
+      "action": 0.029678026388864964,
+      "arm_role": 0.0003116108114227245,
+      "belief": 0.10797233448829502,
+      "clearance": 0.08150003047194332,
+      "corridor": 0.2509052273235284,
+      "disturbance": 0.002103368451003007,
+      "grasp_affordance": 0.008963905274868011,
+      "occluder_contact": 0.2007133779115975,
+      "persistence": 4.478599248453975,
+      "phase": 0.7040554136037827,
+      "planner_ranking": 0.03813048706929578,
+      "planner_risk": 0.01057393318569666,
+      "planner_success": 0.5217722351662815,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1685641314834356,
+      "proposal_reconstruction": 0.07131227233912796,
+      "proposal_success": 0.6757729910314083,
+      "reocclusion": 0.6976062525063753,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.7273222031071782,
+      "support_stability": 0.1463006478443276,
+      "total": 1.3876731358468533,
+      "uncertainty": 0.0005028243003835087,
+      "visibility": 0.10090084094554186,
+      "world_model": 2.023001086898148
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.022834130358048446,
+      "arm_role": 3.6339485208401505e-05,
+      "belief": 0.10015391417978946,
+      "clearance": 0.08339313631243418,
+      "corridor": 0.24550532728082536,
+      "disturbance": 0.002419849791671015,
+      "grasp_affordance": 0.011102509094860541,
+      "occluder_contact": 0.20242435567041966,
+      "persistence": 4.354869382134127,
+      "phase": 0.6933721572316754,
+      "planner_ranking": 0.04187904763565859,
+      "planner_risk": 0.010259467963658331,
+      "planner_success": 0.5138571092283538,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1488539314394846,
+      "proposal_reconstruction": 0.06509613401758733,
+      "proposal_success": 0.6776590312962757,
+      "reocclusion": 0.70495132540221,
+      "role_swap_consistency": 0.0003516697920602868,
+      "support_mode": 0.6823001881544503,
+      "support_stability": 0.14350243961116718,
+      "total": 1.378995967473035,
+      "uncertainty": 0.0031733291824921203,
+      "visibility": 0.09716511293465555,
+      "world_model": 2.104598100584839
+    },
+    "val": {
+      "action": 0.02644303720444441,
+      "arm_role": 4.627731826190029e-06,
+      "belief": 0.10258024383801967,
+      "clearance": 0.07597982959123328,
+      "corridor": 0.2423992605181411,
+      "disturbance": 0.0015974244740846189,
+      "grasp_affordance": 0.007909159859991632,
+      "occluder_contact": 0.19435308501124382,
+      "persistence": 3.919285401701927,
+      "phase": 0.6770087121985853,
+      "planner_ranking": 0.030531517459166935,
+      "planner_risk": 0.010262692154356046,
+      "planner_success": 0.5169326290488243,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.138186807744205,
+      "proposal_reconstruction": 0.06911751109873876,
+      "proposal_success": 0.6695848302915692,
+      "reocclusion": 0.6975388880819082,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6884247697889805,
+      "support_stability": 0.13594868587097153,
+      "total": 1.3366163168102503,
+      "uncertainty": 0.0006479808544099797,
+      "visibility": 0.09649082575924695,
+      "world_model": 2.0216304706409574
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.021160060905544235,
+      "arm_role": 5.587545364939105e-05,
+      "belief": 0.10077974488909956,
+      "clearance": 0.08377115065670762,
+      "corridor": 0.2723994788211522,
+      "disturbance": 0.0028603613238174243,
+      "grasp_affordance": 0.011514163958835196,
+      "occluder_contact": 0.20602131318983607,
+      "persistence": 3.0813600014851317,
+      "phase": 0.6817607779777487,
+      "planner_ranking": 0.031658034657560674,
+      "planner_risk": 0.010394540625284256,
+      "planner_success": 0.5069346120532271,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.132226309851202,
+      "proposal_reconstruction": 0.06328810811900967,
+      "proposal_success": 0.6744790461050902,
+      "reocclusion": 0.6852282721022661,
+      "role_swap_consistency": 0.0005754872515272832,
+      "support_mode": 0.6633978239528796,
+      "support_stability": 0.14488365837977468,
+      "total": 1.293662095569191,
+      "uncertainty": 0.0023333917296635863,
+      "visibility": 0.09853576490392235,
+      "world_model": 2.0413369105748482
+    },
+    "val": {
+      "action": 0.017367416352499276,
+      "arm_role": 7.692722565622034e-07,
+      "belief": 0.1027774921967648,
+      "clearance": 0.08752925635781139,
+      "corridor": 0.26156787533545867,
+      "disturbance": 0.0016430629628985116,
+      "grasp_affordance": 0.010058694657345768,
+      "occluder_contact": 0.21157401148229837,
+      "persistence": 1.0993698399979621,
+      "phase": 0.6142133427783847,
+      "planner_ranking": 0.03328441088268619,
+      "planner_risk": 0.010188427979301196,
+      "planner_success": 0.4918641885742545,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1239634547382593,
+      "proposal_reconstruction": 0.06056849448941648,
+      "proposal_success": 0.6778606250882149,
+      "reocclusion": 0.5640022717416286,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.5024671151768416,
+      "support_stability": 0.13648800805094652,
+      "total": 1.1350205279886723,
+      "uncertainty": 0.0008341338888158134,
+      "visibility": 0.0982570193009451,
+      "world_model": 1.93993010930717
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..692d7e32568f5777dfc09b58a949219b0197ed09
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_clip_seed8",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt",
+  "final_train_total": 1.293662095569191,
+  "final_val_total": 1.1350205279886723,
+  "train_time_sec": 146.87081933021545,
+  "peak_gpu_memory_mb": 1891.1337890625,
+  "num_train_samples": 381,
+  "num_val_samples": 127,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9f4afaf8b7e90803ec0958bb386e00ff8fb571a
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444443,
+    "visibility_integral": 32.623872251146366,
+    "corridor_availability": 0.889709601799647,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.1627785900766536,
+    "disturbance_cost": 0.2332938505957524
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..f18ad9ff0d4589962fc44ae064bebaa8a51fd460
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 32.624
+- corridor_availability: 0.890
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.163
+- disturbance_cost: 0.233
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..426f4d7345307a7df8fe8fff3536b2bef0b1a763
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage1_clip_seed9
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 9
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed9.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed9.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 9
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.2
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..2647e97370154cf256897d5dd2051143d5f74d5c
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.2890625,
+  "planner_regret": 0.02300698682665825,
+  "planner_score_utility_spearman": 0.22968751192092896,
+  "risk_calibration_mse": 0.010304542258381844,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.022611485794186592,
+  "left_right_equivariance_error": 8.689248215887346e-05,
+  "belief_calibration_brier": 0.0043337177485227585,
+  "reocclusion_calibration_brier": 0.22800305485725403,
+  "support_stability_mae": 0.02859283983707428,
+  "clearance_auc": 0.6329041426155311,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.2469944953918457,
+  "num_samples": 128
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..599d894dece12750e4f4bbe429d7b3385b8d9cbf
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.027812569460978633,
+      "arm_role": 0.030137697646492406,
+      "belief": 0.12157152328444154,
+      "clearance": 0.09282162053216444,
+      "corridor": 0.2851656379864404,
+      "disturbance": 0.004553798715077344,
+      "grasp_affordance": 0.018851539715634365,
+      "occluder_contact": 0.2132460696916831,
+      "persistence": 5.642576662878807,
+      "phase": 0.7761939600894325,
+      "planner_ranking": 0.17902961440620282,
+      "planner_risk": 0.013923984336035668,
+      "planner_success": 0.6199151214800382,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2823116054660395,
+      "proposal_reconstruction": 0.06912861580127164,
+      "proposal_success": 0.6811760576147782,
+      "reocclusion": 0.7353295496419856,
+      "role_swap_consistency": 0.0005873552748725113,
+      "support_mode": 0.7828435195119757,
+      "support_stability": 0.16347284512594343,
+      "total": 1.6866143584251403,
+      "uncertainty": 0.019001170223897423,
+      "visibility": 0.11754893544865282,
+      "world_model": 2.710779071795313
+    },
+    "val": {
+      "action": 0.02170204828144051,
+      "arm_role": 6.762321064002208e-06,
+      "belief": 0.10080993873998523,
+      "clearance": 0.08166962582617998,
+      "corridor": 0.23909102065954357,
+      "disturbance": 0.001983066906802833,
+      "grasp_affordance": 0.008535019573173486,
+      "occluder_contact": 0.2112727805506438,
+      "persistence": 3.857563339173794,
+      "phase": 0.6654304726980627,
+      "planner_ranking": 0.04032187890697969,
+      "planner_risk": 0.011350331830726645,
+      "planner_success": 0.5934910103678703,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1493350621312857,
+      "proposal_reconstruction": 0.06338102876907215,
+      "proposal_success": 0.6806164355948567,
+      "reocclusion": 0.6909330077469349,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6831411011517048,
+      "support_stability": 0.13910080850473605,
+      "total": 1.458911145105958,
+      "uncertainty": 0.0033405375688744243,
+      "visibility": 0.09547075629234314,
+      "world_model": 2.5560860373079777
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.023493385471795733,
+      "arm_role": 0.0002928718140250758,
+      "belief": 0.10523007610126546,
+      "clearance": 0.08677955961933262,
+      "corridor": 0.25750191186211613,
+      "disturbance": 0.0031594517295421777,
+      "grasp_affordance": 0.01005841078187682,
+      "occluder_contact": 0.20920588836858148,
+      "persistence": 4.331643560058192,
+      "phase": 0.7189607319078948,
+      "planner_ranking": 0.05423959079287933,
+      "planner_risk": 0.010427037446980217,
+      "planner_success": 0.5849820621703801,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1505002517449228,
+      "proposal_reconstruction": 0.06525950771021216,
+      "proposal_success": 0.6752778025049913,
+      "reocclusion": 0.7005268357302014,
+      "role_swap_consistency": 0.0007142310405278726,
+      "support_mode": 0.70107421875,
+      "support_stability": 0.14081861141480898,
+      "total": 1.432289683818817,
+      "uncertainty": 0.002551493341237993,
+      "visibility": 0.10134971671198544,
+      "world_model": 2.237849539204648
+    },
+    "val": {
+      "action": 0.021186921891057864,
+      "arm_role": 3.6694105953749556e-07,
+      "belief": 0.09995241661090404,
+      "clearance": 0.08146111795213073,
+      "corridor": 0.24082361184991896,
+      "disturbance": 0.001976304362585779,
+      "grasp_affordance": 0.00922958003502572,
+      "occluder_contact": 0.21127386414445937,
+      "persistence": 3.7571401111781597,
+      "phase": 0.6817005267366767,
+      "planner_ranking": 0.03515352255374182,
+      "planner_risk": 0.01038273600534012,
+      "planner_success": 0.5073812543414533,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1285581476986408,
+      "proposal_reconstruction": 0.0629420520272106,
+      "proposal_success": 0.6745674163103104,
+      "reocclusion": 0.6919681001454592,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6647901809774339,
+      "support_stability": 0.14570825529517606,
+      "total": 1.3415670674294233,
+      "uncertainty": 0.0013466343752952525,
+      "visibility": 0.09475092665525153,
+      "world_model": 2.1340785464271903
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.021538028542540576,
+      "arm_role": 2.1901883577045642e-05,
+      "belief": 0.10526431232298675,
+      "clearance": 0.08594944182979433,
+      "corridor": 0.24735975777240177,
+      "disturbance": 0.0026733651749964336,
+      "grasp_affordance": 0.010091915089440974,
+      "occluder_contact": 0.20871730721310566,
+      "persistence": 4.281911664887478,
+      "phase": 0.6870194284539474,
+      "planner_ranking": 0.04152601579832519,
+      "planner_risk": 0.01045033406331449,
+      "planner_success": 0.5353652712545897,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1453557397189893,
+      "proposal_reconstruction": 0.06370952629337186,
+      "proposal_success": 0.6778088651205364,
+      "reocclusion": 0.6986164701612372,
+      "role_swap_consistency": 0.0004750598012929243,
+      "support_mode": 0.6878212376644737,
+      "support_stability": 0.1362508504700504,
+      "total": 1.384049719885776,
+      "uncertainty": 0.001396400365047157,
+      "visibility": 0.09892214826847377,
+      "world_model": 2.1307888821551675
+    },
+    "val": {
+      "action": 0.021681111145881005,
+      "arm_role": 0.0003864255304506514,
+      "belief": 0.10844068287406117,
+      "clearance": 0.08775011514080688,
+      "corridor": 0.23830276518128812,
+      "disturbance": 0.0019835491895037194,
+      "grasp_affordance": 0.011450761739979498,
+      "occluder_contact": 0.21598492935299873,
+      "persistence": 3.682887438684702,
+      "phase": 0.6754010105505586,
+      "planner_ranking": 0.03584061572041719,
+      "planner_risk": 0.010325502114255869,
+      "planner_success": 0.49944606237113476,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1196386851370335,
+      "proposal_reconstruction": 0.0637086319620721,
+      "proposal_success": 0.6784614324569702,
+      "reocclusion": 0.6908501861616969,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6635435968637466,
+      "support_stability": 0.14290154923219234,
+      "total": 1.3013203730806708,
+      "uncertainty": 0.002612559406315995,
+      "visibility": 0.10054636449785903,
+      "world_model": 1.9632274899631739
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.02116909674123714,
+      "arm_role": 0.00017300687338176526,
+      "belief": 0.10208533270970771,
+      "clearance": 0.08287150121637081,
+      "corridor": 0.24314571875882776,
+      "disturbance": 0.002553280315360577,
+      "grasp_affordance": 0.010202447837218642,
+      "occluder_contact": 0.20370756677891078,
+      "persistence": 3.4343402633541507,
+      "phase": 0.6811472039473684,
+      "planner_ranking": 0.03300265433170257,
+      "planner_risk": 0.010154466018828221,
+      "planner_success": 0.5132313249338615,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1288216785380716,
+      "proposal_reconstruction": 0.06323393973472871,
+      "proposal_success": 0.6770071575516149,
+      "reocclusion": 0.7064933630980943,
+      "role_swap_consistency": 0.0003766025873023625,
+      "support_mode": 0.7007555509868421,
+      "support_stability": 0.1340178519732466,
+      "total": 1.314924956309168,
+      "uncertainty": 0.0012071453580622467,
+      "visibility": 0.09558045302370662,
+      "world_model": 2.054408212398228
+    },
+    "val": {
+      "action": 0.021696553943911567,
+      "arm_role": 6.053594985289124e-07,
+      "belief": 0.0983218071050942,
+      "clearance": 0.07689482159912586,
+      "corridor": 0.29242096332018264,
+      "disturbance": 0.0041615761442699295,
+      "grasp_affordance": 0.0100187708158046,
+      "occluder_contact": 0.19618010916747153,
+      "persistence": 4.662721422035247,
+      "phase": 0.6692422716878355,
+      "planner_ranking": 0.030305169929533804,
+      "planner_risk": 0.010842124038390466,
+      "planner_success": 0.5005343491211534,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1591037698090076,
+      "proposal_reconstruction": 0.06389545585261658,
+      "proposal_success": 0.6826766086742282,
+      "reocclusion": 0.7785650952719152,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6616131067276001,
+      "support_stability": 0.1388778503460344,
+      "total": 1.3739404007792473,
+      "uncertainty": 2.288464340693963e-05,
+      "visibility": 0.09415236074710265,
+      "world_model": 1.9970475500449538
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..32d58b6061f625b571b5d8838426b6fce2c685f1
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_clip_seed9",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt",
+  "final_train_total": 1.314924956309168,
+  "final_val_total": 1.3739404007792473,
+  "train_time_sec": 146.7574381828308,
+  "peak_gpu_memory_mb": 1915.8154296875,
+  "num_train_samples": 380,
+  "num_val_samples": 128,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..106d477e0c21a4bfde902673314a22e61b797ed1
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4583333333333333,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444445,
+    "visibility_integral": 32.2005988392565,
+    "corridor_availability": 0.8664570152759552,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.1903364318709135,
+    "disturbance_cost": 0.35011103795841336
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..7a2f382b792c5bce2887645704df5a2e5222e4c2
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 32.201
+- corridor_availability: 0.866
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.190
+- disturbance_cost: 0.350
+- foliage_proxy_success: 0.458
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..08850e6f7b5edbd0adc7220b1e222d63ef26ca67
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5555555555555555,
+    "visibility_integral": 33.31703626612822,
+    "corridor_availability": 0.886079938047462,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.1836884579143008,
+    "disturbance_cost": 0.3696938648612963
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..b3e9c782e98fe444da6e45e40b38e06d7b52d108
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
+- mean_success: 0.556
+- visibility_integral: 33.317
+- corridor_availability: 0.886
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.184
+- disturbance_cost: 0.370
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c2f8e1902fac34a5642a51fc0e6cf1d1a8e19ba
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4583333333333333,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444445,
+    "visibility_integral": 32.571378606888985,
+    "corridor_availability": 0.8744470203916231,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.249059588784357,
+    "disturbance_cost": 0.34120469799058306
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..65a295fda51ef2d3df990ae4028ce8ee3cad8aee
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 32.571
+- corridor_availability: 0.874
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.249
+- disturbance_cost: 0.341
+- foliage_proxy_success: 0.458
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..441cbf8ec8fa39123f486d3ba1787de5632aa000
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml
@@ -0,0 +1,147 @@
+experiment_name: proxy_interaction_r3d_stage1_dummy_seed13
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 13
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed13.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed13.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 13
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a14abed5ac92c7c5a742ae1ab28660371821f46
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.2595419847328244,
+  "planner_regret": 0.015185066498816013,
+  "planner_score_utility_spearman": 0.25190839171409607,
+  "risk_calibration_mse": 0.011332111433148384,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.02456846833229065,
+  "left_right_equivariance_error": 0.007538194466820534,
+  "belief_calibration_brier": 0.0055354926735162735,
+  "reocclusion_calibration_brier": 0.2274838089942932,
+  "support_stability_mae": 0.030257930979132652,
+  "clearance_auc": 0.7414014153848468,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.7680174112319946,
+  "num_samples": 131
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e8ffdbfd0f3e9b6dd4e25065fe252303547d909
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.029530804604291916,
+      "arm_role": 0.19113596672893132,
+      "belief": 0.19201900158077478,
+      "clearance": 0.1937584774568677,
+      "corridor": 0.30155759242673713,
+      "disturbance": 0.018230090441647917,
+      "grasp_affordance": 0.1115249302238226,
+      "occluder_contact": 0.29577948339283466,
+      "persistence": 5.046393771966298,
+      "phase": 0.835017109910647,
+      "planner_ranking": 0.6733469751973947,
+      "planner_risk": 0.04033496890527507,
+      "planner_success": 0.6355331862966219,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.276770144701004,
+      "proposal_reconstruction": 0.07184042579804857,
+      "proposal_success": 0.6676094954212507,
+      "reocclusion": 0.6988904004295667,
+      "role_swap_consistency": 0.0006935761872834215,
+      "support_mode": 0.7387049297491709,
+      "support_stability": 0.22416748199611902,
+      "total": 2.4212693075339,
+      "uncertainty": 0.32931591259936493,
+      "visibility": 0.23356754829486212,
+      "world_model": 4.170340110858281
+    },
+    "val": {
+      "action": 0.023605089427696332,
+      "arm_role": 8.891185360779572e-05,
+      "belief": 0.112466166416804,
+      "clearance": 0.08774211009343465,
+      "corridor": 0.2502693798806932,
+      "disturbance": 0.0037313879001885653,
+      "grasp_affordance": 0.013532540657454066,
+      "occluder_contact": 0.2236137886842092,
+      "persistence": 4.796973652309841,
+      "phase": 0.6506193346447415,
+      "planner_ranking": 0.45240074396133423,
+      "planner_risk": 0.012336155710120996,
+      "planner_success": 0.6348234679963853,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1647081640031602,
+      "proposal_reconstruction": 0.06623147221075164,
+      "proposal_success": 0.6723773082097372,
+      "reocclusion": 0.6799028648270501,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6129622724321153,
+      "support_stability": 0.14574629151158863,
+      "total": 1.9533665710025363,
+      "uncertainty": 0.057104989886283875,
+      "visibility": 0.09962501211298837,
+      "world_model": 3.08394538031684
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.02052135338696341,
+      "arm_role": 0.00010673219821910607,
+      "belief": 0.11743779480457306,
+      "clearance": 0.09043452050536871,
+      "corridor": 0.24632801488041878,
+      "disturbance": 0.003475519949764324,
+      "grasp_affordance": 0.01625332736875862,
+      "occluder_contact": 0.2240921917061011,
+      "persistence": 4.695922573407491,
+      "phase": 0.49508154888947803,
+      "planner_ranking": 0.14279444872712097,
+      "planner_risk": 0.0141817982463787,
+      "planner_success": 0.593176061908404,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.165678009390831,
+      "proposal_reconstruction": 0.06292749894782901,
+      "proposal_success": 0.674570898214976,
+      "reocclusion": 0.3844434078782797,
+      "role_swap_consistency": 0.00039524554207067314,
+      "support_mode": 0.17358588459561966,
+      "support_stability": 0.1374168156956633,
+      "total": 1.6440163105726242,
+      "uncertainty": 0.047071967429171004,
+      "visibility": 0.11256152174125116,
+      "world_model": 2.4736096411943436
+    },
+    "val": {
+      "action": 0.020492848422792222,
+      "arm_role": 0.0002776960156754487,
+      "belief": 0.1081986419028706,
+      "clearance": 0.08335375868611866,
+      "corridor": 0.24787565734651354,
+      "disturbance": 0.0022675159141524797,
+      "grasp_affordance": 0.012290253303945065,
+      "occluder_contact": 0.21959979832172394,
+      "persistence": 4.647055625915527,
+      "phase": 0.4316861795054542,
+      "planner_ranking": 0.06341143821676572,
+      "planner_risk": 0.015357115098999606,
+      "planner_success": 0.5689369605647193,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1283477942148845,
+      "proposal_reconstruction": 0.06308732968237665,
+      "proposal_success": 0.6809348861376444,
+      "reocclusion": 0.2748950504594379,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0006280758987284369,
+      "support_stability": 0.14622381826241812,
+      "total": 1.6025353935029771,
+      "uncertainty": 0.02438033703300688,
+      "visibility": 0.10466726124286652,
+      "world_model": 2.558868553903368
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.01646478761297961,
+      "arm_role": 9.377782756322024e-05,
+      "belief": 0.10991635639220476,
+      "clearance": 0.0843405183404684,
+      "corridor": 0.2701566057900588,
+      "disturbance": 0.0031300995663817353,
+      "grasp_affordance": 0.012393822447241595,
+      "occluder_contact": 0.21479063170651594,
+      "persistence": 2.6339182580510774,
+      "phase": 0.431367311005791,
+      "planner_ranking": 0.06486702508603533,
+      "planner_risk": 0.013548698586722216,
+      "planner_success": 0.5643768397470316,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1353335281213124,
+      "proposal_reconstruction": 0.05951391921068231,
+      "proposal_success": 0.6731756230195364,
+      "reocclusion": 0.2623978331685066,
+      "role_swap_consistency": 0.00040521422973445925,
+      "support_mode": 0.000605581031171217,
+      "support_stability": 0.1400139912342032,
+      "total": 1.2923575937747955,
+      "uncertainty": 0.02004621450517637,
+      "visibility": 0.10328224146117766,
+      "world_model": 2.1331751296917596
+    },
+    "val": {
+      "action": 0.018090524814195104,
+      "arm_role": 4.204427063490989e-05,
+      "belief": 0.11348766502406862,
+      "clearance": 0.0778748012251324,
+      "corridor": 0.24816315703921848,
+      "disturbance": 0.0018734507805978258,
+      "grasp_affordance": 0.008446878753602505,
+      "occluder_contact": 0.2068953894906574,
+      "persistence": 1.9170836640728846,
+      "phase": 0.4777056227127711,
+      "planner_ranking": 0.07497243583202362,
+      "planner_risk": 0.012007931971715556,
+      "planner_success": 0.5846167008082072,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1227490504582722,
+      "proposal_reconstruction": 0.06178469873136944,
+      "proposal_success": 0.6768591006596884,
+      "reocclusion": 0.2698347626460923,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0005942495643264718,
+      "support_stability": 0.14820611890819338,
+      "total": 1.2714158693949382,
+      "uncertainty": 0.004030831908393238,
+      "visibility": 0.09794799155659145,
+      "world_model": 2.303717931111654
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.015296258614398539,
+      "arm_role": 9.897743439069018e-05,
+      "belief": 0.10741911331812541,
+      "clearance": 0.07931565772742033,
+      "corridor": 0.23081608302891254,
+      "disturbance": 0.00287542298125724,
+      "grasp_affordance": 0.008955261165586611,
+      "occluder_contact": 0.21085621416568756,
+      "persistence": 1.6830786913633347,
+      "phase": 0.4407324629525344,
+      "planner_ranking": 0.053573422211532794,
+      "planner_risk": 0.011835894741428396,
+      "planner_success": 0.5389373525977135,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1375357458988826,
+      "proposal_reconstruction": 0.05875217309221625,
+      "proposal_success": 0.669308491051197,
+      "reocclusion": 0.26737124752253294,
+      "role_swap_consistency": 0.00044258072254403186,
+      "support_mode": 0.0058784369854644565,
+      "support_stability": 0.13682511821389198,
+      "total": 1.1672432621320088,
+      "uncertainty": 0.007140855586233859,
+      "visibility": 0.094703309237957,
+      "world_model": 2.072191367546717
+    },
+    "val": {
+      "action": 0.016218292733861342,
+      "arm_role": 0.00022501617463098632,
+      "belief": 0.10660513407654232,
+      "clearance": 0.07916852169566685,
+      "corridor": 0.23598399923907387,
+      "disturbance": 0.0013176489026389187,
+      "grasp_affordance": 0.009249631315469742,
+      "occluder_contact": 0.2084801279836231,
+      "persistence": 1.9978744321399264,
+      "phase": 0.46462951434983146,
+      "planner_ranking": 0.04140180618398719,
+      "planner_risk": 0.011076963868820004,
+      "planner_success": 0.5154120292928484,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1469912661446466,
+      "proposal_reconstruction": 0.05962582967347569,
+      "proposal_success": 0.6495795779758029,
+      "reocclusion": 0.2503652158710692,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0004595977985041423,
+      "support_stability": 0.14600716531276703,
+      "total": 1.2128634585274591,
+      "uncertainty": 0.007759603775209851,
+      "visibility": 0.09225249456034766,
+      "world_model": 2.1404969029956393
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..0bcf8eccd89d9b325ac633686ea46db0f65b4fc9
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed13",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt",
+  "final_train_total": 1.1672432621320088,
+  "final_val_total": 1.2128634585274591,
+  "train_time_sec": 18.091050624847412,
+  "peak_gpu_memory_mb": 631.1953125,
+  "num_train_samples": 380,
+  "num_val_samples": 131,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1e68216fdc4019115c6b7f17c7c5392df457bc6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.625,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444445,
+    "visibility_integral": 32.801942747500206,
+    "corridor_availability": 0.8877548724412918,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.4711664057066363,
+    "disturbance_cost": 0.37882790300581193
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..9054a1385328752a45327544b4851a9bde36a967
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 32.802
+- corridor_availability: 0.888
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.471
+- disturbance_cost: 0.379
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.625
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..74ca2c61f4ec3939c696eed7007e9a865d6c211e
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.625
+    },
+    "mean_success": 0.5416666666666666,
+    "visibility_integral": 34.428366212381256,
+    "corridor_availability": 0.8909231291876899,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.4917179537341767,
+    "disturbance_cost": 0.39409097459995085
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..b51fcb7f86b07b0fd9c750161ce3da0abaedebcc
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
+- mean_success: 0.542
+- visibility_integral: 34.428
+- corridor_availability: 0.891
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.492
+- disturbance_cost: 0.394
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.625
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..074c536d46ae8f64444e0ebd2a0258f013678675
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.625,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444445,
+    "visibility_integral": 33.27109728753567,
+    "corridor_availability": 0.8943836614489555,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.488106187582016,
+    "disturbance_cost": 0.3667886131960485
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..e053392771c0bba70b22dd3b446d5815443573a4
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 33.271
+- corridor_availability: 0.894
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.488
+- disturbance_cost: 0.367
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.625
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51fa0f37d7643addb961bc9cea308c2069430f10
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml
@@ -0,0 +1,147 @@
+experiment_name: proxy_interaction_r3d_stage1_dummy_seed14
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 14
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed14.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed14.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 14
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebb4765c1498984c08d3c8a93c1d27d350126cf0
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.2846153846153846,
+  "planner_regret": 0.014314642176032066,
+  "planner_score_utility_spearman": 0.2153846174478531,
+  "risk_calibration_mse": 0.010775926522910595,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.02589959278702736,
+  "left_right_equivariance_error": 0.008901518605211201,
+  "belief_calibration_brier": 0.005614265333861113,
+  "reocclusion_calibration_brier": 0.28406235575675964,
+  "support_stability_mae": 0.025872904807329178,
+  "clearance_auc": 0.5220335124994485,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.7309081554412842,
+  "num_samples": 130
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..d96faaa0d4b76c519009bf3a1267c5281727e646
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.033738364155093827,
+      "arm_role": 0.2658534389071671,
+      "belief": 0.1663714082290729,
+      "clearance": 0.1995344152674079,
+      "corridor": 0.2937144724031289,
+      "disturbance": 0.01641949706633265,
+      "grasp_affordance": 0.07253360034277041,
+      "occluder_contact": 0.262634892637531,
+      "persistence": 5.348720759153366,
+      "phase": 0.9128680676221848,
+      "planner_ranking": 0.7161665211121241,
+      "planner_risk": 0.03542382351588458,
+      "planner_success": 0.6313644871115685,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.328845535715421,
+      "proposal_reconstruction": 0.07508338304857413,
+      "proposal_success": 0.6797524491945902,
+      "reocclusion": 0.7106639867027601,
+      "role_swap_consistency": 0.0008167610091428893,
+      "support_mode": 0.7801499888300896,
+      "support_stability": 0.21256058973570666,
+      "total": 2.46435983479023,
+      "uncertainty": 0.17734388983808458,
+      "visibility": 0.16707653552293777,
+      "world_model": 4.078198651472728
+    },
+    "val": {
+      "action": 0.023770140690935984,
+      "arm_role": 0.0004891494075612476,
+      "belief": 0.11787863655222787,
+      "clearance": 0.08211326102415721,
+      "corridor": 0.2646504044532776,
+      "disturbance": 0.0077974022262626225,
+      "grasp_affordance": 0.010528300681875812,
+      "occluder_contact": 0.23685429162449306,
+      "persistence": 4.643319712744819,
+      "phase": 0.6877350012461344,
+      "planner_ranking": 0.5576971173286438,
+      "planner_risk": 0.012001174760775434,
+      "planner_success": 0.6474077436659071,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2473273674647014,
+      "proposal_reconstruction": 0.06659724977281359,
+      "proposal_success": 0.6868854032622443,
+      "reocclusion": 0.6894112494256761,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.7945182191001045,
+      "support_stability": 0.13977908922566307,
+      "total": 1.9791885084576077,
+      "uncertainty": 0.016744557561145887,
+      "visibility": 0.09745695524745518,
+      "world_model": 3.0115205181969538
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.02093995890269677,
+      "arm_role": 0.00021873527142209545,
+      "belief": 0.1156839697311322,
+      "clearance": 0.09139195084571838,
+      "corridor": 0.2529828678816557,
+      "disturbance": 0.003422619032789953,
+      "grasp_affordance": 0.017661277670413256,
+      "occluder_contact": 0.22792026090125242,
+      "persistence": 4.702208956082662,
+      "phase": 0.5312556164960066,
+      "planner_ranking": 0.20636002533137798,
+      "planner_risk": 0.015822513572250802,
+      "planner_success": 0.5910777151584625,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1696062982082367,
+      "proposal_reconstruction": 0.06334876082837582,
+      "proposal_success": 0.6708702544371287,
+      "reocclusion": 0.5039266211291155,
+      "role_swap_consistency": 0.0005020403975019386,
+      "support_mode": 0.3201311229883383,
+      "support_stability": 0.13968352818240723,
+      "total": 1.6841449290513992,
+      "uncertainty": 0.026018289965577424,
+      "visibility": 0.11011519034703572,
+      "world_model": 2.466151461005211
+    },
+    "val": {
+      "action": 0.020535202903880015,
+      "arm_role": 0.00012925987215971368,
+      "belief": 0.10588792545927896,
+      "clearance": 0.08000239895449744,
+      "corridor": 0.23227471278773415,
+      "disturbance": 0.0022439691221936503,
+      "grasp_affordance": 0.011653332453635003,
+      "occluder_contact": 0.21834516359700096,
+      "persistence": 4.46406364440918,
+      "phase": 0.4118766354189979,
+      "planner_ranking": 0.0892416491276688,
+      "planner_risk": 0.0152344209038549,
+      "planner_success": 0.6057713859611087,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.133669826719496,
+      "proposal_reconstruction": 0.06398758581942982,
+      "proposal_success": 0.6783458656734891,
+      "reocclusion": 0.2840655545393626,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0015922162112676436,
+      "support_stability": 0.13890525698661804,
+      "total": 1.584020005332099,
+      "uncertainty": 0.014379701991048124,
+      "visibility": 0.09630187600851059,
+      "world_model": 2.5434003671010337
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.017165315182258684,
+      "arm_role": 0.00014243966719125942,
+      "belief": 0.1267746559654673,
+      "clearance": 0.09291451362272103,
+      "corridor": 0.2539026445398728,
+      "disturbance": 0.0040997157484525815,
+      "grasp_affordance": 0.016216314087311428,
+      "occluder_contact": 0.2287510900447766,
+      "persistence": 2.7297142073512077,
+      "phase": 0.4553527260820071,
+      "planner_ranking": 0.0675589762783299,
+      "planner_risk": 0.012244323831206808,
+      "planner_success": 0.5227356925606728,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1367994795242946,
+      "proposal_reconstruction": 0.06006583757698536,
+      "proposal_success": 0.6718559389313062,
+      "reocclusion": 0.28394716791808605,
+      "role_swap_consistency": 0.000532965175807476,
+      "support_mode": 0.0007756326898136953,
+      "support_stability": 0.14084124999741712,
+      "total": 1.2956190605958302,
+      "uncertainty": 0.011363255020114593,
+      "visibility": 0.11323032714426517,
+      "world_model": 2.120655362804731
+    },
+    "val": {
+      "action": 0.016470486712124612,
+      "arm_role": 0.00015339441274085807,
+      "belief": 0.15912896229161155,
+      "clearance": 0.07826702462302314,
+      "corridor": 0.21473425957891676,
+      "disturbance": 0.0018082650106710692,
+      "grasp_affordance": 0.008080463701238235,
+      "occluder_contact": 0.22728429403569964,
+      "persistence": 1.846471561325921,
+      "phase": 0.4164143088791106,
+      "planner_ranking": 0.05541756912134588,
+      "planner_risk": 0.011288604181673791,
+      "planner_success": 0.5237696303261651,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1311746835708618,
+      "proposal_reconstruction": 0.06064582823051347,
+      "proposal_success": 0.6669412983788384,
+      "reocclusion": 0.27248211950063705,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00040661103816496,
+      "support_stability": 0.13817799753612942,
+      "total": 1.241025275654263,
+      "uncertainty": 0.003020187374204397,
+      "visibility": 0.11647009683979882,
+      "world_model": 2.323344442579481
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.015070427674800158,
+      "arm_role": 0.0002641689807205694,
+      "belief": 0.141230215318501,
+      "clearance": 0.07984113336230318,
+      "corridor": 0.225482989102602,
+      "disturbance": 0.0017908170169296984,
+      "grasp_affordance": 0.008550037746317685,
+      "occluder_contact": 0.21477928136785826,
+      "persistence": 1.6129546587665875,
+      "phase": 0.42590194568037987,
+      "planner_ranking": 0.04456973075866699,
+      "planner_risk": 0.010397601523436606,
+      "planner_success": 0.49412518242994946,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1504750202099483,
+      "proposal_reconstruction": 0.058567725432415806,
+      "proposal_success": 0.6462936575214068,
+      "reocclusion": 0.2506879176944494,
+      "role_swap_consistency": 0.000550856914439161,
+      "support_mode": 0.0003065853112881693,
+      "support_stability": 0.1366732595488429,
+      "total": 1.134415107468764,
+      "uncertainty": 0.0035936666245106608,
+      "visibility": 0.10351777387162049,
+      "world_model": 2.024999057253202
+    },
+    "val": {
+      "action": 0.016186242405739095,
+      "arm_role": 0.0002410423346898622,
+      "belief": 0.12203978498776753,
+      "clearance": 0.07702170064051946,
+      "corridor": 0.21113747523890602,
+      "disturbance": 0.0014993647216922706,
+      "grasp_affordance": 0.008119617278377214,
+      "occluder_contact": 0.21474246515168083,
+      "persistence": 1.9725701610247295,
+      "phase": 0.4842751953336928,
+      "planner_ranking": 0.04342265882425838,
+      "planner_risk": 0.01107009764139851,
+      "planner_success": 0.5070097777578566,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1282474862204657,
+      "proposal_reconstruction": 0.05997827731900745,
+      "proposal_success": 0.6469291316138374,
+      "reocclusion": 0.2716698878341251,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00020467836778455725,
+      "support_stability": 0.13836157073577246,
+      "total": 1.2091523673799303,
+      "uncertainty": 0.0025335378272251952,
+      "visibility": 0.09879730641841888,
+      "world_model": 2.1507359743118286
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..a85631cd9ae07a289d7e835a0f3a9e72081231f9
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed14",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt",
+  "final_train_total": 1.134415107468764,
+  "final_val_total": 1.2091523673799303,
+  "train_time_sec": 23.220722675323486,
+  "peak_gpu_memory_mb": 626.4716796875,
+  "num_train_samples": 381,
+  "num_val_samples": 130,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..69244ac387c2a97bbcc7fdcc5b8d12e93d00c799
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4583333333333333,
+      "bag_proxy": 0.625,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5972222222222222,
+    "visibility_integral": 29.697570121950573,
+    "corridor_availability": 0.8675610861844487,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.20430763148842,
+    "disturbance_cost": 0.36563710583787823
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..72c996af666245b0dccd85479c7d965290d910fc
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
+- mean_success: 0.597
+- visibility_integral: 29.698
+- corridor_availability: 0.868
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.204
+- disturbance_cost: 0.366
+- foliage_proxy_success: 0.458
+- bag_proxy_success: 0.625
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..69244ac387c2a97bbcc7fdcc5b8d12e93d00c799
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4583333333333333,
+      "bag_proxy": 0.625,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5972222222222222,
+    "visibility_integral": 29.697570121950573,
+    "corridor_availability": 0.8675610861844487,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.20430763148842,
+    "disturbance_cost": 0.36563710583787823
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..72c996af666245b0dccd85479c7d965290d910fc
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
+- mean_success: 0.597
+- visibility_integral: 29.698
+- corridor_availability: 0.868
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.204
+- disturbance_cost: 0.366
+- foliage_proxy_success: 0.458
+- bag_proxy_success: 0.625
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e13b53c7b2c4dbbb2ad8a48a58cc8a3f0100ff8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.625,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.6111111111111112,
+    "visibility_integral": 28.954636810554398,
+    "corridor_availability": 0.8660841253068712,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.10539705814184,
+    "disturbance_cost": 0.35598844579524463
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..3654a1eaa96e4118f0eb8ae904a2a3349f87ad7c
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
+- mean_success: 0.611
+- visibility_integral: 28.955
+- corridor_availability: 0.866
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.105
+- disturbance_cost: 0.356
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.625
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf93049674d59f6ad7937203233ea51c2cdbbaed
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml
@@ -0,0 +1,147 @@
+experiment_name: proxy_interaction_r3d_stage1_dummy_seed15
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 15
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed15.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed15.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 15
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f3e0d068805287846e0dd0e829c2e79fce92c83
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.3053435114503817,
+  "planner_regret": 0.013406210578978062,
+  "planner_score_utility_spearman": 0.2839694619178772,
+  "risk_calibration_mse": 0.010891024023294449,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.02313310280442238,
+  "left_right_equivariance_error": 0.006598936667775407,
+  "belief_calibration_brier": 0.00368268764577806,
+  "reocclusion_calibration_brier": 0.2288682460784912,
+  "support_stability_mae": 0.025202222168445587,
+  "clearance_auc": 0.9189163634555108,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.8174758553504944,
+  "num_samples": 131
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..f4ed296c1271db4ab0afdb9b80f79aa9904ffae6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.028008008919035394,
+      "arm_role": 0.2316993211661611,
+      "belief": 0.21131388066957393,
+      "clearance": 0.19917472638189793,
+      "corridor": 0.3046618662774563,
+      "disturbance": 0.020259966540227953,
+      "grasp_affordance": 0.15939014249791703,
+      "occluder_contact": 0.3023037730405728,
+      "persistence": 5.1030773023764295,
+      "phase": 0.7391876379648844,
+      "planner_ranking": 0.6672491803765297,
+      "planner_risk": 0.035407664448333286,
+      "planner_success": 0.6247484882672628,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2685468345880508,
+      "proposal_reconstruction": 0.07012522220611572,
+      "proposal_success": 0.6749546950062116,
+      "reocclusion": 0.6581779879828294,
+      "role_swap_consistency": 0.0007787000698347887,
+      "support_mode": 0.6318444466839234,
+      "support_stability": 0.21354713415106139,
+      "total": 2.377249076962471,
+      "uncertainty": 0.2297215286331872,
+      "visibility": 0.20075704219440618,
+      "world_model": 4.083281387885411
+    },
+    "val": {
+      "action": 0.023762268117732473,
+      "arm_role": 0.00020197388787184737,
+      "belief": 0.1366901993751526,
+      "clearance": 0.10309203879700767,
+      "corridor": 0.26862603922684986,
+      "disturbance": 0.0037259276594138807,
+      "grasp_affordance": 0.044725324544641704,
+      "occluder_contact": 0.2536553243796031,
+      "persistence": 4.777863184611003,
+      "phase": 0.5066013468636407,
+      "planner_ranking": 0.44456031918525696,
+      "planner_risk": 0.01433694911085897,
+      "planner_success": 0.6283807026015388,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1667029857635498,
+      "proposal_reconstruction": 0.0664608735177252,
+      "proposal_success": 0.6838224861356947,
+      "reocclusion": 0.3364369339413113,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.06715444227059682,
+      "support_stability": 0.14777708219157326,
+      "total": 1.8394301467471652,
+      "uncertainty": 0.07208604945076837,
+      "visibility": 0.12188677820894453,
+      "world_model": 3.079341014226278
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.018888041842728853,
+      "arm_role": 0.00043030476990679745,
+      "belief": 0.11719414374480645,
+      "clearance": 0.08535642797748248,
+      "corridor": 0.24796467771132788,
+      "disturbance": 0.0024048478032151857,
+      "grasp_affordance": 0.022171703943361838,
+      "occluder_contact": 0.22088239962855974,
+      "persistence": 4.555501798788707,
+      "phase": 0.43327916599810123,
+      "planner_ranking": 0.15463371171305576,
+      "planner_risk": 0.01981719226265947,
+      "planner_success": 0.5631782834728559,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1632012923558552,
+      "proposal_reconstruction": 0.0615519261918962,
+      "proposal_success": 0.6722564473748207,
+      "reocclusion": 0.287830734004577,
+      "role_swap_consistency": 0.00048373279059887864,
+      "support_mode": 0.008119381836574272,
+      "support_stability": 0.13662359025329351,
+      "total": 1.567106415828069,
+      "uncertainty": 0.03243653344300886,
+      "visibility": 0.11203592922538519,
+      "world_model": 2.404594744245211
+    },
+    "val": {
+      "action": 0.019907095055613253,
+      "arm_role": 0.00038116834993060265,
+      "belief": 0.1014507081773546,
+      "clearance": 0.07728531956672668,
+      "corridor": 0.22947043677171072,
+      "disturbance": 0.0014698771928023133,
+      "grasp_affordance": 0.02056772096289529,
+      "occluder_contact": 0.20453951425022548,
+      "persistence": 3.6124378045399985,
+      "phase": 0.47070127063327366,
+      "planner_ranking": 0.08099263947870997,
+      "planner_risk": 0.017360565563042957,
+      "planner_success": 0.5593770245711008,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.11685311794281,
+      "proposal_reconstruction": 0.0633203275501728,
+      "proposal_success": 0.683642049630483,
+      "reocclusion": 0.42518342865837944,
+      "role_swap_consistency": 0.0,
+      "support_mode": 8.963614042537908e-05,
+      "support_stability": 0.1495772964424557,
+      "total": 1.5412384668986003,
+      "uncertainty": 0.024036270876725514,
+      "visibility": 0.10443270951509476,
+      "world_model": 2.6981404887305365
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.01506453799083829,
+      "arm_role": 0.0002299571582019174,
+      "belief": 0.10169448765615623,
+      "clearance": 0.08062320730338494,
+      "corridor": 0.23694788571447134,
+      "disturbance": 0.002010827219540564,
+      "grasp_affordance": 0.012944541425288966,
+      "occluder_contact": 0.20663638102511564,
+      "persistence": 2.024513818323612,
+      "phase": 0.4406547602266073,
+      "planner_ranking": 0.052334820929293834,
+      "planner_risk": 0.012688904457415143,
+      "planner_success": 0.4998842130104701,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1411344707012177,
+      "proposal_reconstruction": 0.058503514621406794,
+      "proposal_success": 0.663138655324777,
+      "reocclusion": 0.28770653810352087,
+      "role_swap_consistency": 0.0005917157322983257,
+      "support_mode": 0.00027886544603461516,
+      "support_stability": 0.14369840795795122,
+      "total": 1.2098931844035785,
+      "uncertainty": 0.009047253523021936,
+      "visibility": 0.09652530650297801,
+      "world_model": 2.1335272987683616
+    },
+    "val": {
+      "action": 0.0173407852028807,
+      "arm_role": 0.00028451886545452807,
+      "belief": 0.09623022625843684,
+      "clearance": 0.07612819969654083,
+      "corridor": 0.22281885809368557,
+      "disturbance": 0.001401680282368842,
+      "grasp_affordance": 0.00781761777276794,
+      "occluder_contact": 0.20622349116537306,
+      "persistence": 2.1598196625709534,
+      "phase": 0.47410638795958626,
+      "planner_ranking": 0.0378283916765617,
+      "planner_risk": 0.013348096515983343,
+      "planner_success": 0.4943488637606303,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1125682062572904,
+      "proposal_reconstruction": 0.06057575262255139,
+      "proposal_success": 0.6509590811199613,
+      "reocclusion": 0.2778696550263299,
+      "role_swap_consistency": 0.0,
+      "support_mode": 7.348006571798275e-05,
+      "support_stability": 0.14099042697085273,
+      "total": 1.2928278247515361,
+      "uncertainty": 0.0023198039270937443,
+      "visibility": 0.08993011878596412,
+      "world_model": 2.425517029232449
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.015032132350218793,
+      "arm_role": 0.00015960596041016592,
+      "belief": 0.10330141056329012,
+      "clearance": 0.0756644958940645,
+      "corridor": 0.22099452962478003,
+      "disturbance": 0.0017974149668589234,
+      "grasp_affordance": 0.008848114540645232,
+      "occluder_contact": 0.20204609570403895,
+      "persistence": 1.6058371538917224,
+      "phase": 0.42861080542206764,
+      "planner_ranking": 0.040083787171170115,
+      "planner_risk": 0.010861996522483727,
+      "planner_success": 0.48133989547689754,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1467161824305852,
+      "proposal_reconstruction": 0.058588774874806404,
+      "proposal_success": 0.6429290076096853,
+      "reocclusion": 0.24268781704207262,
+      "role_swap_consistency": 0.00047596763154918637,
+      "support_mode": 2.783346417345456e-05,
+      "support_stability": 0.1325785775358478,
+      "total": 1.1217727214097977,
+      "uncertainty": 0.003058687725570053,
+      "visibility": 0.09524129331111908,
+      "world_model": 2.0093316386143365
+    },
+    "val": {
+      "action": 0.016727436126934156,
+      "arm_role": 0.0002483524456490866,
+      "belief": 0.09281252986854976,
+      "clearance": 0.0730266264743275,
+      "corridor": 0.22520612014664543,
+      "disturbance": 0.0031746443160550874,
+      "grasp_affordance": 0.00780139294349485,
+      "occluder_contact": 0.20420674648549822,
+      "persistence": 1.9897065493795607,
+      "phase": 0.42935120397143894,
+      "planner_ranking": 0.03520135974718465,
+      "planner_risk": 0.012488630910714468,
+      "planner_success": 0.5116605394416385,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1216257943047419,
+      "proposal_reconstruction": 0.05996803608205584,
+      "proposal_success": 0.6389667987823486,
+      "reocclusion": 0.26481906490193474,
+      "role_swap_consistency": 0.0,
+      "support_mode": 4.154515813247094e-05,
+      "support_stability": 0.13968953986962637,
+      "total": 1.1943119830555387,
+      "uncertainty": 0.0017189466937755544,
+      "visibility": 0.09683923174937566,
+      "world_model": 2.1186628209220038
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8562671a18b0e5c660f58f3a74d5286b1226c769
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed15",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt",
+  "final_train_total": 1.1217727214097977,
+  "final_val_total": 1.1943119830555387,
+  "train_time_sec": 20.030457735061646,
+  "peak_gpu_memory_mb": 631.1953125,
+  "num_train_samples": 380,
+  "num_val_samples": 131,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..53a27f4620326c925d07671745709e4e89c0a46f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.625
+    },
+    "mean_success": 0.5416666666666666,
+    "visibility_integral": 34.34427807728449,
+    "corridor_availability": 0.893132723040051,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.3119179729333856,
+    "disturbance_cost": 0.39262517919350004
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..673ea758ca9473f6de04fb0a1244b42348b11b40
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt
+- mean_success: 0.542
+- visibility_integral: 34.344
+- corridor_availability: 0.893
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.312
+- disturbance_cost: 0.393
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.625
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8259a4b910eccd10954ce134823179a566fdb9f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage2_clip_seed11
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 11
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage2_seed11.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage2_seed11.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 11
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..536a3a6ecf37e15b70651b86137c6fc96616f8b6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.27906976744186046,
+  "planner_regret": 0.014687228947877884,
+  "planner_score_utility_spearman": 0.210852712392807,
+  "risk_calibration_mse": 0.00986906886100769,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.01944497972726822,
+  "left_right_equivariance_error": 0.0002826815475462795,
+  "belief_calibration_brier": 0.003809324698522687,
+  "reocclusion_calibration_brier": 0.28801918029785156,
+  "support_stability_mae": 0.026344481855630875,
+  "clearance_auc": 0.9058322298594268,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.5182730555534363,
+  "num_samples": 129
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..058ccc17a66721e9e9f7b9b357c61a5ef53f9916
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.026644286036452386,
+      "arm_role": 0.024380755674152474,
+      "belief": 0.1216605089955929,
+      "clearance": 0.09435067850491763,
+      "corridor": 0.29937174982581466,
+      "disturbance": 0.00554025236528562,
+      "grasp_affordance": 0.02358881158130097,
+      "occluder_contact": 0.2092289766247984,
+      "persistence": 6.1897567423750885,
+      "phase": 0.7662794502617801,
+      "planner_ranking": 0.16281673026756807,
+      "planner_risk": 0.014868872865537152,
+      "planner_success": 0.6131215223467162,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.24531464707789,
+      "proposal_reconstruction": 0.06868474318094903,
+      "proposal_success": 0.6827385360033724,
+      "reocclusion": 0.7132243294054301,
+      "role_swap_consistency": 0.0003943645942521023,
+      "support_mode": 0.7494733720549738,
+      "support_stability": 0.1631323242406434,
+      "total": 1.8074374061604446,
+      "uncertainty": 0.02646746405007053,
+      "visibility": 0.11232841992019359,
+      "world_model": 2.612228818900922
+    },
+    "val": {
+      "action": 0.021447077637108472,
+      "arm_role": 2.0711024318678448e-05,
+      "belief": 0.09341082458312695,
+      "clearance": 0.07425147117330477,
+      "corridor": 0.23059940796632034,
+      "disturbance": 0.002393470596031805,
+      "grasp_affordance": 0.011041764040979056,
+      "occluder_contact": 0.18809776099828573,
+      "persistence": 4.780190816292396,
+      "phase": 0.6694326795064486,
+      "planner_ranking": 0.045178403781476216,
+      "planner_risk": 0.010466235164158906,
+      "planner_success": 0.5507269249512599,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1411677369704614,
+      "proposal_reconstruction": 0.06366521647343269,
+      "proposal_success": 0.6671431862390959,
+      "reocclusion": 0.6940084649966314,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6672373189375951,
+      "support_stability": 0.1576275432912203,
+      "total": 1.518847630574153,
+      "uncertainty": 0.007903887732670858,
+      "visibility": 0.08643374764002286,
+      "world_model": 2.1690124484208915
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.022044192911832745,
+      "arm_role": 6.96019976550996e-05,
+      "belief": 0.1076193918810465,
+      "clearance": 0.08718149573664079,
+      "corridor": 0.25136479897746394,
+      "disturbance": 0.0036231905150284236,
+      "grasp_affordance": 0.013151204869326652,
+      "occluder_contact": 0.20517516619872048,
+      "persistence": 3.9877223619186752,
+      "phase": 0.6843610034563155,
+      "planner_ranking": 0.052667735511481836,
+      "planner_risk": 0.011696826657908116,
+      "planner_success": 0.5557226944344206,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1597779933070638,
+      "proposal_reconstruction": 0.06451685880725297,
+      "proposal_success": 0.6708246469497681,
+      "reocclusion": 0.6674874808775817,
+      "role_swap_consistency": 0.00026557610019144277,
+      "support_mode": 0.6283252975703534,
+      "support_stability": 0.14863310884976885,
+      "total": 1.4848913787547207,
+      "uncertainty": 0.006362306834499096,
+      "visibility": 0.1025782693665065,
+      "world_model": 2.1518520416389584
+    },
+    "val": {
+      "action": 0.01952767800539732,
+      "arm_role": 0.00011286609648882753,
+      "belief": 0.11118833353886237,
+      "clearance": 0.08315109071823266,
+      "corridor": 0.44410995245069407,
+      "disturbance": 0.004647846037127797,
+      "grasp_affordance": 0.011384243833330962,
+      "occluder_contact": 0.19399810410462892,
+      "persistence": 7.307789671675374,
+      "phase": 0.5388953167658586,
+      "planner_ranking": 0.03512171468243581,
+      "planner_risk": 0.010607366236105848,
+      "planner_success": 0.5378215003472108,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1689154845017653,
+      "proposal_reconstruction": 0.06193383215711667,
+      "proposal_success": 0.6688321847182054,
+      "reocclusion": 0.6636646819802431,
+      "role_swap_consistency": 0.0,
+      "support_mode": 1.039346590408912,
+      "support_stability": 0.15784503956540272,
+      "total": 1.6776308609889103,
+      "uncertainty": 1.152622356900023e-05,
+      "visibility": 0.11231438769743993,
+      "world_model": 2.0740180052243744
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.018700552844632592,
+      "arm_role": 0.000972873253347986,
+      "belief": 0.12275376962741633,
+      "clearance": 0.08487847380593654,
+      "corridor": 0.24357045909599523,
+      "disturbance": 0.00331472009285923,
+      "grasp_affordance": 0.01026101550818738,
+      "occluder_contact": 0.2153189008304586,
+      "persistence": 2.4059808037708565,
+      "phase": 0.5033158507022558,
+      "planner_ranking": 0.03657240937522146,
+      "planner_risk": 0.010864751256517188,
+      "planner_success": 0.5124278418836793,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1595254449944221,
+      "proposal_reconstruction": 0.06179310919726706,
+      "proposal_success": 0.6566679927066983,
+      "reocclusion": 0.38847498592423246,
+      "role_swap_consistency": 0.0008652656654451233,
+      "support_mode": 0.35570250506176376,
+      "support_stability": 0.14306114877315715,
+      "total": 1.2949361607666414,
+      "uncertainty": 0.0021972638202774016,
+      "visibility": 0.11033565828001311,
+      "world_model": 2.0251020854680326
+    },
+    "val": {
+      "action": 0.02487506473150391,
+      "arm_role": 2.681288724095164e-06,
+      "belief": 0.1081794464817414,
+      "clearance": 0.07636868116947321,
+      "corridor": 0.20140686992269297,
+      "disturbance": 0.001994377507043492,
+      "grasp_affordance": 0.008677966799587012,
+      "occluder_contact": 0.21161039288227373,
+      "persistence": 1.3219125701257817,
+      "phase": 0.37710464734297533,
+      "planner_ranking": 0.03206984894719566,
+      "planner_risk": 0.011674165392581088,
+      "planner_success": 0.46599124119831964,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1611316717587985,
+      "proposal_reconstruction": 0.06948714015575555,
+      "proposal_success": 0.6533986550111037,
+      "reocclusion": 0.31326579468754623,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.17989734836066,
+      "support_stability": 0.13907468725855535,
+      "total": 1.1672267354451693,
+      "uncertainty": 0.0006145757817158972,
+      "visibility": 0.09655883323687774,
+      "world_model": 1.9251508355140685
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.01569180575160417,
+      "arm_role": 0.027261029675368864,
+      "belief": 0.1110531479042238,
+      "clearance": 0.07751915099694155,
+      "corridor": 0.20897386773052554,
+      "disturbance": 0.0019330896785874818,
+      "grasp_affordance": 0.008865814846184553,
+      "occluder_contact": 0.2038286757406764,
+      "persistence": 1.1690228903273747,
+      "phase": 0.3392091920862647,
+      "planner_ranking": 0.03543819029409765,
+      "planner_risk": 0.010516670346321021,
+      "planner_success": 0.48809501739384614,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.147950800613583,
+      "proposal_reconstruction": 0.05924179073403643,
+      "proposal_success": 0.6485388935860539,
+      "reocclusion": 0.25005930125791365,
+      "role_swap_consistency": 0.001582450235418851,
+      "support_mode": 0.1954826559695898,
+      "support_stability": 0.13409689854811,
+      "total": 1.056747386592845,
+      "uncertainty": 0.0003292631887740548,
+      "visibility": 0.10012162002827485,
+      "world_model": 1.5451418317425314
+    },
+    "val": {
+      "action": 0.012563670304818796,
+      "arm_role": 9.379507576667834e-05,
+      "belief": 0.09966908166041741,
+      "clearance": 0.07572867818749868,
+      "corridor": 0.19870975395807852,
+      "disturbance": 0.0011806640476536884,
+      "grasp_affordance": 0.00889887514595802,
+      "occluder_contact": 0.20167340773802536,
+      "persistence": 0.8940682159306911,
+      "phase": 0.2346100378781557,
+      "planner_ranking": 0.04236364569671353,
+      "planner_risk": 0.009735576174884604,
+      "planner_success": 0.4702391225558061,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.122354874244103,
+      "proposal_reconstruction": 0.056834035424085765,
+      "proposal_success": 0.6365870714187623,
+      "reocclusion": 0.21859066887543752,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.10797913265056334,
+      "support_stability": 0.1366611572698905,
+      "total": 0.9305079854451693,
+      "uncertainty": 0.0003750753218460327,
+      "visibility": 0.09423433232765932,
+      "world_model": 1.2587093903468205
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..12df1ebd0478acadd8ee7d73209f26b8fecc1fc7
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_clip_seed11",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt",
+  "final_train_total": 1.056747386592845,
+  "final_val_total": 0.9305079854451693,
+  "train_time_sec": 131.29005098342896,
+  "peak_gpu_memory_mb": 1894.7548828125,
+  "num_train_samples": 382,
+  "num_val_samples": 129,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..1adcab281dc7f4bdb68aa7ad2d6156a5d63ba9ea
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.25,
+      "bag_proxy": 0.20833333333333334,
+      "cloth_proxy": 0.5833333333333334
+    },
+    "mean_success": 0.34722222222222227,
+    "visibility_integral": 19.064177172051537,
+    "corridor_availability": 0.5252470484831266,
+    "reocclusion_rate": 0.034895833333333334,
+    "persistence_horizon_mae": 2.8043047013660196,
+    "disturbance_cost": 0.100128799987336
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..05966447af0dccbac2ba89e43d47a3c0157d24fa
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/checkpoint_best.pt
+- mean_success: 0.347
+- visibility_integral: 19.064
+- corridor_availability: 0.525
+- reocclusion_rate: 0.035
+- persistence_horizon_mae: 2.804
+- disturbance_cost: 0.100
+- foliage_proxy_success: 0.250
+- bag_proxy_success: 0.208
+- cloth_proxy_success: 0.583
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51410160afba1bc080f013abc06d9c7e4edfc9f1
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage2_clip_seed12
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 12
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage2_seed12.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage2_seed12.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 12
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..629544cc9b35c7162fa2ca945991b0bcf53d4d9e
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.2692307692307692,
+  "planner_regret": 0.015571335330605507,
+  "planner_score_utility_spearman": 0.2846153974533081,
+  "risk_calibration_mse": 0.010228095576167107,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.025593629106879234,
+  "left_right_equivariance_error": 0.0001871140535292921,
+  "belief_calibration_brier": 0.006486459169536829,
+  "reocclusion_calibration_brier": 0.24318400025367737,
+  "support_stability_mae": 0.0361579954624176,
+  "clearance_auc": 0.6852405197686325,
+  "memory_write_rate": 0.13076923787593842,
+  "memory_saturation": 0.5033961534500122,
+  "num_samples": 130
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..b7a32c66fa16a18d74af8a92e02e794db3f131f8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.0265116647753806,
+      "arm_role": 0.026648694620082517,
+      "belief": 0.12265744023220078,
+      "clearance": 0.09419052814820986,
+      "corridor": 0.29069200661325956,
+      "disturbance": 0.0063096336832193685,
+      "grasp_affordance": 0.02155034775060665,
+      "occluder_contact": 0.21986118271088725,
+      "persistence": 7.344096696604024,
+      "phase": 0.7511836346531413,
+      "planner_ranking": 0.1567143966832472,
+      "planner_risk": 0.01613354602277325,
+      "planner_success": 0.6087345007358421,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2436198916734826,
+      "proposal_reconstruction": 0.06823750427334097,
+      "proposal_success": 0.680850050212201,
+      "reocclusion": 0.7201950554760339,
+      "role_swap_consistency": 0.0004218729012962723,
+      "support_mode": 0.7505828697643979,
+      "support_stability": 0.15377593591823902,
+      "total": 1.8732728003207302,
+      "uncertainty": 0.028985263621364478,
+      "visibility": 0.11896002095641266,
+      "world_model": 2.6631099815768096
+    },
+    "val": {
+      "action": 0.021836393154584445,
+      "arm_role": 8.102541575611283e-06,
+      "belief": 0.09969789322752219,
+      "clearance": 0.08271008575191864,
+      "corridor": 0.24081495908590464,
+      "disturbance": 0.0023920218258773763,
+      "grasp_affordance": 0.01155611932134399,
+      "occluder_contact": 0.20507212510475745,
+      "persistence": 4.512984638947707,
+      "phase": 0.6603462411807134,
+      "planner_ranking": 0.04704892609734088,
+      "planner_risk": 0.010522140137170656,
+      "planner_success": 0.52820757489938,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1522641053566567,
+      "proposal_reconstruction": 0.06444322845110527,
+      "proposal_success": 0.6650473337907058,
+      "reocclusion": 0.6900336522322434,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6587190958169791,
+      "support_stability": 0.1457874579498401,
+      "total": 1.5405189422460703,
+      "uncertainty": 0.0072207282010752424,
+      "visibility": 0.09836944525058453,
+      "world_model": 2.3159281272154586
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.02143435196278139,
+      "arm_role": 2.7934918228868416e-05,
+      "belief": 0.10347749129015738,
+      "clearance": 0.08618570594068285,
+      "corridor": 0.24549250739641215,
+      "disturbance": 0.0026278662473882427,
+      "grasp_affordance": 0.010813114165050508,
+      "occluder_contact": 0.21092523938698293,
+      "persistence": 4.392642458071883,
+      "phase": 0.6784450670811518,
+      "planner_ranking": 0.04968888171078444,
+      "planner_risk": 0.011175002839967257,
+      "planner_success": 0.5800106824614615,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.150295885757626,
+      "proposal_reconstruction": 0.06383146423631937,
+      "proposal_success": 0.6773600406671694,
+      "reocclusion": 0.7005154393730363,
+      "role_swap_consistency": 0.00020737489747128533,
+      "support_mode": 0.695506789921466,
+      "support_stability": 0.1444786221413088,
+      "total": 1.5313815312235768,
+      "uncertainty": 0.004678997660972451,
+      "visibility": 0.10154765454262339,
+      "world_model": 2.215607233384517
+    },
+    "val": {
+      "action": 0.021598762689301602,
+      "arm_role": 1.559978859754315e-05,
+      "belief": 0.10402895246560757,
+      "clearance": 0.08615114350731556,
+      "corridor": 0.24378756766135876,
+      "disturbance": 0.0017933934510857888,
+      "grasp_affordance": 0.00965615829023031,
+      "occluder_contact": 0.22014242937931647,
+      "persistence": 3.8692049705065212,
+      "phase": 0.6658917142794682,
+      "planner_ranking": 0.034095349999608095,
+      "planner_risk": 0.010596161578835634,
+      "planner_success": 0.5355585918976711,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1134835408284114,
+      "proposal_reconstruction": 0.06421315005192389,
+      "proposal_success": 0.6758711869900044,
+      "reocclusion": 0.6889989018440247,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.660473997776325,
+      "support_stability": 0.14310103247945125,
+      "total": 1.4629831167367788,
+      "uncertainty": 0.0009627942819721424,
+      "visibility": 0.10057846101430747,
+      "world_model": 2.1548714188429026
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.021090031074876874,
+      "arm_role": 0.00018929935874739242,
+      "belief": 0.10672742253198673,
+      "clearance": 0.08872412362608922,
+      "corridor": 0.2733879856331303,
+      "disturbance": 0.0030289446660285483,
+      "grasp_affordance": 0.01181759231562936,
+      "occluder_contact": 0.21809014919852712,
+      "persistence": 3.656308846635968,
+      "phase": 0.6599919983229712,
+      "planner_ranking": 0.041442906926855566,
+      "planner_risk": 0.010613277656603994,
+      "planner_success": 0.521210808092387,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1467299227315093,
+      "proposal_reconstruction": 0.06341781315067052,
+      "proposal_success": 0.6737371065229646,
+      "reocclusion": 0.6779327008639174,
+      "role_swap_consistency": 0.00022924937225094415,
+      "support_mode": 0.6753841819563461,
+      "support_stability": 0.14452538651009506,
+      "total": 1.4377010383531061,
+      "uncertainty": 0.0023537015170198385,
+      "visibility": 0.10841247750475456,
+      "world_model": 2.0592898433121087
+    },
+    "val": {
+      "action": 0.021482723922683643,
+      "arm_role": 1.9337384835055744e-05,
+      "belief": 0.1271346492262987,
+      "clearance": 0.083377072845514,
+      "corridor": 0.2741409402913772,
+      "disturbance": 0.002117429635165116,
+      "grasp_affordance": 0.011649288172618701,
+      "occluder_contact": 0.21174047933175014,
+      "persistence": 3.8564615689791166,
+      "phase": 0.6485314034498655,
+      "planner_ranking": 0.03306306126446893,
+      "planner_risk": 0.01047296588210604,
+      "planner_success": 0.5441290960862086,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1324434812252338,
+      "proposal_reconstruction": 0.06412716972140166,
+      "proposal_success": 0.6779822074449979,
+      "reocclusion": 0.6870533975271078,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6577057308875598,
+      "support_stability": 0.14096688464857066,
+      "total": 1.4323132129815908,
+      "uncertainty": 0.0027863349163313755,
+      "visibility": 0.10942233365315657,
+      "world_model": 1.9970711038662836
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.020458291170153162,
+      "arm_role": 0.00011509968972330942,
+      "belief": 0.11413928630386347,
+      "clearance": 0.09032999263852054,
+      "corridor": 0.2853015211679917,
+      "disturbance": 0.0033650345857184284,
+      "grasp_affordance": 0.011570076631255331,
+      "occluder_contact": 0.22306315101566115,
+      "persistence": 2.4543060132619727,
+      "phase": 0.5411919998248834,
+      "planner_ranking": 0.03125114804425049,
+      "planner_risk": 0.010697233745266307,
+      "planner_success": 0.5028430128674857,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1347670686182552,
+      "proposal_reconstruction": 0.06315274327915377,
+      "proposal_success": 0.677255996234754,
+      "reocclusion": 0.43758569880393555,
+      "role_swap_consistency": 0.0010055845596642276,
+      "support_mode": 0.44074948295872873,
+      "support_stability": 0.13863542222988387,
+      "total": 1.3077349625332817,
+      "uncertainty": 0.0008059210962010913,
+      "visibility": 0.12058865647587477,
+      "world_model": 1.9945788954565038
+    },
+    "val": {
+      "action": 0.01820472377137496,
+      "arm_role": 2.3236593000101145e-06,
+      "belief": 0.11443154307512136,
+      "clearance": 0.08722961630958777,
+      "corridor": 0.24973363708346508,
+      "disturbance": 0.0022052301745973707,
+      "grasp_affordance": 0.00954639521212532,
+      "occluder_contact": 0.22095146018725176,
+      "persistence": 1.743605894180767,
+      "phase": 0.3354514233964997,
+      "planner_ranking": 0.03021946732674573,
+      "planner_risk": 0.0104008026027049,
+      "planner_success": 0.5254414299359689,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1170437849484958,
+      "proposal_reconstruction": 0.06222414580675272,
+      "proposal_success": 0.6824415674576393,
+      "reocclusion": 0.3508238720635955,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.27097287286932653,
+      "support_stability": 0.1482541099190712,
+      "total": 1.2100626652057354,
+      "uncertainty": 5.787161892910192e-05,
+      "visibility": 0.11422394622976963,
+      "world_model": 1.9349967433856083
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..805e5df9f7a215d903f3f49df365decfd4ae4614
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_clip_seed12",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/checkpoint_best.pt",
+  "final_train_total": 1.3077349625332817,
+  "final_val_total": 1.2100626652057354,
+  "train_time_sec": 146.35694217681885,
+  "peak_gpu_memory_mb": 1917.4189453125,
+  "num_train_samples": 381,
+  "num_val_samples": 130,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..5628bc52a79f9b40c1c662dfe5cba2adb1453f63
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5972222222222223,
+    "visibility_integral": 31.123170379135345,
+    "corridor_availability": 0.8694257512688637,
+    "reocclusion_rate": 0.00034722222222222224,
+    "persistence_horizon_mae": 1.8432530318753104,
+    "disturbance_cost": 0.32384756999090314
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..b8724500f3e85627353cca30db27bb5e451a1c61
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/checkpoint_best.pt
+- mean_success: 0.597
+- visibility_integral: 31.123
+- corridor_availability: 0.869
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.843
+- disturbance_cost: 0.324
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cfd523ac65ba14b15a6abef147c17da31bc6968c
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage2_clip_seed13
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 13
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage2_seed13.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage2_seed13.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 13
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..860433c8133514d5f5b604be4e8cd55f385902ab
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.2595419847328244,
+  "planner_regret": 0.015185066498816013,
+  "planner_score_utility_spearman": 0.2809160351753235,
+  "risk_calibration_mse": 0.010697935707867146,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.019719451665878296,
+  "left_right_equivariance_error": 8.677602234070726e-05,
+  "belief_calibration_brier": 0.003582377452403307,
+  "reocclusion_calibration_brier": 0.2486726939678192,
+  "support_stability_mae": 0.027683958411216736,
+  "clearance_auc": 0.8539042374111527,
+  "memory_write_rate": 0.49614080786705017,
+  "memory_saturation": 0.3391597867012024,
+  "num_samples": 131
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..675aff85763dd14e1f032de475fbe32a73b5e212
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.025199293658921592,
+      "arm_role": 0.031675430975462264,
+      "belief": 0.12093473198382478,
+      "clearance": 0.09368413742631673,
+      "corridor": 0.29680905555816073,
+      "disturbance": 0.007625889547575513,
+      "grasp_affordance": 0.023363290535972307,
+      "occluder_contact": 0.21423418019947252,
+      "persistence": 8.489773372286244,
+      "phase": 0.7337813527960526,
+      "planner_ranking": 0.23520062585716675,
+      "planner_risk": 0.015000962853235635,
+      "planner_success": 0.6204052362002824,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2412571900769285,
+      "proposal_reconstruction": 0.0672246428108529,
+      "proposal_success": 0.6834057503624966,
+      "reocclusion": 0.7059739547340493,
+      "role_swap_consistency": 0.00044641466650462364,
+      "support_mode": 0.737896728515625,
+      "support_stability": 0.16598236134863997,
+      "total": 1.936751513732107,
+      "uncertainty": 0.02631602293505227,
+      "visibility": 0.12221070531951754,
+      "world_model": 2.6260432685676376
+    },
+    "val": {
+      "action": 0.02296490968684807,
+      "arm_role": 1.1920925544472993e-06,
+      "belief": 0.10552826659245924,
+      "clearance": 0.07981697961010716,
+      "corridor": 0.24074691330844705,
+      "disturbance": 0.0019879042129173977,
+      "grasp_affordance": 0.012804760837532354,
+      "occluder_contact": 0.20304674835819186,
+      "persistence": 4.831832351106586,
+      "phase": 0.662635090676221,
+      "planner_ranking": 0.04777729516111625,
+      "planner_risk": 0.011265802354142634,
+      "planner_success": 0.5608469446500143,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1612638379588271,
+      "proposal_reconstruction": 0.06499927355484529,
+      "proposal_success": 0.6768998079227678,
+      "reocclusion": 0.692740258845416,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6566608846187592,
+      "support_stability": 0.15997966932076396,
+      "total": 1.5804176764054731,
+      "uncertainty": 0.012467421647725683,
+      "visibility": 0.09922279044985771,
+      "world_model": 2.3550273776054382
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.021963300938276867,
+      "arm_role": 6.080301184403269e-06,
+      "belief": 0.10263110273762753,
+      "clearance": 0.0788226080960349,
+      "corridor": 0.2412219915735094,
+      "disturbance": 0.002794332566535511,
+      "grasp_affordance": 0.009757642472456944,
+      "occluder_contact": 0.195604843920783,
+      "persistence": 4.26262659869696,
+      "phase": 0.6962530838815789,
+      "planner_ranking": 0.051491495151506236,
+      "planner_risk": 0.011504811691855521,
+      "planner_success": 0.5311845611584814,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1484619347672713,
+      "proposal_reconstruction": 0.06422912276497013,
+      "proposal_success": 0.6744128418596167,
+      "reocclusion": 0.7004858849864257,
+      "role_swap_consistency": 0.00023457101549291494,
+      "support_mode": 0.6761667351973685,
+      "support_stability": 0.1433959776927766,
+      "total": 1.493165501795317,
+      "uncertainty": 0.005074691738149053,
+      "visibility": 0.095918686060529,
+      "world_model": 2.146357953234723
+    },
+    "val": {
+      "action": 0.023318854075941173,
+      "arm_role": 1.2460903566203672e-05,
+      "belief": 0.09727730161764404,
+      "clearance": 0.07534228863589691,
+      "corridor": 0.23771371602108984,
+      "disturbance": 0.001875049049582837,
+      "grasp_affordance": 0.008910867576064034,
+      "occluder_contact": 0.1906791471622207,
+      "persistence": 3.784950184099602,
+      "phase": 0.6902159127322111,
+      "planner_ranking": 0.039764305716744275,
+      "planner_risk": 0.011417482169539047,
+      "planner_success": 0.5057139098644257,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.147210995356242,
+      "proposal_reconstruction": 0.06565308503129265,
+      "proposal_success": 0.6812662798346896,
+      "reocclusion": 0.6869303502819755,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6485618005196253,
+      "support_stability": 0.14766556989740243,
+      "total": 1.4424566698796821,
+      "uncertainty": 0.00217234116809612,
+      "visibility": 0.08674176816235889,
+      "world_model": 2.087360879688552
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.021162556783345186,
+      "arm_role": 3.887634528310675e-05,
+      "belief": 0.12156015297299937,
+      "clearance": 0.08380865936020487,
+      "corridor": 0.24415273534893794,
+      "disturbance": 0.00342957377352401,
+      "grasp_affordance": 0.009738953835575988,
+      "occluder_contact": 0.2044433526302639,
+      "persistence": 2.334686749353373,
+      "phase": 0.4850014937551398,
+      "planner_ranking": 0.040600373610121955,
+      "planner_risk": 0.011128092848996043,
+      "planner_success": 0.5146951448760535,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1424914592190794,
+      "proposal_reconstruction": 0.06354390727846246,
+      "proposal_success": 0.6729260523068278,
+      "reocclusion": 0.45681651623331404,
+      "role_swap_consistency": 0.000693912741476915,
+      "support_mode": 0.3760432626071729,
+      "support_stability": 0.15264682037461746,
+      "total": 1.3047154269720378,
+      "uncertainty": 0.0018502298858421502,
+      "visibility": 0.10071343036466524,
+      "world_model": 2.0547038796700927
+    },
+    "val": {
+      "action": 0.021024575605141847,
+      "arm_role": 8.373512278494948e-06,
+      "belief": 0.14957294635700458,
+      "clearance": 0.07959625695013639,
+      "corridor": 0.23735206732244202,
+      "disturbance": 0.004530226309725549,
+      "grasp_affordance": 0.009394604938499855,
+      "occluder_contact": 0.20181630529237515,
+      "persistence": 1.9288715395060452,
+      "phase": 0.4471131846252264,
+      "planner_ranking": 0.032947048032920895,
+      "planner_risk": 0.010839967758246612,
+      "planner_success": 0.5091258653185584,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.133656098987117,
+      "proposal_reconstruction": 0.06369356336918744,
+      "proposal_success": 0.6579223636424902,
+      "reocclusion": 0.3941904430588086,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.22626511123257154,
+      "support_stability": 0.14589737135578285,
+      "total": 1.2399074046900778,
+      "uncertainty": 0.0006092997625246151,
+      "visibility": 0.0903791573011514,
+      "world_model": 1.9959143472440315
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.01603315635706837,
+      "arm_role": 0.002382638893629375,
+      "belief": 0.10478096155351714,
+      "clearance": 0.07576708702468558,
+      "corridor": 0.21942420092742204,
+      "disturbance": 0.0019073896570166414,
+      "grasp_affordance": 0.008396455439689912,
+      "occluder_contact": 0.1960402814965499,
+      "persistence": 1.2186123260438904,
+      "phase": 0.358337392305073,
+      "planner_ranking": 0.03375569848982483,
+      "planner_risk": 0.010724377139826845,
+      "planner_success": 0.4898807811893915,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1367171218520717,
+      "proposal_reconstruction": 0.05945674386855803,
+      "proposal_success": 0.6522450180430162,
+      "reocclusion": 0.28051841486564005,
+      "role_swap_consistency": 0.0010344118927222506,
+      "support_mode": 0.26600602300543535,
+      "support_stability": 0.13557514025780715,
+      "total": 1.1690542249303115,
+      "uncertainty": 0.0006959539541825458,
+      "visibility": 0.09422595846025568,
+      "world_model": 1.9601847686265643
+    },
+    "val": {
+      "action": 0.015446776827571519,
+      "arm_role": 9.393832596701313e-05,
+      "belief": 0.10828393223610791,
+      "clearance": 0.0738553563979539,
+      "corridor": 0.20814461167901754,
+      "disturbance": 0.0014511280261296743,
+      "grasp_affordance": 0.007996377563386253,
+      "occluder_contact": 0.1979678033879309,
+      "persistence": 0.8299788037935892,
+      "phase": 0.2583448259053209,
+      "planner_ranking": 0.03061466764219486,
+      "planner_risk": 0.011345374417336037,
+      "planner_success": 0.47457649852290296,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1192938667355161,
+      "proposal_reconstruction": 0.05910300570681239,
+      "proposal_success": 0.6505675640973178,
+      "reocclusion": 0.3073428579126344,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.23859061848347526,
+      "support_stability": 0.14255593956984353,
+      "total": 1.1266983756513307,
+      "uncertainty": 0.000752164227874759,
+      "visibility": 0.08751969581300562,
+      "world_model": 1.956845378333872
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..033fbd778155b678f8687fe7356a3aa5cb917fa3
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_clip_seed13",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/checkpoint_best.pt",
+  "final_train_total": 1.1690542249303115,
+  "final_val_total": 1.1266983756513307,
+  "train_time_sec": 147.0101616382599,
+  "peak_gpu_memory_mb": 1895.4541015625,
+  "num_train_samples": 380,
+  "num_val_samples": 131,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..6fd5848efcbbe39e82be21e008516f85685c2ba6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5416666666666666,
+    "visibility_integral": 34.41302740573883,
+    "corridor_availability": 0.8933400412400564,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.6405131230011083,
+    "disturbance_cost": 0.3704787661942343
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..0a676fcb3a3d8a67b8789c72105d6e6d3e761125
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt
+- mean_success: 0.542
+- visibility_integral: 34.413
+- corridor_availability: 0.893
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.641
+- disturbance_cost: 0.370
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e0acfb62535ce075397944fd468896a21789891
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5416666666666666,
+    "visibility_integral": 34.65096331967248,
+    "corridor_availability": 0.8933400412400564,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.6348470987268464,
+    "disturbance_cost": 0.36164701517878306
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..194c5e573e9be06fc7e72df3878f9f88af9f88e2
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt
+- mean_success: 0.542
+- visibility_integral: 34.651
+- corridor_availability: 0.893
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.635
+- disturbance_cost: 0.362
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..d51664ca4637941a7beec387972ca255e3902cd8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5416666666666666,
+    "visibility_integral": 34.41317194037967,
+    "corridor_availability": 0.8933400412400564,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.639803415858654,
+    "disturbance_cost": 0.37048843161513406
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..516e5d7183a1aa9ef6ad58011f3a59dcdac495e2
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt
+- mean_success: 0.542
+- visibility_integral: 34.413
+- corridor_availability: 0.893
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.640
+- disturbance_cost: 0.370
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a696d5c3f8c9d9af76f2eebe257f06c3312751b3
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/config_resolved.yaml
@@ -0,0 +1,148 @@
+experiment_name: proxy_interaction_r3d_stage2_dummy_seed21
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 21
+defaults: []
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy_seed21.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy_seed21.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 21
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.3
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..a05ce1c9f2fb8448f6fad1142589b68c4b059afb
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.3383458646616541,
+  "planner_regret": 0.020659049972891808,
+  "planner_score_utility_spearman": 0.2586466372013092,
+  "risk_calibration_mse": 0.011588108725845814,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.026253661140799522,
+  "left_right_equivariance_error": 0.007271398872356205,
+  "belief_calibration_brier": 0.004160370212048292,
+  "reocclusion_calibration_brier": 0.2820528745651245,
+  "support_stability_mae": 0.030557002872228622,
+  "clearance_auc": 0.9069614725933284,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.6733582615852356,
+  "num_samples": 133
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..8e65e21653831dd022ca59e262a61e8ed02b9091
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.029067292887096603,
+      "arm_role": 0.2128272018841623,
+      "belief": 0.23175121502329907,
+      "clearance": 0.1794816708813111,
+      "corridor": 0.2991743894914786,
+      "disturbance": 0.014563722050903985,
+      "grasp_affordance": 0.11285659003381927,
+      "occluder_contact": 0.2981356270611286,
+      "persistence": 5.024227797985077,
+      "phase": 0.736465490112702,
+      "planner_ranking": 0.7001801505684853,
+      "planner_risk": 0.029345064676211525,
+      "planner_success": 0.6331901401281357,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2601740161577861,
+      "proposal_reconstruction": 0.07138338964432478,
+      "proposal_success": 0.6750835478305817,
+      "reocclusion": 0.6895125756661097,
+      "role_swap_consistency": 0.0008129292400553823,
+      "support_mode": 0.7214357455571493,
+      "support_stability": 0.18610862642526627,
+      "total": 2.5757969667514167,
+      "uncertainty": 0.16812690005948147,
+      "visibility": 0.17425233901788792,
+      "world_model": 4.0634838839372
+    },
+    "val": {
+      "action": 0.02386013480524222,
+      "arm_role": 0.0004076675427818878,
+      "belief": 0.1069209881954723,
+      "clearance": 0.08219879203372532,
+      "corridor": 0.2415692475106981,
+      "disturbance": 0.0030337396116616824,
+      "grasp_affordance": 0.01165291853249073,
+      "occluder_contact": 0.22314749823676217,
+      "persistence": 4.605164660347833,
+      "phase": 0.8142086532380846,
+      "planner_ranking": 0.5563494629330106,
+      "planner_risk": 0.011604948745419582,
+      "planner_success": 0.6387051675054762,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2504341999689739,
+      "proposal_reconstruction": 0.06713124199046029,
+      "proposal_success": 0.6774384710523818,
+      "reocclusion": 0.7591080533133613,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.7771240539020963,
+      "support_stability": 0.135693629582723,
+      "total": 2.1388481987847223,
+      "uncertainty": 0.015495387733810477,
+      "visibility": 0.09591657254430982,
+      "world_model": 3.0181201563941107
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.020422046072781086,
+      "arm_role": 0.00013267093800095608,
+      "belief": 0.132033076758186,
+      "clearance": 0.09306831813106935,
+      "corridor": 0.2473244791229566,
+      "disturbance": 0.005267159331803366,
+      "grasp_affordance": 0.02323731636473288,
+      "occluder_contact": 0.2274861807624499,
+      "persistence": 4.788148105144501,
+      "phase": 0.4897861474504073,
+      "planner_ranking": 0.19803702970966697,
+      "planner_risk": 0.014476059819571674,
+      "planner_success": 0.582294854025046,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1768548240264256,
+      "proposal_reconstruction": 0.06309070127705733,
+      "proposal_success": 0.6687473853429159,
+      "reocclusion": 0.4347735444704692,
+      "role_swap_consistency": 0.0005246425753284711,
+      "support_mode": 0.27536690221556154,
+      "support_stability": 0.13952944738169512,
+      "total": 1.8044419437646866,
+      "uncertainty": 0.02774027381868412,
+      "visibility": 0.1153421513736248,
+      "world_model": 2.4898271610339484
+    },
+    "val": {
+      "action": 0.01874730870541599,
+      "arm_role": 5.6157629943401036e-05,
+      "belief": 0.10553244915273455,
+      "clearance": 0.07688990897602505,
+      "corridor": 0.22811337808767954,
+      "disturbance": 0.003250152357698729,
+      "grasp_affordance": 0.01229651603433821,
+      "occluder_contact": 0.2126419097185135,
+      "persistence": 4.407040860917833,
+      "phase": 0.5402041557762358,
+      "planner_ranking": 0.057698477473523885,
+      "planner_risk": 0.018357175298862986,
+      "planner_success": 0.5312860574987199,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1326524019241333,
+      "proposal_reconstruction": 0.061598031471172966,
+      "proposal_success": 0.684064143233829,
+      "reocclusion": 0.30786263280444676,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.004251669186891781,
+      "support_stability": 0.13853448629379272,
+      "total": 1.6857457160949707,
+      "uncertainty": 0.01232649458365308,
+      "visibility": 0.09530285745859146,
+      "world_model": 2.4774555497699313
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.016114005508522194,
+      "arm_role": 0.0001030681860356708,
+      "belief": 0.10320375890781482,
+      "clearance": 0.07995640703787406,
+      "corridor": 0.25392253262301284,
+      "disturbance": 0.0031722914403265654,
+      "grasp_affordance": 0.012748630911422273,
+      "occluder_contact": 0.2107334186633428,
+      "persistence": 2.4088165710369744,
+      "phase": 0.4625267634789149,
+      "planner_ranking": 0.060266673332080245,
+      "planner_risk": 0.012258843247157833,
+      "planner_success": 0.5274426229298115,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1385845243930817,
+      "proposal_reconstruction": 0.05936284347747763,
+      "proposal_success": 0.6725146919488907,
+      "reocclusion": 0.2462632873406013,
+      "role_swap_consistency": 0.0006072094838600606,
+      "support_mode": 0.0016275297427152207,
+      "support_stability": 0.1426111270363132,
+      "total": 1.3764432966709137,
+      "uncertainty": 0.009469694186312458,
+      "visibility": 0.09597749076783657,
+      "world_model": 2.160929208000501
+    },
+    "val": {
+      "action": 0.017381828278303146,
+      "arm_role": 0.00010448855997916932,
+      "belief": 0.10097876108354992,
+      "clearance": 0.07277507541908158,
+      "corridor": 0.2505771385298835,
+      "disturbance": 0.0016975371917295787,
+      "grasp_affordance": 0.009771786112752225,
+      "occluder_contact": 0.21183227002620697,
+      "persistence": 2.4857726428243847,
+      "phase": 0.4415881070825789,
+      "planner_ranking": 0.050767497469981514,
+      "planner_risk": 0.012091901567247178,
+      "planner_success": 0.5431661009788513,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1517571475770738,
+      "proposal_reconstruction": 0.06106388237741259,
+      "proposal_success": 0.6686976816919115,
+      "reocclusion": 0.3017841925223668,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0003194726822483871,
+      "support_stability": 0.138791523873806,
+      "total": 1.3918594784206815,
+      "uncertainty": 0.006160195347749525,
+      "visibility": 0.09356896413697137,
+      "world_model": 2.1786467101838856
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.014575915721555551,
+      "arm_role": 0.00010951897911581909,
+      "belief": 0.11138264213999112,
+      "clearance": 0.08021063978473346,
+      "corridor": 0.22665666664640108,
+      "disturbance": 0.001938682675245218,
+      "grasp_affordance": 0.009700370137579739,
+      "occluder_contact": 0.21799744479358196,
+      "persistence": 1.6732217147946358,
+      "phase": 0.44999681537350017,
+      "planner_ranking": 0.042192295814553894,
+      "planner_risk": 0.01132670590110744,
+      "planner_success": 0.5080402580400308,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.137233888109525,
+      "proposal_reconstruction": 0.058159296245624624,
+      "proposal_success": 0.6529582714041074,
+      "reocclusion": 0.25044785129527253,
+      "role_swap_consistency": 0.0005161711233085953,
+      "support_mode": 0.00018608797411919417,
+      "support_stability": 0.13345634875198206,
+      "total": 1.2379883875449498,
+      "uncertainty": 0.0046325789056330295,
+      "visibility": 0.10680994981278975,
+      "world_model": 1.9994410425424576
+    },
+    "val": {
+      "action": 0.016705242089099355,
+      "arm_role": 4.718890462148314e-05,
+      "belief": 0.09792536165979174,
+      "clearance": 0.07563622544209163,
+      "corridor": 0.2376250127951304,
+      "disturbance": 0.002239807761119058,
+      "grasp_affordance": 0.008206432374815146,
+      "occluder_contact": 0.2136789427863227,
+      "persistence": 2.978070444530911,
+      "phase": 0.4751303195953369,
+      "planner_ranking": 0.0338772117263741,
+      "planner_risk": 0.011766589557131132,
+      "planner_success": 0.5005052321487002,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1342882580227323,
+      "proposal_reconstruction": 0.05980717432167795,
+      "proposal_success": 0.639495485358768,
+      "reocclusion": 0.3352541989750332,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0004235156811773777,
+      "support_stability": 0.13641884757412803,
+      "total": 1.3906548553042941,
+      "uncertainty": 0.0036365572466618484,
+      "visibility": 0.09763797538148032,
+      "world_model": 2.049238271183438
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c1dda4ef4a72349adbabcbcc66b922b15fc5708
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_dummy_seed21",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt",
+  "final_train_total": 1.2379883875449498,
+  "final_val_total": 1.3906548553042941,
+  "train_time_sec": 18.177103996276855,
+  "peak_gpu_memory_mb": 639.55078125,
+  "num_train_samples": 379,
+  "num_val_samples": 133,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..947d338b656b32b71125643764b3a014a2eaab3a
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444443,
+    "visibility_integral": 33.861522571908104,
+    "corridor_availability": 0.8863558504316542,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.6200438848336538,
+    "disturbance_cost": 0.2896964028477669
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe4be4781986b09dfe5520aa344d81ec34ebf12d
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 33.862
+- corridor_availability: 0.886
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.620
+- disturbance_cost: 0.290
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..947d338b656b32b71125643764b3a014a2eaab3a
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444443,
+    "visibility_integral": 33.861522571908104,
+    "corridor_availability": 0.8863558504316542,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.6200438848336538,
+    "disturbance_cost": 0.2896964028477669
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe4be4781986b09dfe5520aa344d81ec34ebf12d
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_no_world_model/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 33.862
+- corridor_availability: 0.886
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.620
+- disturbance_cost: 0.290
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..fba9642f7ad6d3aae7ea336a3cb3a4f04b98514e
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5694444444444443,
+    "visibility_integral": 33.86345969637235,
+    "corridor_availability": 0.8863558504316542,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 1.6183116247653961,
+    "disturbance_cost": 0.2896275156591501
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe6f5e2dbd42ce79b21ef8f9b16706a50240f94c
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_short_history/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 33.863
+- corridor_availability: 0.886
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 1.618
+- disturbance_cost: 0.290
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..120a05c579a02cb5837dea6e0e837736552231bf
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/config_resolved.yaml
@@ -0,0 +1,148 @@
+experiment_name: proxy_interaction_r3d_stage2_dummy_seed22
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 22
+defaults: []
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy_seed22.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy_seed22.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 22
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.3
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..566ac5b17916240f8b9b679ad9ac829fd56c67c1
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.3484848484848485,
+  "planner_regret": 0.020695989951491356,
+  "planner_score_utility_spearman": 0.23636364936828613,
+  "risk_calibration_mse": 0.011909244582057,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.022221416234970093,
+  "left_right_equivariance_error": 0.00428396016907166,
+  "belief_calibration_brier": 0.004661242943257093,
+  "reocclusion_calibration_brier": 0.2808501124382019,
+  "support_stability_mae": 0.023243192583322525,
+  "clearance_auc": 0.8644590429594041,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.765249490737915,
+  "num_samples": 132
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ca12a531d738ea6971cfb1e1253cebc6ac6053d
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.03347124446493884,
+      "arm_role": 0.20229130648294813,
+      "belief": 0.17402074641237655,
+      "clearance": 0.19880834439148506,
+      "corridor": 0.2891631244371335,
+      "disturbance": 0.018553439459841076,
+      "grasp_affordance": 0.11385511832001309,
+      "occluder_contact": 0.29940443734327954,
+      "persistence": 5.160142799218495,
+      "phase": 0.8644107232491175,
+      "planner_ranking": 0.6737854778766632,
+      "planner_risk": 0.03873047609037409,
+      "planner_success": 0.6520731473962466,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.3132477800051372,
+      "proposal_reconstruction": 0.07480817381292582,
+      "proposal_success": 0.6795827721556028,
+      "reocclusion": 0.7054086849093437,
+      "role_swap_consistency": 0.0006326730472210329,
+      "support_mode": 0.6758991243938605,
+      "support_stability": 0.20185439257572094,
+      "total": 2.653773923714956,
+      "uncertainty": 0.17960463898877302,
+      "visibility": 0.18138946779072285,
+      "world_model": 4.2053997417291
+    },
+    "val": {
+      "action": 0.025188560287157696,
+      "arm_role": 0.00023987682490971766,
+      "belief": 0.13046854072146946,
+      "clearance": 0.0943274630440606,
+      "corridor": 0.25146762364440495,
+      "disturbance": 0.003108833476694094,
+      "grasp_affordance": 0.025106851425435808,
+      "occluder_contact": 0.25706369678179425,
+      "persistence": 5.160573641459147,
+      "phase": 0.6966154111756219,
+      "planner_ranking": 0.5090681347582076,
+      "planner_risk": 0.01417370161248578,
+      "planner_success": 0.6582367420196533,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.263387746281094,
+      "proposal_reconstruction": 0.06713862634367412,
+      "proposal_success": 0.6767676009072198,
+      "reocclusion": 0.6798703074455261,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6764178209834628,
+      "support_stability": 0.13747453110085595,
+      "total": 2.237161636352539,
+      "uncertainty": 0.05560384856330024,
+      "visibility": 0.09796598636441761,
+      "world_model": 3.2540784147050648
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.020618916450378794,
+      "arm_role": 7.443887670888216e-05,
+      "belief": 0.12020893146594365,
+      "clearance": 0.09807458023230235,
+      "corridor": 0.24780173785984516,
+      "disturbance": 0.003880485649763917,
+      "grasp_affordance": 0.024978001097527642,
+      "occluder_contact": 0.23083883275588354,
+      "persistence": 4.817646026611328,
+      "phase": 0.4601554498076439,
+      "planner_ranking": 0.19355946235979596,
+      "planner_risk": 0.015518942285173884,
+      "planner_success": 0.6042056332031885,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1747117042541504,
+      "proposal_reconstruction": 0.06312885119890173,
+      "proposal_success": 0.6667466511329015,
+      "reocclusion": 0.3677233246465524,
+      "role_swap_consistency": 0.0004291342059635402,
+      "support_mode": 0.08244437400693035,
+      "support_stability": 0.13558734022080898,
+      "total": 1.7778482685486476,
+      "uncertainty": 0.04317541288522383,
+      "visibility": 0.11643363380183776,
+      "world_model": 2.5014847815036774
+    },
+    "val": {
+      "action": 0.019934887687365215,
+      "arm_role": 0.00020918159215297137,
+      "belief": 0.10726906028058794,
+      "clearance": 0.08435270521375868,
+      "corridor": 0.24421080119080013,
+      "disturbance": 0.0025649187963507655,
+      "grasp_affordance": 0.013776088029974036,
+      "occluder_contact": 0.22032455106576285,
+      "persistence": 4.736663394504124,
+      "phase": 0.4386194712585873,
+      "planner_ranking": 0.05592367466953066,
+      "planner_risk": 0.01741992651174466,
+      "planner_success": 0.5965519547462463,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1303200324376423,
+      "proposal_reconstruction": 0.06245918033851518,
+      "proposal_success": 0.6803573237525092,
+      "reocclusion": 0.300288421412309,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0014197466015401813,
+      "support_stability": 0.13595510439740288,
+      "total": 1.7569248808754816,
+      "uncertainty": 0.017266521230340004,
+      "visibility": 0.09970718456639184,
+      "world_model": 2.5990555551317005
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.01646357006393373,
+      "arm_role": 0.00032456668426069274,
+      "belief": 0.11667600863923629,
+      "clearance": 0.0851635920504729,
+      "corridor": 0.2447526715695858,
+      "disturbance": 0.003258141950936988,
+      "grasp_affordance": 0.012976687673168877,
+      "occluder_contact": 0.21284440780679384,
+      "persistence": 2.953347126642863,
+      "phase": 0.44309895547727746,
+      "planner_ranking": 0.04747697835167249,
+      "planner_risk": 0.016302392507592838,
+      "planner_success": 0.5193743904431661,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.150391474366188,
+      "proposal_reconstruction": 0.05955767119303346,
+      "proposal_success": 0.6713465626041094,
+      "reocclusion": 0.2378139222661654,
+      "role_swap_consistency": 0.0005029737524940477,
+      "support_mode": 0.0011681052292260574,
+      "support_stability": 0.13539936766028404,
+      "total": 1.4113694926102955,
+      "uncertainty": 0.014864409691654146,
+      "visibility": 0.10114136214057605,
+      "world_model": 2.113257105151812
+    },
+    "val": {
+      "action": 0.01689246390014887,
+      "arm_role": 0.0002562762076397323,
+      "belief": 0.12067249417304993,
+      "clearance": 0.08075836963123745,
+      "corridor": 0.2332237097952101,
+      "disturbance": 0.0030973186884592804,
+      "grasp_affordance": 0.009670139031691683,
+      "occluder_contact": 0.19927391078737047,
+      "persistence": 2.144443233807882,
+      "phase": 0.4981871048609416,
+      "planner_ranking": 0.04188345455461078,
+      "planner_risk": 0.015286814835336473,
+      "planner_success": 0.5075024200810326,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.142343070771959,
+      "proposal_reconstruction": 0.0599246294134193,
+      "proposal_success": 0.6811430851618449,
+      "reocclusion": 0.31094949195782345,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00042427888709223934,
+      "support_stability": 0.13746210518810484,
+      "total": 1.4014967216385736,
+      "uncertainty": 0.002499298451261388,
+      "visibility": 0.1028875137368838,
+      "world_model": 2.329009042845832
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.0155420743006592,
+      "arm_role": 0.00015497119996628803,
+      "belief": 0.11977876101930936,
+      "clearance": 0.07912964109952252,
+      "corridor": 0.23581945523619652,
+      "disturbance": 0.002367413486354053,
+      "grasp_affordance": 0.009577435072666654,
+      "occluder_contact": 0.2115720814714829,
+      "persistence": 1.9874264548222225,
+      "phase": 0.45408404618501663,
+      "planner_ranking": 0.038977843476459384,
+      "planner_risk": 0.011543226932796339,
+      "planner_success": 0.5249839027722677,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1570468346277873,
+      "proposal_reconstruction": 0.059013870234290756,
+      "proposal_success": 0.6656323795517286,
+      "reocclusion": 0.2790042506530881,
+      "role_swap_consistency": 0.0005038505745081542,
+      "support_mode": 0.03911329966407114,
+      "support_stability": 0.1344400765374303,
+      "total": 1.2917357434829075,
+      "uncertainty": 0.003919239621609449,
+      "visibility": 0.10335852671414614,
+      "world_model": 2.016709173719088
+    },
+    "val": {
+      "action": 0.01665111506978671,
+      "arm_role": 0.00012317704871141663,
+      "belief": 0.11847328394651413,
+      "clearance": 0.07685465945137872,
+      "corridor": 0.23272691004806095,
+      "disturbance": 0.0017439340590499341,
+      "grasp_affordance": 0.009264905523094866,
+      "occluder_contact": 0.20612997810045877,
+      "persistence": 2.2046579784817166,
+      "phase": 0.45347891251246136,
+      "planner_ranking": 0.03614056089686023,
+      "planner_risk": 0.012666935566812754,
+      "planner_success": 0.5357781946659088,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.118505941496955,
+      "proposal_reconstruction": 0.059652416656414665,
+      "proposal_success": 0.6818766991297404,
+      "reocclusion": 0.31799929009543526,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00013029564353119995,
+      "support_stability": 0.13687688443395826,
+      "total": 1.3739903701676264,
+      "uncertainty": 0.0021665632569541535,
+      "visibility": 0.09471688088443544,
+      "world_model": 2.233483672142029
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..884e7ea038c79d0c2384b31faacd08aba16f3fa3
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_dummy_seed22",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt",
+  "final_train_total": 1.2917357434829075,
+  "final_val_total": 1.3739903701676264,
+  "train_time_sec": 21.751301288604736,
+  "peak_gpu_memory_mb": 635.970703125,
+  "num_train_samples": 380,
+  "num_val_samples": 132,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..98a7634dd43c1f7bea6523bbc55b13874987dc99
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.625
+    },
+    "mean_success": 0.5277777777777778,
+    "visibility_integral": 31.055846561988194,
+    "corridor_availability": 0.8294495956765281,
+    "reocclusion_rate": 0.036193347953216375,
+    "persistence_horizon_mae": 2.446918895718322,
+    "disturbance_cost": 0.2842518512883948
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..a8d42ad7626f90ef68c6867e28444af9f526a7d9
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt
+- mean_success: 0.528
+- visibility_integral: 31.056
+- corridor_availability: 0.829
+- reocclusion_rate: 0.036
+- persistence_horizon_mae: 2.447
+- disturbance_cost: 0.284
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.625
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8975ffea8ff240a4211f7ad5a782d9b753c95a1
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.625
+    },
+    "mean_success": 0.5277777777777778,
+    "visibility_integral": 31.244046566387016,
+    "corridor_availability": 0.8636231190628476,
+    "reocclusion_rate": 0.00798611111111111,
+    "persistence_horizon_mae": 2.825085285899754,
+    "disturbance_cost": 0.3346485110103256
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..81759851de48d3bda9bf9a04a4c57b07773c56f7
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_no_world_model/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt
+- mean_success: 0.528
+- visibility_integral: 31.244
+- corridor_availability: 0.864
+- reocclusion_rate: 0.008
+- persistence_horizon_mae: 2.825
+- disturbance_cost: 0.335
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.625
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc9c6b37c343109b256d01bc0bde6de0af74717c
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.625
+    },
+    "mean_success": 0.5277777777777778,
+    "visibility_integral": 31.054917756054138,
+    "corridor_availability": 0.8292781271868281,
+    "reocclusion_rate": 0.036366959064327485,
+    "persistence_horizon_mae": 2.4464666320020285,
+    "disturbance_cost": 0.2843864895920787
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcbbbe31aceba3f45f901e2d7277cde41a3f50ec
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_short_history/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt
+- mean_success: 0.528
+- visibility_integral: 31.055
+- corridor_availability: 0.829
+- reocclusion_rate: 0.036
+- persistence_horizon_mae: 2.446
+- disturbance_cost: 0.284
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.625
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..00c657eb23e26c38238200a9ac782819c1acebf3
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/config_resolved.yaml
@@ -0,0 +1,148 @@
+experiment_name: proxy_interaction_r3d_stage2_dummy_seed23
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 23
+defaults: []
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 96
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy_seed23.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy_seed23.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 23
+optim:
+  epochs: 4
+  batch_size: 16
+  num_workers: 4
+  lr: 0.001
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: false
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: false
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 192
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: true
+  fusion:
+    hidden_dim: 192
+    num_cameras: 3
+    num_layers: 2
+    num_heads: 4
+    ff_dim: 384
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 192
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 4
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 192
+    num_heads: 4
+    num_layers: 2
+    ff_dim: 384
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 192
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 4
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 192
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 4
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 192
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 4
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.15
+  arm_role: 0.2
+  support_mode: 0.15
+  corridor: 0.2
+  persistence: 0.1
+  disturbance: 0.1
+  world_model: 0.3
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.2
+  planner_risk: 0.1
+  planner_ranking: 0.1
+  proposal_reconstruction: 0.2
+  proposal_success: 0.1
+  proposal_ranking: 0.1
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..0693b4b7422520470edad09c32366527610a9ad6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.3458646616541353,
+  "planner_regret": 0.020924845710396767,
+  "planner_score_utility_spearman": 0.22406017780303955,
+  "risk_calibration_mse": 0.01817331090569496,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.02499752677977085,
+  "left_right_equivariance_error": 0.0035538733252050247,
+  "belief_calibration_brier": 0.016437487676739693,
+  "reocclusion_calibration_brier": 0.2605345547199249,
+  "support_stability_mae": 0.03507188707590103,
+  "clearance_auc": 0.8892945983340073,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.8572164177894592,
+  "num_samples": 133
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ea2ef14bdb65bd0980161234b4efb14fb39f52f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.029319839163993795,
+      "arm_role": 0.23177419497793986,
+      "belief": 0.19956070557236671,
+      "clearance": 0.19428976656248173,
+      "corridor": 0.28412687219679356,
+      "disturbance": 0.014706775381152207,
+      "grasp_affordance": 0.09792078468793382,
+      "occluder_contact": 0.26536280413468677,
+      "persistence": 4.883942524592082,
+      "phase": 0.7541014266510805,
+      "planner_ranking": 0.6753277728954951,
+      "planner_risk": 0.025359969469718635,
+      "planner_success": 0.6392867142955462,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2586021423339844,
+      "proposal_reconstruction": 0.07134231490393479,
+      "proposal_success": 0.6742175792654356,
+      "reocclusion": 0.6968964214126269,
+      "role_swap_consistency": 0.0008294641763010683,
+      "support_mode": 0.7254589063425859,
+      "support_stability": 0.20699986908584833,
+      "total": 2.557743047674497,
+      "uncertainty": 0.2125748023002719,
+      "visibility": 0.1725630493213733,
+      "world_model": 4.047068367401759
+    },
+    "val": {
+      "action": 0.02311015480922328,
+      "arm_role": 0.00018451267129017247,
+      "belief": 0.10730510370598899,
+      "clearance": 0.08969895541667938,
+      "corridor": 0.2666405571831597,
+      "disturbance": 0.0017999378841422084,
+      "grasp_affordance": 0.009250536198831268,
+      "occluder_contact": 0.2300689915815989,
+      "persistence": 4.78337902492947,
+      "phase": 0.7185595366689894,
+      "planner_ranking": 0.4548414415783352,
+      "planner_risk": 0.012839581610427963,
+      "planner_success": 0.6475298735830519,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1986345450083415,
+      "proposal_reconstruction": 0.06549014771978061,
+      "proposal_success": 0.6734013424979316,
+      "reocclusion": 0.6851721008618673,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6492728524737887,
+      "support_stability": 0.13324941943089166,
+      "total": 2.1259667608473034,
+      "uncertainty": 0.007739724384413825,
+      "visibility": 0.09847861197259691,
+      "world_model": 3.0716149542066784
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.01952160553385814,
+      "arm_role": 8.932303330766445e-05,
+      "belief": 0.12918406135092178,
+      "clearance": 0.0892887885371844,
+      "corridor": 0.2474869458625714,
+      "disturbance": 0.0037682938176052025,
+      "grasp_affordance": 0.017499797123794753,
+      "occluder_contact": 0.21778892911970615,
+      "persistence": 4.783275107542674,
+      "phase": 0.49998418365915615,
+      "planner_ranking": 0.1362916425180932,
+      "planner_risk": 0.016128994757309556,
+      "planner_success": 0.5813094191253185,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1601267904043198,
+      "proposal_reconstruction": 0.06217473496993383,
+      "proposal_success": 0.6659636174639066,
+      "reocclusion": 0.42425032146275043,
+      "role_swap_consistency": 0.0004794289428294481,
+      "support_mode": 0.2768289456823065,
+      "support_stability": 0.1436432379608353,
+      "total": 1.7938196162382762,
+      "uncertainty": 0.03192775448163351,
+      "visibility": 0.11060131123910348,
+      "world_model": 2.4868411223093667
+    },
+    "val": {
+      "action": 0.020857719497548208,
+      "arm_role": 0.0003135378614792393,
+      "belief": 0.11252795242600971,
+      "clearance": 0.07942688961823781,
+      "corridor": 0.23491873840490976,
+      "disturbance": 0.002107284943728397,
+      "grasp_affordance": 0.012219702411029074,
+      "occluder_contact": 0.2054839183886846,
+      "persistence": 5.023713111877441,
+      "phase": 0.429247111082077,
+      "planner_ranking": 0.06150271536575423,
+      "planner_risk": 0.014784476098914942,
+      "planner_success": 0.5266371270020803,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.151251130633884,
+      "proposal_reconstruction": 0.06296455942922169,
+      "proposal_success": 0.6976126233736674,
+      "reocclusion": 0.31344303902652526,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0009844449426357944,
+      "support_stability": 0.137757099337048,
+      "total": 1.758366995387607,
+      "uncertainty": 0.028360916922489803,
+      "visibility": 0.0932084388203091,
+      "world_model": 2.549745281537374
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.01677789391639332,
+      "arm_role": 0.00018648877752032908,
+      "belief": 0.10787749228378136,
+      "clearance": 0.08126144856214523,
+      "corridor": 0.2433396608879169,
+      "disturbance": 0.00258097746943046,
+      "grasp_affordance": 0.017474771360866725,
+      "occluder_contact": 0.20834970474243164,
+      "persistence": 2.438386077682177,
+      "phase": 0.4408506167431672,
+      "planner_ranking": 0.0513462177477777,
+      "planner_risk": 0.012917533827324709,
+      "planner_success": 0.5113103551169237,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.149764706691106,
+      "proposal_reconstruction": 0.06004528577129046,
+      "proposal_success": 0.675537995994091,
+      "reocclusion": 0.25245313874135417,
+      "role_swap_consistency": 0.000515319329376022,
+      "support_mode": 0.0019980755605502054,
+      "support_stability": 0.140461476209263,
+      "total": 1.388836865623792,
+      "uncertainty": 0.022540901283112664,
+      "visibility": 0.09868530587603648,
+      "world_model": 2.2134085396925607
+    },
+    "val": {
+      "action": 0.017413452060686216,
+      "arm_role": 6.871581151952139e-05,
+      "belief": 0.11101349939902623,
+      "clearance": 0.08136323259936439,
+      "corridor": 0.254426423046324,
+      "disturbance": 0.0028602277549604573,
+      "grasp_affordance": 0.010002103013296923,
+      "occluder_contact": 0.22746851212448543,
+      "persistence": 2.106385005844964,
+      "phase": 0.46620431542396545,
+      "planner_ranking": 0.037967391312122345,
+      "planner_risk": 0.01383865676406357,
+      "planner_success": 0.5223823752668169,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.120710015296936,
+      "proposal_reconstruction": 0.06028586791621314,
+      "proposal_success": 0.6775065395567152,
+      "reocclusion": 0.29894231177038616,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00042318304379781085,
+      "support_stability": 0.14477568368117014,
+      "total": 1.4280590216318767,
+      "uncertainty": 0.015508349053561687,
+      "visibility": 0.10366267793708378,
+      "world_model": 2.4275851249694824
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.014798827391738692,
+      "arm_role": 0.00012019669429719215,
+      "belief": 0.10037506744265556,
+      "clearance": 0.0759961671816806,
+      "corridor": 0.21944596556325754,
+      "disturbance": 0.0022576948249479756,
+      "grasp_affordance": 0.008150271993751327,
+      "occluder_contact": 0.204491992170612,
+      "persistence": 1.7069302797317505,
+      "phase": 0.4352826727554202,
+      "planner_ranking": 0.039453314462055765,
+      "planner_risk": 0.01098932025100415,
+      "planner_success": 0.49703357741236687,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1383505016565323,
+      "proposal_reconstruction": 0.058284393356492124,
+      "proposal_success": 0.6598181600371996,
+      "reocclusion": 0.24928847063953677,
+      "role_swap_consistency": 0.00044218337643542327,
+      "support_mode": 0.00023409606789452178,
+      "support_stability": 0.13384470157325268,
+      "total": 1.2625050817926724,
+      "uncertainty": 0.006061152564749743,
+      "visibility": 0.0954263440022866,
+      "world_model": 2.093868618210157
+    },
+    "val": {
+      "action": 0.016459165140986443,
+      "arm_role": 0.00010593322319133829,
+      "belief": 0.11468188961346944,
+      "clearance": 0.07586023211479187,
+      "corridor": 0.2960966345336702,
+      "disturbance": 0.003753158315602276,
+      "grasp_affordance": 0.008578508730149932,
+      "occluder_contact": 0.2071819139851464,
+      "persistence": 2.1860306660334268,
+      "phase": 0.4479760792520311,
+      "planner_ranking": 0.03706499561667442,
+      "planner_risk": 0.013231952778167196,
+      "planner_success": 0.4939282072914971,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1495306624306574,
+      "proposal_reconstruction": 0.05932460932268037,
+      "proposal_success": 0.6537699633174472,
+      "reocclusion": 0.29562795327769387,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.00016798629722971882,
+      "support_stability": 0.13361257563034692,
+      "total": 1.392296102311876,
+      "uncertainty": 0.0055736687241329085,
+      "visibility": 0.09414981967873043,
+      "world_model": 2.2928188774320812
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b0ad63ab15cc90281b79ed45ca4356f79da49b9
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/summary.json
@@ -0,0 +1,14 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage2_dummy_seed23",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt",
+  "final_train_total": 1.2625050817926724,
+  "final_val_total": 1.392296102311876,
+  "train_time_sec": 22.338274240493774,
+  "peak_gpu_memory_mb": 642.658203125,
+  "num_train_samples": 380,
+  "num_val_samples": 133,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": null
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..41c495ecb4cd2eac11e8e36c401ac0a8706dfef8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5833333333333334,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5972222222222223,
+    "visibility_integral": 17.476146274142796,
+    "corridor_availability": 0.6671382097734345,
+    "reocclusion_rate": 0.022222222222222223,
+    "persistence_horizon_mae": 2.4605967335703705,
+    "disturbance_cost": 0.15201607884632218
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..caebf0261f5ea2e2995704d66d0e76c2d17fd3b8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt
+- mean_success: 0.597
+- visibility_integral: 17.476
+- corridor_availability: 0.667
+- reocclusion_rate: 0.022
+- persistence_horizon_mae: 2.461
+- disturbance_cost: 0.152
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.583
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..fb1524ab69b165e66adf25b1d266ea74407f969f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4166666666666667,
+      "bag_proxy": 0.4583333333333333,
+      "cloth_proxy": 0.6666666666666666
+    },
+    "mean_success": 0.5138888888888888,
+    "visibility_integral": 17.8445434462693,
+    "corridor_availability": 0.6146270692762401,
+    "reocclusion_rate": 0.025173611111111115,
+    "persistence_horizon_mae": 2.6810189323804137,
+    "disturbance_cost": 0.1732833090548714
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..d19910714c310557e6c65c0fc19cac175e902df6
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt
+- mean_success: 0.514
+- visibility_integral: 17.845
+- corridor_availability: 0.615
+- reocclusion_rate: 0.025
+- persistence_horizon_mae: 2.681
+- disturbance_cost: 0.173
+- foliage_proxy_success: 0.417
+- bag_proxy_success: 0.458
+- cloth_proxy_success: 0.667
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..79cd3d6e38e4b0a081d397b9d5130b5ca69707bc
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage3_clip_rgbd_seed17
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 17
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_seed17.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_seed17.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 17
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: true
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..0fc231d5c506b0afee033d9b067a898a9721261e
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.29770992366412213,
+  "planner_regret": 0.013548726215958595,
+  "planner_score_utility_spearman": 0.19083969295024872,
+  "risk_calibration_mse": 0.010792036540806293,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.033339403569698334,
+  "left_right_equivariance_error": 6.996666387280901e-05,
+  "belief_calibration_brier": 0.004780409391969442,
+  "reocclusion_calibration_brier": 0.2683986723423004,
+  "support_stability_mae": 0.027458177879452705,
+  "clearance_auc": 0.9621755433404506,
+  "memory_write_rate": 0.4997691512107849,
+  "memory_saturation": 0.5063503980636597,
+  "num_samples": 131
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..86553cbe14abad380b398fb218f2be36df31dc72
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.02815176180416816,
+      "arm_role": 0.029603523643393265,
+      "belief": 0.11383584796598083,
+      "clearance": 0.09396348909327859,
+      "corridor": 0.27990493799902894,
+      "disturbance": 0.0038364405198463877,
+      "grasp_affordance": 0.021576231786687123,
+      "occluder_contact": 0.21160779281666403,
+      "persistence": 5.600160198503511,
+      "phase": 0.7736630088404606,
+      "planner_ranking": 0.22218675435226606,
+      "planner_risk": 0.016042623469394657,
+      "planner_success": 0.6215873261815623,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2711315641277714,
+      "proposal_reconstruction": 0.06910703382209728,
+      "proposal_success": 0.6854824983759931,
+      "reocclusion": 0.7048384636640549,
+      "role_swap_consistency": 0.0009499136090437931,
+      "support_mode": 0.7124486019736842,
+      "support_stability": 0.16308180080040505,
+      "total": 1.80654469226536,
+      "uncertainty": 0.018382124003638686,
+      "visibility": 0.11109156565446603,
+      "world_model": 2.6661070312324324
+    },
+    "val": {
+      "action": 0.02329161667236776,
+      "arm_role": 1.2488904289828938e-05,
+      "belief": 0.09460798730001305,
+      "clearance": 0.07680663082635764,
+      "corridor": 0.23859078346779852,
+      "disturbance": 0.0018481058845778039,
+      "grasp_affordance": 0.008316120937127958,
+      "occluder_contact": 0.20198668494369043,
+      "persistence": 4.336097291021636,
+      "phase": 0.6960404786196622,
+      "planner_ranking": 0.05048516956263815,
+      "planner_risk": 0.011283920221268834,
+      "planner_success": 0.6002033824721972,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1648338744134614,
+      "proposal_reconstruction": 0.06488600407134403,
+      "proposal_success": 0.6840101480484009,
+      "reocclusion": 0.6917306789846132,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6627979712052778,
+      "support_stability": 0.14844087350436233,
+      "total": 1.679735080762343,
+      "uncertainty": 0.0017192336152110136,
+      "visibility": 0.08843542991036718,
+      "world_model": 2.796021072250424
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.022217433576128983,
+      "arm_role": 1.2244989997462223e-05,
+      "belief": 0.10026986102916692,
+      "clearance": 0.08305128944155417,
+      "corridor": 0.2513870910986474,
+      "disturbance": 0.002561395750442324,
+      "grasp_affordance": 0.009610651308474572,
+      "occluder_contact": 0.2029949708988792,
+      "persistence": 4.274056613445282,
+      "phase": 0.6910824424342106,
+      "planner_ranking": 0.05793145077140683,
+      "planner_risk": 0.01096556101493709,
+      "planner_success": 0.5687428168560329,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1443857858055517,
+      "proposal_reconstruction": 0.06427661131479238,
+      "proposal_success": 0.677754524193312,
+      "reocclusion": 0.7005640453413913,
+      "role_swap_consistency": 0.000685913089546375,
+      "support_mode": 0.6932514391447369,
+      "support_stability": 0.1438921767728109,
+      "total": 1.5099341706225746,
+      "uncertainty": 0.0017574461604358237,
+      "visibility": 0.09719569808558413,
+      "world_model": 2.1559007089389
+    },
+    "val": {
+      "action": 0.022725384470752695,
+      "arm_role": 5.272072281293345e-05,
+      "belief": 0.09615642464522159,
+      "clearance": 0.08045592444073973,
+      "corridor": 0.23664624672947507,
+      "disturbance": 0.0026030399920714308,
+      "grasp_affordance": 0.008498197915314726,
+      "occluder_contact": 0.20824123399727273,
+      "persistence": 4.317030298890489,
+      "phase": 0.6911809751481721,
+      "planner_ranking": 0.04295492070951062,
+      "planner_risk": 0.011065340517124048,
+      "planner_success": 0.48531281767469464,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1322515372074011,
+      "proposal_reconstruction": 0.06503733078187163,
+      "proposal_success": 0.6843914163835121,
+      "reocclusion": 0.6961798333760464,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6560130250273328,
+      "support_stability": 0.15669836300058346,
+      "total": 1.5237527164545925,
+      "uncertainty": 0.00022470286212134823,
+      "visibility": 0.09155982241711834,
+      "world_model": 2.325164812080788
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.021325080562382935,
+      "arm_role": 3.0533263557835627e-06,
+      "belief": 0.10637223070585414,
+      "clearance": 0.08482141373188871,
+      "corridor": 0.24448425138467236,
+      "disturbance": 0.0027837532476089684,
+      "grasp_affordance": 0.010188110915355776,
+      "occluder_contact": 0.20852525508717487,
+      "persistence": 3.92157253560267,
+      "phase": 0.6793996710526315,
+      "planner_ranking": 0.04173064419405579,
+      "planner_risk": 0.01095533966547105,
+      "planner_success": 0.5252253392809316,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1396061564746656,
+      "proposal_reconstruction": 0.06338561845845297,
+      "proposal_success": 0.6762565412019429,
+      "reocclusion": 0.6968451614442625,
+      "role_swap_consistency": 0.00035725896036877344,
+      "support_mode": 0.67568359375,
+      "support_stability": 0.1382079716869875,
+      "total": 1.4412984270798532,
+      "uncertainty": 0.0013268421901700908,
+      "visibility": 0.10235781155918774,
+      "world_model": 2.0309231833407755
+    },
+    "val": {
+      "action": 0.02207596722820943,
+      "arm_role": 5.734688214130129e-07,
+      "belief": 0.10629133389077404,
+      "clearance": 0.08537082306363365,
+      "corridor": 0.2549011127063722,
+      "disturbance": 0.0016559935975788958,
+      "grasp_affordance": 0.009338989832692525,
+      "occluder_contact": 0.22646727706446793,
+      "persistence": 3.3828756935668713,
+      "phase": 0.6586590924046256,
+      "planner_ranking": 0.037367004423382845,
+      "planner_risk": 0.010625976211017449,
+      "planner_success": 0.4890047986850594,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1231766451488843,
+      "proposal_reconstruction": 0.06420681360318806,
+      "proposal_success": 0.686759861129703,
+      "reocclusion": 0.6883266523028865,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6401153613220562,
+      "support_stability": 0.1437410551095099,
+      "total": 1.4105993129990317,
+      "uncertainty": 0.0012141159448109454,
+      "visibility": 0.09800120776124073,
+      "world_model": 2.073751407139229
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.01997323781742077,
+      "arm_role": 6.544025320755808e-05,
+      "belief": 0.1165526967299612,
+      "clearance": 0.08391809918378529,
+      "corridor": 0.22205955819808879,
+      "disturbance": 0.0020432069609006777,
+      "grasp_affordance": 0.010270981588645985,
+      "occluder_contact": 0.2182939759994808,
+      "persistence": 1.2004593090216034,
+      "phase": 0.48546034160413243,
+      "planner_ranking": 0.034699498282608196,
+      "planner_risk": 0.010634258209029213,
+      "planner_success": 0.5031348044150754,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1341818285615821,
+      "proposal_reconstruction": 0.06254415310135013,
+      "proposal_success": 0.6701703595487695,
+      "reocclusion": 0.4238589314466624,
+      "role_swap_consistency": 0.0005717776034879008,
+      "support_mode": 0.25150866257516963,
+      "support_stability": 0.1331169359729086,
+      "total": 1.194669181735892,
+      "uncertainty": 0.0006598267668334139,
+      "visibility": 0.11416163087675446,
+      "world_model": 1.94043176832952
+    },
+    "val": {
+      "action": 0.021236835851926695,
+      "arm_role": 3.174391024734205e-06,
+      "belief": 0.11499111557548697,
+      "clearance": 0.079068739305843,
+      "corridor": 0.21379030992587408,
+      "disturbance": 0.0011454115509443661,
+      "grasp_affordance": 0.01121496031004371,
+      "occluder_contact": 0.21270102081876813,
+      "persistence": 1.763856044095574,
+      "phase": 0.3144524506095684,
+      "planner_ranking": 0.04632020881961745,
+      "planner_risk": 0.010791946026570671,
+      "planner_success": 0.4698902251142444,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1357553402582805,
+      "proposal_reconstruction": 0.06354159370742062,
+      "proposal_success": 0.6496003658482523,
+      "reocclusion": 0.31966371032776253,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.0014491713399448517,
+      "support_stability": 0.13909960007577232,
+      "total": 1.1584763833970735,
+      "uncertainty": 0.00029339295746688026,
+      "visibility": 0.09549486710492408,
+      "world_model": 1.911818307457548
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/summary.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..050b14c6daecee35aaf514f8058d588b84563b26
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/summary.json
@@ -0,0 +1,557 @@
+{
+  "experiment_name": "proxy_interaction_r3d_stage3_clip_rgbd_seed17",
+  "device": "cuda",
+  "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt",
+  "final_train_total": 1.194669181735892,
+  "final_val_total": 1.1584763833970735,
+  "train_time_sec": 138.50738143920898,
+  "peak_gpu_memory_mb": 1933.771484375,
+  "num_train_samples": 380,
+  "num_val_samples": 131,
+  "planner_mode": "trainable",
+  "frozen_modules": [],
+  "init_info": {
+    "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
+    "loaded_keys": 461,
+    "skipped_shape_mismatch_keys": [
+      "memory.gru.weight_ih_l0",
+      "memory.gru.weight_hh_l0",
+      "memory.gru.bias_ih_l0",
+      "memory.gru.bias_hh_l0",
+      "memory.token_proj.0.weight",
+      "memory.token_proj.0.bias",
+      "memory.token_proj.1.weight",
+      "memory.token_proj.1.bias",
+      "decoder.actor_role_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.0.linear1.weight",
+      "decoder.revealer_decoder.layers.0.linear1.bias",
+      "decoder.revealer_decoder.layers.0.linear2.weight",
+      "decoder.revealer_decoder.layers.0.linear2.bias",
+      "decoder.revealer_decoder.layers.0.norm1.weight",
+      "decoder.revealer_decoder.layers.0.norm1.bias",
+      "decoder.revealer_decoder.layers.0.norm2.weight",
+      "decoder.revealer_decoder.layers.0.norm2.bias",
+      "decoder.revealer_decoder.layers.0.norm3.weight",
+      "decoder.revealer_decoder.layers.0.norm3.bias",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.1.linear1.weight",
+      "decoder.revealer_decoder.layers.1.linear1.bias",
+      "decoder.revealer_decoder.layers.1.linear2.weight",
+      "decoder.revealer_decoder.layers.1.linear2.bias",
+      "decoder.revealer_decoder.layers.1.norm1.weight",
+      "decoder.revealer_decoder.layers.1.norm1.bias",
+      "decoder.revealer_decoder.layers.1.norm2.weight",
+      "decoder.revealer_decoder.layers.1.norm2.bias",
+      "decoder.revealer_decoder.layers.1.norm3.weight",
+      "decoder.revealer_decoder.layers.1.norm3.bias",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.2.linear1.weight",
+      "decoder.revealer_decoder.layers.2.linear1.bias",
+      "decoder.revealer_decoder.layers.2.linear2.weight",
+      "decoder.revealer_decoder.layers.2.linear2.bias",
+      "decoder.revealer_decoder.layers.2.norm1.weight",
+      "decoder.revealer_decoder.layers.2.norm1.bias",
+      "decoder.revealer_decoder.layers.2.norm2.weight",
+      "decoder.revealer_decoder.layers.2.norm2.bias",
+      "decoder.revealer_decoder.layers.2.norm3.weight",
+      "decoder.revealer_decoder.layers.2.norm3.bias",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.revealer_decoder.layers.3.linear1.weight",
+      "decoder.revealer_decoder.layers.3.linear1.bias",
+      "decoder.revealer_decoder.layers.3.linear2.weight",
+      "decoder.revealer_decoder.layers.3.linear2.bias",
+      "decoder.revealer_decoder.layers.3.norm1.weight",
+      "decoder.revealer_decoder.layers.3.norm1.bias",
+      "decoder.revealer_decoder.layers.3.norm2.weight",
+      "decoder.revealer_decoder.layers.3.norm2.bias",
+      "decoder.revealer_decoder.layers.3.norm3.weight",
+      "decoder.revealer_decoder.layers.3.norm3.bias",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.0.linear1.weight",
+      "decoder.actor_decoder.layers.0.linear1.bias",
+      "decoder.actor_decoder.layers.0.linear2.weight",
+      "decoder.actor_decoder.layers.0.linear2.bias",
+      "decoder.actor_decoder.layers.0.norm1.weight",
+      "decoder.actor_decoder.layers.0.norm1.bias",
+      "decoder.actor_decoder.layers.0.norm2.weight",
+      "decoder.actor_decoder.layers.0.norm2.bias",
+      "decoder.actor_decoder.layers.0.norm3.weight",
+      "decoder.actor_decoder.layers.0.norm3.bias",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.1.linear1.weight",
+      "decoder.actor_decoder.layers.1.linear1.bias",
+      "decoder.actor_decoder.layers.1.linear2.weight",
+      "decoder.actor_decoder.layers.1.linear2.bias",
+      "decoder.actor_decoder.layers.1.norm1.weight",
+      "decoder.actor_decoder.layers.1.norm1.bias",
+      "decoder.actor_decoder.layers.1.norm2.weight",
+      "decoder.actor_decoder.layers.1.norm2.bias",
+      "decoder.actor_decoder.layers.1.norm3.weight",
+      "decoder.actor_decoder.layers.1.norm3.bias",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.2.linear1.weight",
+      "decoder.actor_decoder.layers.2.linear1.bias",
+      "decoder.actor_decoder.layers.2.linear2.weight",
+      "decoder.actor_decoder.layers.2.linear2.bias",
+      "decoder.actor_decoder.layers.2.norm1.weight",
+      "decoder.actor_decoder.layers.2.norm1.bias",
+      "decoder.actor_decoder.layers.2.norm2.weight",
+      "decoder.actor_decoder.layers.2.norm2.bias",
+      "decoder.actor_decoder.layers.2.norm3.weight",
+      "decoder.actor_decoder.layers.2.norm3.bias",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.actor_decoder.layers.3.linear1.weight",
+      "decoder.actor_decoder.layers.3.linear1.bias",
+      "decoder.actor_decoder.layers.3.linear2.weight",
+      "decoder.actor_decoder.layers.3.linear2.bias",
+      "decoder.actor_decoder.layers.3.norm1.weight",
+      "decoder.actor_decoder.layers.3.norm1.bias",
+      "decoder.actor_decoder.layers.3.norm2.weight",
+      "decoder.actor_decoder.layers.3.norm2.bias",
+      "decoder.actor_decoder.layers.3.norm3.weight",
+      "decoder.actor_decoder.layers.3.norm3.bias",
+      "decoder.revealer_mean.weight",
+      "decoder.revealer_mean.bias",
+      "decoder.revealer_log_std.weight",
+      "decoder.revealer_log_std.bias",
+      "decoder.actor_mean.weight",
+      "decoder.actor_mean.bias",
+      "decoder.actor_log_std.weight",
+      "decoder.actor_log_std.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias"
+    ],
+    "missing_keys": [
+      "backbone.depth_adapter.depth_proj.0.weight",
+      "backbone.depth_adapter.depth_proj.0.bias",
+      "backbone.depth_adapter.depth_proj.1.weight",
+      "backbone.depth_adapter.depth_proj.1.bias",
+      "backbone.depth_adapter.depth_proj.3.weight",
+      "backbone.depth_adapter.depth_proj.3.bias",
+      "backbone.depth_adapter.geometry_proj.0.weight",
+      "backbone.depth_adapter.geometry_proj.0.bias",
+      "backbone.depth_adapter.geometry_proj.1.weight",
+      "backbone.depth_adapter.geometry_proj.1.bias",
+      "backbone.depth_adapter.camera_proj.0.weight",
+      "backbone.depth_adapter.camera_proj.0.bias",
+      "backbone.depth_adapter.camera_proj.1.weight",
+      "backbone.depth_adapter.camera_proj.1.bias",
+      "fusion.geometry_fusion.attn.in_proj_weight",
+      "fusion.geometry_fusion.attn.in_proj_bias",
+      "fusion.geometry_fusion.attn.out_proj.weight",
+      "fusion.geometry_fusion.attn.out_proj.bias",
+      "fusion.geometry_fusion.gate.0.weight",
+      "fusion.geometry_fusion.gate.0.bias",
+      "fusion.geometry_fusion.gate.1.weight",
+      "fusion.geometry_fusion.gate.1.bias",
+      "fusion.geometry_fusion.gate.3.weight",
+      "fusion.geometry_fusion.gate.3.bias",
+      "fusion.geometry_fusion.out.0.weight",
+      "fusion.geometry_fusion.out.0.bias",
+      "fusion.geometry_fusion.out.1.weight",
+      "fusion.geometry_fusion.out.1.bias",
+      "memory.scene_memory.position_embedding",
+      "memory.scene_memory.bank_queries",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.scene_memory.bank_attention.in_proj_weight",
+      "memory.scene_memory.bank_attention.in_proj_bias",
+      "memory.scene_memory.bank_attention.out_proj.weight",
+      "memory.scene_memory.bank_attention.out_proj.bias",
+      "memory.scene_memory.action_proj.0.weight",
+      "memory.scene_memory.action_proj.0.bias",
+      "memory.scene_memory.action_proj.1.weight",
+      "memory.scene_memory.action_proj.1.bias",
+      "memory.scene_memory.write_gate.0.weight",
+      "memory.scene_memory.write_gate.0.bias",
+      "memory.scene_memory.write_gate.1.weight",
+      "memory.scene_memory.write_gate.1.bias",
+      "memory.scene_memory.write_gate.3.weight",
+      "memory.scene_memory.write_gate.3.bias",
+      "memory.scene_memory.token_proj.0.weight",
+      "memory.scene_memory.token_proj.0.bias",
+      "memory.scene_memory.token_proj.1.weight",
+      "memory.scene_memory.token_proj.1.bias",
+      "memory.belief_memory.position_embedding",
+      "memory.belief_memory.bank_queries",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
+      "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
+      "memory.belief_memory.bank_attention.in_proj_weight",
+      "memory.belief_memory.bank_attention.in_proj_bias",
+      "memory.belief_memory.bank_attention.out_proj.weight",
+      "memory.belief_memory.bank_attention.out_proj.bias",
+      "memory.belief_memory.action_proj.0.weight",
+      "memory.belief_memory.action_proj.0.bias",
+      "memory.belief_memory.action_proj.1.weight",
+      "memory.belief_memory.action_proj.1.bias",
+      "memory.belief_memory.write_gate.0.weight",
+      "memory.belief_memory.write_gate.0.bias",
+      "memory.belief_memory.write_gate.1.weight",
+      "memory.belief_memory.write_gate.1.bias",
+      "memory.belief_memory.write_gate.3.weight",
+      "memory.belief_memory.write_gate.3.bias",
+      "memory.belief_memory.token_proj.0.weight",
+      "memory.belief_memory.token_proj.0.bias",
+      "memory.belief_memory.token_proj.1.weight",
+      "memory.belief_memory.token_proj.1.bias",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.0.linear1.weight",
+      "decoder.arm_decoder.layers.0.linear1.bias",
+      "decoder.arm_decoder.layers.0.linear2.weight",
+      "decoder.arm_decoder.layers.0.linear2.bias",
+      "decoder.arm_decoder.layers.0.norm1.weight",
+      "decoder.arm_decoder.layers.0.norm1.bias",
+      "decoder.arm_decoder.layers.0.norm2.weight",
+      "decoder.arm_decoder.layers.0.norm2.bias",
+      "decoder.arm_decoder.layers.0.norm3.weight",
+      "decoder.arm_decoder.layers.0.norm3.bias",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.1.linear1.weight",
+      "decoder.arm_decoder.layers.1.linear1.bias",
+      "decoder.arm_decoder.layers.1.linear2.weight",
+      "decoder.arm_decoder.layers.1.linear2.bias",
+      "decoder.arm_decoder.layers.1.norm1.weight",
+      "decoder.arm_decoder.layers.1.norm1.bias",
+      "decoder.arm_decoder.layers.1.norm2.weight",
+      "decoder.arm_decoder.layers.1.norm2.bias",
+      "decoder.arm_decoder.layers.1.norm3.weight",
+      "decoder.arm_decoder.layers.1.norm3.bias",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.2.linear1.weight",
+      "decoder.arm_decoder.layers.2.linear1.bias",
+      "decoder.arm_decoder.layers.2.linear2.weight",
+      "decoder.arm_decoder.layers.2.linear2.bias",
+      "decoder.arm_decoder.layers.2.norm1.weight",
+      "decoder.arm_decoder.layers.2.norm1.bias",
+      "decoder.arm_decoder.layers.2.norm2.weight",
+      "decoder.arm_decoder.layers.2.norm2.bias",
+      "decoder.arm_decoder.layers.2.norm3.weight",
+      "decoder.arm_decoder.layers.2.norm3.bias",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
+      "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
+      "decoder.arm_decoder.layers.3.linear1.weight",
+      "decoder.arm_decoder.layers.3.linear1.bias",
+      "decoder.arm_decoder.layers.3.linear2.weight",
+      "decoder.arm_decoder.layers.3.linear2.bias",
+      "decoder.arm_decoder.layers.3.norm1.weight",
+      "decoder.arm_decoder.layers.3.norm1.bias",
+      "decoder.arm_decoder.layers.3.norm2.weight",
+      "decoder.arm_decoder.layers.3.norm2.bias",
+      "decoder.arm_decoder.layers.3.norm3.weight",
+      "decoder.arm_decoder.layers.3.norm3.bias",
+      "decoder.arm_identity.weight",
+      "decoder.phase_adapter.weight",
+      "decoder.phase_adapter.bias",
+      "decoder.role_adapter.weight",
+      "decoder.role_adapter.bias",
+      "decoder.context_proj.0.weight",
+      "decoder.context_proj.0.bias",
+      "decoder.context_proj.1.weight",
+      "decoder.context_proj.1.bias",
+      "decoder.arm_head.0.weight",
+      "decoder.arm_head.0.bias",
+      "decoder.arm_head.1.weight",
+      "decoder.arm_head.1.bias",
+      "decoder.arm_mean.weight",
+      "decoder.arm_mean.bias",
+      "decoder.arm_log_std.weight",
+      "decoder.arm_log_std.bias",
+      "decoder.proposal_mode_head.0.weight",
+      "decoder.proposal_mode_head.0.bias",
+      "decoder.proposal_mode_head.1.weight",
+      "decoder.proposal_mode_head.1.bias",
+      "decoder.proposal_mode_head.3.weight",
+      "decoder.proposal_mode_head.3.bias",
+      "decoder.proposal_mode_embeddings.weight",
+      "decoder.proposal_slot_embeddings.weight",
+      "decoder.mode_residual_heads.0.0.weight",
+      "decoder.mode_residual_heads.0.0.bias",
+      "decoder.mode_residual_heads.0.1.weight",
+      "decoder.mode_residual_heads.0.1.bias",
+      "decoder.mode_residual_heads.0.3.weight",
+      "decoder.mode_residual_heads.0.3.bias",
+      "decoder.mode_residual_heads.1.0.weight",
+      "decoder.mode_residual_heads.1.0.bias",
+      "decoder.mode_residual_heads.1.1.weight",
+      "decoder.mode_residual_heads.1.1.bias",
+      "decoder.mode_residual_heads.1.3.weight",
+      "decoder.mode_residual_heads.1.3.bias",
+      "decoder.mode_residual_heads.2.0.weight",
+      "decoder.mode_residual_heads.2.0.bias",
+      "decoder.mode_residual_heads.2.1.weight",
+      "decoder.mode_residual_heads.2.1.bias",
+      "decoder.mode_residual_heads.2.3.weight",
+      "decoder.mode_residual_heads.2.3.bias",
+      "decoder.mode_residual_heads.3.0.weight",
+      "decoder.mode_residual_heads.3.0.bias",
+      "decoder.mode_residual_heads.3.1.weight",
+      "decoder.mode_residual_heads.3.1.bias",
+      "decoder.mode_residual_heads.3.3.weight",
+      "decoder.mode_residual_heads.3.3.bias",
+      "decoder.mode_residual_heads.4.0.weight",
+      "decoder.mode_residual_heads.4.0.bias",
+      "decoder.mode_residual_heads.4.1.weight",
+      "decoder.mode_residual_heads.4.1.bias",
+      "decoder.mode_residual_heads.4.3.weight",
+      "decoder.mode_residual_heads.4.3.bias",
+      "decoder.mode_residual_heads.5.0.weight",
+      "decoder.mode_residual_heads.5.0.bias",
+      "decoder.mode_residual_heads.5.1.weight",
+      "decoder.mode_residual_heads.5.1.bias",
+      "decoder.mode_residual_heads.5.3.weight",
+      "decoder.mode_residual_heads.5.3.bias",
+      "decoder.slot_delta.0.weight",
+      "decoder.slot_delta.0.bias",
+      "decoder.slot_delta.1.weight",
+      "decoder.slot_delta.1.bias",
+      "decoder.slot_delta.3.weight",
+      "decoder.slot_delta.3.bias",
+      "decoder.proposal_score.0.weight",
+      "decoder.proposal_score.0.bias",
+      "decoder.proposal_score.1.weight",
+      "decoder.proposal_score.1.bias",
+      "decoder.proposal_score.3.weight",
+      "decoder.proposal_score.3.bias",
+      "elastic_state_head.interaction_queries",
+      "elastic_state_head.interaction_attention.in_proj_weight",
+      "elastic_state_head.interaction_attention.in_proj_bias",
+      "elastic_state_head.interaction_attention.out_proj.weight",
+      "elastic_state_head.interaction_attention.out_proj.bias",
+      "elastic_state_head.interaction_mlp.0.weight",
+      "elastic_state_head.interaction_mlp.0.bias",
+      "elastic_state_head.interaction_mlp.1.weight",
+      "elastic_state_head.interaction_mlp.1.bias",
+      "elastic_state_head.interaction_mlp.3.weight",
+      "elastic_state_head.interaction_mlp.3.bias",
+      "elastic_state_head.decoder.field_queries",
+      "elastic_state_head.decoder.field_attention.in_proj_weight",
+      "elastic_state_head.decoder.field_attention.in_proj_bias",
+      "elastic_state_head.decoder.field_attention.out_proj.weight",
+      "elastic_state_head.decoder.field_attention.out_proj.bias",
+      "elastic_state_head.decoder.field_mlp.0.weight",
+      "elastic_state_head.decoder.field_mlp.0.bias",
+      "elastic_state_head.decoder.field_mlp.1.weight",
+      "elastic_state_head.decoder.field_mlp.1.bias",
+      "elastic_state_head.decoder.field_mlp.3.weight",
+      "elastic_state_head.decoder.field_mlp.3.bias",
+      "elastic_state_head.decoder.summary_proj.0.weight",
+      "elastic_state_head.decoder.summary_proj.0.bias",
+      "elastic_state_head.decoder.summary_proj.1.weight",
+      "elastic_state_head.decoder.summary_proj.1.bias",
+      "elastic_state_head.decoder.phase_head.0.weight",
+      "elastic_state_head.decoder.phase_head.0.bias",
+      "elastic_state_head.decoder.phase_head.1.weight",
+      "elastic_state_head.decoder.phase_head.1.bias",
+      "elastic_state_head.decoder.phase_head.3.weight",
+      "elastic_state_head.decoder.phase_head.3.bias",
+      "elastic_state_head.decoder.arm_role_head.0.weight",
+      "elastic_state_head.decoder.arm_role_head.0.bias",
+      "elastic_state_head.decoder.arm_role_head.1.weight",
+      "elastic_state_head.decoder.arm_role_head.1.bias",
+      "elastic_state_head.decoder.arm_role_head.3.weight",
+      "elastic_state_head.decoder.arm_role_head.3.bias",
+      "elastic_state_head.decoder.arm_identity.weight",
+      "elastic_state_head.decoder.support_mode.0.weight",
+      "elastic_state_head.decoder.support_mode.0.bias",
+      "elastic_state_head.decoder.support_mode.1.weight",
+      "elastic_state_head.decoder.support_mode.1.bias",
+      "elastic_state_head.decoder.support_mode.3.weight",
+      "elastic_state_head.decoder.support_mode.3.bias",
+      "elastic_state_head.decoder.access_field.weight",
+      "elastic_state_head.decoder.access_field.bias",
+      "elastic_state_head.decoder.target_belief_field.weight",
+      "elastic_state_head.decoder.target_belief_field.bias",
+      "elastic_state_head.decoder.visibility_field.weight",
+      "elastic_state_head.decoder.visibility_field.bias",
+      "elastic_state_head.decoder.clearance_field.weight",
+      "elastic_state_head.decoder.clearance_field.bias",
+      "elastic_state_head.decoder.occluder_contact_field.weight",
+      "elastic_state_head.decoder.occluder_contact_field.bias",
+      "elastic_state_head.decoder.grasp_affordance_field.weight",
+      "elastic_state_head.decoder.grasp_affordance_field.bias",
+      "elastic_state_head.decoder.support_stability_field.weight",
+      "elastic_state_head.decoder.support_stability_field.bias",
+      "elastic_state_head.decoder.persistence_field.weight",
+      "elastic_state_head.decoder.persistence_field.bias",
+      "elastic_state_head.decoder.reocclusion_field.weight",
+      "elastic_state_head.decoder.reocclusion_field.bias",
+      "elastic_state_head.decoder.disturbance_field.weight",
+      "elastic_state_head.decoder.disturbance_field.bias",
+      "elastic_state_head.decoder.uncertainty_field.weight",
+      "elastic_state_head.decoder.uncertainty_field.bias",
+      "elastic_state_head.decoder.reocclusion_head.0.weight",
+      "elastic_state_head.decoder.reocclusion_head.0.bias",
+      "elastic_state_head.decoder.reocclusion_head.1.weight",
+      "elastic_state_head.decoder.reocclusion_head.1.bias",
+      "elastic_state_head.decoder.reocclusion_head.3.weight",
+      "elastic_state_head.decoder.reocclusion_head.3.bias",
+      "world_model.state_encoder.0.weight",
+      "world_model.state_encoder.0.bias",
+      "world_model.state_encoder.1.weight",
+      "world_model.state_encoder.1.bias",
+      "world_model.scene_memory_proj.0.weight",
+      "world_model.scene_memory_proj.0.bias",
+      "world_model.scene_memory_proj.1.weight",
+      "world_model.scene_memory_proj.1.bias",
+      "world_model.belief_memory_proj.0.weight",
+      "world_model.belief_memory_proj.0.bias",
+      "world_model.belief_memory_proj.1.weight",
+      "world_model.belief_memory_proj.1.bias",
+      "world_model.action_encoder.0.weight",
+      "world_model.action_encoder.0.bias",
+      "world_model.action_encoder.1.weight",
+      "world_model.action_encoder.1.bias",
+      "world_model.transition.weight_ih",
+      "world_model.transition.weight_hh",
+      "world_model.transition.bias_ih",
+      "world_model.transition.bias_hh",
+      "world_model.scene_memory_update.weight",
+      "world_model.scene_memory_update.bias",
+      "world_model.belief_memory_update.weight",
+      "world_model.belief_memory_update.bias",
+      "world_model.compact_decoder.weight",
+      "world_model.compact_decoder.bias",
+      "world_model.target_belief_head.weight",
+      "world_model.target_belief_head.bias",
+      "world_model.visibility_head.weight",
+      "world_model.visibility_head.bias",
+      "world_model.clearance_head.weight",
+      "world_model.clearance_head.bias",
+      "world_model.occluder_contact_head.weight",
+      "world_model.occluder_contact_head.bias",
+      "world_model.grasp_affordance_head.weight",
+      "world_model.grasp_affordance_head.bias",
+      "world_model.support_stability_head.weight",
+      "world_model.support_stability_head.bias",
+      "world_model.persistence_head.weight",
+      "world_model.persistence_head.bias",
+      "world_model.reocclusion_head.weight",
+      "world_model.reocclusion_head.bias",
+      "world_model.disturbance_head.weight",
+      "world_model.disturbance_head.bias",
+      "world_model.uncertainty_head.weight",
+      "world_model.uncertainty_head.bias",
+      "world_model.access_head.weight",
+      "world_model.access_head.bias",
+      "planner.residual.trunk.0.weight",
+      "planner.residual.trunk.0.bias",
+      "planner.residual.trunk.1.weight",
+      "planner.residual.trunk.1.bias",
+      "planner.residual.trunk.3.weight",
+      "planner.residual.trunk.3.bias",
+      "planner.residual.success_head.weight",
+      "planner.residual.success_head.bias",
+      "planner.residual.risk_head.weight",
+      "planner.residual.risk_head.bias",
+      "planner.residual.residual_head.weight",
+      "planner.residual.residual_head.bias"
+    ],
+    "unexpected_keys": []
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f97be52baa009b311fe02a1f75ca1a35ce4bd1d
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.5,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5833333333333334,
+    "visibility_integral": 32.27142822328541,
+    "corridor_availability": 0.882228939069642,
+    "reocclusion_rate": 0.0,
+    "persistence_horizon_mae": 2.1414308214317197,
+    "disturbance_cost": 0.3078485221550282
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..62fbc289c590765dbb96c23298be04df438e5364
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/checkpoint_best.pt
+- mean_success: 0.583
+- visibility_integral: 32.271
+- corridor_availability: 0.882
+- reocclusion_rate: 0.000
+- persistence_horizon_mae: 2.141
+- disturbance_cost: 0.308
+- foliage_proxy_success: 0.500
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1233443a30b93ddf34d7edeacd8420a5bfc4934
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.json
@@ -0,0 +1,15 @@
+{
+  "full": {
+    "per_task_success": {
+      "foliage_proxy": 0.4583333333333333,
+      "bag_proxy": 0.5416666666666666,
+      "cloth_proxy": 0.7083333333333334
+    },
+    "mean_success": 0.5694444444444445,
+    "visibility_integral": 31.285325296223164,
+    "corridor_availability": 0.8095407477683492,
+    "reocclusion_rate": 0.013385315139701101,
+    "persistence_horizon_mae": 2.936828779474813,
+    "disturbance_cost": 0.3132026158790622
+  }
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.md b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..89f8f4c2eced501a98daf0e36f47cfdc54aea19f
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_no_depth/reveal_benchmark.md
@@ -0,0 +1,13 @@
+# Reveal Proxy Benchmark
+
+## full
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/checkpoint_best.pt
+- mean_success: 0.569
+- visibility_integral: 31.285
+- corridor_availability: 0.810
+- reocclusion_rate: 0.013
+- persistence_horizon_mae: 2.937
+- disturbance_cost: 0.313
+- foliage_proxy_success: 0.458
+- bag_proxy_success: 0.542
+- cloth_proxy_success: 0.708
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/config_resolved.yaml b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f95b269273891af66f01204fdc3d8d102945fa27
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/config_resolved.yaml
@@ -0,0 +1,149 @@
+experiment_name: proxy_interaction_r3d_stage3_clip_rgbd_seed18
+output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
+device: cuda
+seed: 18
+init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
+init_strict: false
+data:
+  proxies:
+  - foliage_proxy
+  - bag_proxy
+  - cloth_proxy
+  resolution: 224
+  dataset_version: reveal_proxy_v6_rgbd_elastic_state
+  train_episodes_per_proxy: 48
+  val_episodes_per_proxy: 16
+  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_seed18.pt
+  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_seed18.pt
+  rebuild_dataset: false
+  chunk_horizon: 8
+  rollout_horizon: 5
+  history_steps: 6
+  planner_candidates: 8
+  seed: 18
+optim:
+  epochs: 4
+  batch_size: 2
+  num_workers: 4
+  lr: 0.0003
+  weight_decay: 0.0001
+trainer:
+  policy_type: elastic_reveal
+  use_bf16: true
+  grad_clip_norm: 1.0
+  freeze_backbone: true
+  gradient_checkpointing: false
+  plan_during_train: true
+  plan_during_eval: true
+  support_mode_conditioning: true
+  planner_mode: trainable
+  use_depth: true
+  use_world_model: true
+  use_role_tokens: true
+  compute_equivariance_probe: true
+policy:
+  backbone:
+    model_name: openai/clip-vit-base-patch32
+    hidden_dim: 512
+    max_text_tokens: 32
+    freeze_backbone: true
+    gradient_checkpointing: false
+    use_dummy_backbone: false
+  fusion:
+    hidden_dim: 512
+    num_cameras: 3
+    num_layers: 4
+    num_heads: 8
+    ff_dim: 2048
+    dropout: 0.1
+    proprio_dim: 32
+    proprio_tokens: 1
+  memory:
+    hidden_dim: 512
+    action_dim: 14
+    history_steps: 6
+    scene_history_steps: 3
+    belief_history_steps: 8
+    num_layers: 2
+    dropout: 0.1
+    memory_bank_size: 4
+    scene_bank_size: 2
+    belief_bank_size: 2
+    num_heads: 8
+    max_history_steps: 8
+  decoder:
+    hidden_dim: 512
+    num_heads: 8
+    num_layers: 4
+    ff_dim: 2048
+    dropout: 0.1
+    chunk_size: 8
+    action_dim: 14
+    arm_action_dim: 7
+    num_candidates: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_proposal_modes: 6
+    planner_top_k: 4
+  reveal_head:
+    hidden_dim: 512
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    belief_map_size: 32
+    field_size: 16
+    num_heads: 8
+    predict_belief_map: true
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+  world_model:
+    hidden_dim: 512
+    action_dim: 14
+    num_support_modes: 3
+    num_approach_templates: 32
+    rollout_horizon: 5
+    field_size: 16
+    num_heads: 8
+    num_phases: 5
+    num_arm_roles: 4
+    num_interaction_tokens: 8
+    belief_map_size: 32
+    predict_belief_map: true
+    scene_bank_size: 2
+    belief_bank_size: 2
+  planner:
+    hidden_dim: 512
+    num_candidates: 8
+    action_dim: 14
+    num_support_modes: 3
+    utility_margin: 0.1
+    num_heads: 8
+    num_layers: 2
+    num_phases: 5
+    num_arm_roles: 4
+    top_k: 4
+loss_weights:
+  action: 1.0
+  phase: 0.1
+  arm_role: 0.15
+  support_mode: 0.1
+  corridor: 0.15
+  persistence: 0.05
+  disturbance: 0.05
+  world_model: 0.25
+  belief: 0.05
+  visibility: 0.05
+  clearance: 0.05
+  support_stability: 0.05
+  reocclusion: 0.05
+  occluder_contact: 0.05
+  grasp_affordance: 0.05
+  planner_success: 0.25
+  planner_risk: 0.1
+  planner_ranking: 0.2
+  proposal_reconstruction: 0.1
+  proposal_success: 0.15
+  proposal_ranking: 0.2
+  proposal_diversity: 0.05
+  role_swap_consistency: 0.05
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/diagnostics_full/proxy_diagnostics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/diagnostics_full/proxy_diagnostics.json
new file mode 100644
index 0000000000000000000000000000000000000000..47717f873e78fbe6821e9076495a55f15214b0d4
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/diagnostics_full/proxy_diagnostics.json
@@ -0,0 +1,16 @@
+{
+  "planner_top1_accuracy": 0.32575757575757575,
+  "planner_regret": 0.013780632056295872,
+  "planner_score_utility_spearman": 0.2590909004211426,
+  "risk_calibration_mse": 0.011394849047064781,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.021189916878938675,
+  "left_right_equivariance_error": 4.1925632985746205e-05,
+  "belief_calibration_brier": 0.004418548196554184,
+  "reocclusion_calibration_brier": 0.2664291560649872,
+  "support_stability_mae": 0.03443893417716026,
+  "clearance_auc": 0.7519833077975241,
+  "memory_write_rate": 0.0,
+  "memory_saturation": 0.3426786959171295,
+  "num_samples": 132
+}
\ No newline at end of file
diff --git a/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/metrics.json b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/metrics.json
new file mode 100644
index 0000000000000000000000000000000000000000..d2a93ea85f23aad4210a8f0ceb474539d9c90cb8
--- /dev/null
+++ b/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/metrics.json
@@ -0,0 +1,230 @@
+[
+  {
+    "epoch": 0,
+    "train": {
+      "action": 0.025857772268549933,
+      "arm_role": 0.03370982127663977,
+      "belief": 0.11251551973882146,
+      "clearance": 0.09382800325868329,
+      "corridor": 0.27670082988227224,
+      "disturbance": 0.0059105119088080135,
+      "grasp_affordance": 0.01767918948786265,
+      "occluder_contact": 0.20790174291396016,
+      "persistence": 7.249429424941852,
+      "phase": 0.7393677396924084,
+      "planner_ranking": 0.18070141599557474,
+      "planner_risk": 0.014856026590806651,
+      "planner_success": 0.6360785897177551,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.2621668352506548,
+      "proposal_reconstruction": 0.06770114458278212,
+      "proposal_success": 0.6843253252393913,
+      "reocclusion": 0.733561779504047,
+      "role_swap_consistency": 0.0005007284111038572,
+      "support_mode": 0.7864344731675392,
+      "support_stability": 0.15464979819270797,
+      "total": 1.8893053952936103,
+      "uncertainty": 0.026158929999575815,
+      "visibility": 0.10713219952754949,
+      "world_model": 2.6855637587177816
+    },
+    "val": {
+      "action": 0.023218068632889877,
+      "arm_role": 7.036943040447747e-06,
+      "belief": 0.09010097717471195,
+      "clearance": 0.07386604707800981,
+      "corridor": 0.2412589482511535,
+      "disturbance": 0.002226812243944468,
+      "grasp_affordance": 0.009947083184890675,
+      "occluder_contact": 0.1953272647929914,
+      "persistence": 4.425601812926206,
+      "phase": 0.7730295369119355,
+      "planner_ranking": 0.04223714639947778,
+      "planner_risk": 0.01139929191435151,
+      "planner_success": 0.5929040561119715,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.185825102257006,
+      "proposal_reconstruction": 0.06571822122416714,
+      "proposal_success": 0.6717942483497389,
+      "reocclusion": 0.6932443497758923,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.726759416587425,
+      "support_stability": 0.14309178433860792,
+      "total": 1.6142820137919802,
+      "uncertainty": 0.007038127842613242,
+      "visibility": 0.08926947330209342,
+      "world_model": 2.4654864957838347
+    }
+  },
+  {
+    "epoch": 1,
+    "train": {
+      "action": 0.022110589677516702,
+      "arm_role": 1.3950919605674544e-05,
+      "belief": 0.10294150522086008,
+      "clearance": 0.08084176621199902,
+      "corridor": 0.2462356197038246,
+      "disturbance": 0.0031714126446295934,
+      "grasp_affordance": 0.00960333755054555,
+      "occluder_contact": 0.20049099460322195,
+      "persistence": 4.177739099682314,
+      "phase": 0.7043367555628273,
+      "planner_ranking": 0.04967470209174159,
+      "planner_risk": 0.01318734941372638,
+      "planner_success": 0.5723307314499511,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.150115387289936,
+      "proposal_reconstruction": 0.0644153186281002,
+      "proposal_success": 0.6753117866541078,
+      "reocclusion": 0.6987067278455065,
+      "role_swap_consistency": 0.00042066709767766197,
+      "support_mode": 0.6952358066099477,
+      "support_stability": 0.14108095217145084,
+      "total": 1.5119679860419628,
+      "uncertainty": 0.004842953095748942,
+      "visibility": 0.1003879376184878,
+      "world_model": 2.1801892893476635
+    },
+    "val": {
+      "action": 0.022499137826151018,
+      "arm_role": 6.598947992174406e-05,
+      "belief": 0.10226583751765164,
+      "clearance": 0.07544161554313067,
+      "corridor": 0.24163203528433136,
+      "disturbance": 0.001789042631902372,
+      "grasp_affordance": 0.008704299170693213,
+      "occluder_contact": 0.19236745504718838,
+      "persistence": 4.6868142503680605,
+      "phase": 0.6602843403816223,
+      "planner_ranking": 0.04781789132296884,
+      "planner_risk": 0.011765290400724167,
+      "planner_success": 0.5356740147778483,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1275392030224656,
+      "proposal_reconstruction": 0.06496855588347623,
+      "proposal_success": 0.6853868401411808,
+      "reocclusion": 0.7616267091397083,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6959606806437174,
+      "support_stability": 0.14195433682338757,
+      "total": 1.5516490313139828,
+      "uncertainty": 0.002906694681962218,
+      "visibility": 0.09106272707382838,
+      "world_model": 2.2986758297139946
+    }
+  },
+  {
+    "epoch": 2,
+    "train": {
+      "action": 0.02224867461317497,
+      "arm_role": 1.0064135047153653e-05,
+      "belief": 0.101081568201643,
+      "clearance": 0.08345475727545966,
+      "corridor": 0.2517957490776222,
+      "disturbance": 0.0029570121168599525,
+      "grasp_affordance": 0.009918136390520952,
+      "occluder_contact": 0.20453518028346657,
+      "persistence": 4.101219920899855,
+      "phase": 0.6840078943062827,
+      "planner_ranking": 0.039329256105235716,
+      "planner_risk": 0.01054730894868701,
+      "planner_success": 0.5327235887031905,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.13903670847728,
+      "proposal_reconstruction": 0.0643397857342403,
+      "proposal_success": 0.6784668517986517,
+      "reocclusion": 0.7002001897826868,
+      "role_swap_consistency": 0.0007044284810045129,
+      "support_mode": 0.6855724394633508,
+      "support_stability": 0.14184507578416333,
+      "total": 1.4588422775268555,
+      "uncertainty": 0.00190604528785849,
+      "visibility": 0.0957828067360124,
+      "world_model": 2.046591355538493
+    },
+    "val": {
+      "action": 0.022229419372072727,
+      "arm_role": 3.2403206591190994e-06,
+      "belief": 0.09474777622204839,
+      "clearance": 0.0783396718854254,
+      "corridor": 0.23935590486860636,
+      "disturbance": 0.0021153995247996963,
+      "grasp_affordance": 0.0088278352649826,
+      "occluder_contact": 0.19912470809438013,
+      "persistence": 3.8268655773365134,
+      "phase": 0.6582440318483295,
+      "planner_ranking": 0.03990731150164114,
+      "planner_risk": 0.011713616791618706,
+      "planner_success": 0.5134278662276991,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1207696849649602,
+      "proposal_reconstruction": 0.06454906855342966,
+      "proposal_success": 0.6825091883991704,
+      "reocclusion": 0.6904694817282937,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.654139602726156,
+      "support_stability": 0.1375041804699735,
+      "total": 1.509128962502335,
+      "uncertainty": 0.0008296288133627086,
+      "visibility": 0.0897342140475909,
+      "world_model": 2.3714701876495825
+    }
+  },
+  {
+    "epoch": 3,
+    "train": {
+      "action": 0.021563103183596857,
+      "arm_role": 9.534371460919605e-05,
+      "belief": 0.11680854938885304,
+      "clearance": 0.09178280478333616,
+      "corridor": 0.3002950780047043,
+      "disturbance": 0.003815645619568824,
+      "grasp_affordance": 0.019201423960654992,
+      "occluder_contact": 0.2269966553955178,
+      "persistence": 3.9096838283699666,
+      "phase": 0.6496198564299738,
+      "planner_ranking": 0.03442381335639937,
+      "planner_risk": 0.011457197463837164,
+      "planner_success": 0.5073940541341667,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1320187015683238,
+      "proposal_reconstruction": 0.06363906914182983,
+      "proposal_success": 0.6781648984130141,
+      "reocclusion": 0.6639381964491066,
+      "role_swap_consistency": 0.0008551519205571269,
+      "support_mode": 0.6192906564442899,
+      "support_stability": 0.15393702852593355,
+      "total": 1.4377938396643595,
+      "uncertainty": 0.007347671914954727,
+      "visibility": 0.11880907560674307,
+      "world_model": 2.0381793492127462
+    },
+    "val": {
+      "action": 0.025203945693757498,
+      "arm_role": 1.054821041179821e-06,
+      "belief": 0.11612348100452712,
+      "clearance": 0.10017752489357283,
+      "corridor": 0.3126967510942257,
+      "disturbance": 0.005627329672058819,
+      "grasp_affordance": 0.018004601313309235,
+      "occluder_contact": 0.2143698472416762,
+      "persistence": 3.829360609704798,
+      "phase": 0.7265884849158201,
+      "planner_ranking": 0.03285199802378498,
+      "planner_risk": 0.011394622192407647,
+      "planner_success": 0.4938382957920884,
+      "proposal_diversity": 0.0,
+      "proposal_ranking": 1.1160721056389087,
+      "proposal_reconstruction": 0.06633412369498701,
+      "proposal_success": 0.6799869302547339,
+      "reocclusion": 0.6805009720000353,
+      "role_swap_consistency": 0.0,
+      "support_mode": 0.6576023919112755,
+      "support_stability": 0.16122697604199251,
+      "total": 1.4556549372095051,
+      "uncertainty": 0.007743606238033284,
+      "visibility": 0.12334947400923932,
+      "world_model": 2.0783897286111657
+    }
+  }
+]
\ No newline at end of file
diff --git a/artifacts/outputs/r3d_smoke/smoke_checks.json b/artifacts/outputs/r3d_smoke/smoke_checks.json
new file mode 100644
index 0000000000000000000000000000000000000000..5003b8224d87f419882be0ae52b2401a1320a0e4
--- /dev/null
+++ b/artifacts/outputs/r3d_smoke/smoke_checks.json
@@ -0,0 +1,225 @@
+{
+  "proxy": {
+    "losses": {
+      "action": 0.464199423789978,
+      "phase": 1.8022403717041016,
+      "arm_role": 3.169889450073242,
+      "support_mode": 1.0952036380767822,
+      "corridor": 0.6662508249282837,
+      "persistence": 3.4530017375946045,
+      "disturbance": 0.19386449456214905,
+      "belief": 0.7597116827964783,
+      "visibility": 0.0,
+      "clearance": 0.0,
+      "support_stability": 0.0,
+      "occluder_contact": 0.0,
+      "grasp_affordance": 0.0,
+      "reocclusion": 0.67914879322052,
+      "uncertainty": 0.8255214691162109,
+      "world_model": 8.876996040344238,
+      "planner_success": 0.7317572236061096,
+      "planner_risk": 0.0804181918501854,
+      "planner_ranking": 1.3189352750778198,
+      "proposal_reconstruction": 0.5751029849052429,
+      "proposal_success": 0.7016865015029907,
+      "proposal_ranking": 0.8174347877502441,
+      "proposal_diversity": 0.0,
+      "role_swap_consistency": 0.0,
+      "total": 2.791285991668701
+    },
+    "grad_norm": 3.0634233951568604,
+    "candidate_shape": [
+      2,
+      8,
+      8,
+      14
+    ],
+    "rollout_phase_shape": [
+      2,
+      8,
+      8,
+      5
+    ]
+  },
+  "rlbench": {
+    "losses": {
+      "action": 0.9079780578613281,
+      "world_model": 0.0,
+      "planner_success": 0.0,
+      "planner_risk": 0.0,
+      "planner_ranking": 0.0,
+      "proposal_reconstruction": 0.0,
+      "proposal_success": 0.0,
+      "proposal_ranking": 0.0,
+      "proposal_diversity": 0.0,
+      "role_swap_consistency": 0.0,
+      "total": 0.9079780578613281
+    },
+    "grad_norm": 3.2202138900756836,
+    "candidate_shape": [
+      2,
+      4,
+      8,
+      14
+    ],
+    "rollout_phase_shape": [
+      2,
+      4,
+      8,
+      5
+    ],
+    "planner_enabled_for_eval": true,
+    "frozen_modules": [
+      "interaction_head",
+      "world_model",
+      "planner"
+    ]
+  },
+  "elastic_reveal": {
+    "rgb_only_candidate_shape": [
+      2,
+      4,
+      8,
+      14
+    ],
+    "rgbd_candidate_shape": [
+      2,
+      4,
+      8,
+      14
+    ],
+    "rgbd_topk_shape": [
+      2,
+      4
+    ],
+    "rgbd_rollout_shape": [
+      2,
+      4,
+      8,
+      1,
+      16,
+      16
+    ],
+    "noplanner_chunk_shape": [
+      2,
+      8,
+      14
+    ],
+    "equivariance_probe_shape": [
+      2,
+      8,
+      14
+    ],
+    "dataset_v6_missing_fields": []
+  },
+  "policy_config": {
+    "backbone": {
+      "model_name": "openai/clip-vit-base-patch32",
+      "hidden_dim": 64,
+      "max_text_tokens": 32,
+      "freeze_backbone": true,
+      "gradient_checkpointing": false,
+      "use_dummy_backbone": true,
+      "depth_patch_size": 16,
+      "geometry_feature_dim": 8,
+      "use_camera_geometry": true
+    },
+    "fusion": {
+      "hidden_dim": 64,
+      "num_cameras": 3,
+      "num_layers": 2,
+      "num_heads": 4,
+      "ff_dim": 128,
+      "dropout": 0.1,
+      "proprio_dim": 32,
+      "proprio_tokens": 1,
+      "geometry_num_heads": 4
+    },
+    "memory": {
+      "hidden_dim": 64,
+      "action_dim": 14,
+      "history_steps": 6,
+      "num_layers": 2,
+      "dropout": 0.1,
+      "memory_bank_size": 4,
+      "num_heads": 4,
+      "max_history_steps": 8,
+      "scene_bank_size": 2,
+      "belief_bank_size": 2,
+      "scene_history_steps": 3,
+      "belief_history_steps": 8,
+      "memory_write_threshold": 0.45,
+      "memory_suppression_margin": 0.05
+    },
+    "decoder": {
+      "hidden_dim": 64,
+      "num_heads": 4,
+      "num_layers": 2,
+      "ff_dim": 128,
+      "dropout": 0.1,
+      "chunk_size": 8,
+      "action_dim": 14,
+      "arm_action_dim": 7,
+      "num_candidates": 4,
+      "num_phases": 5,
+      "num_arm_roles": 4,
+      "num_proposal_modes": 6,
+      "planner_top_k": 4
+    },
+    "reveal_head": {
+      "hidden_dim": 64,
+      "num_support_modes": 3,
+      "num_approach_templates": 32,
+      "rollout_horizon": 3,
+      "belief_map_size": 32,
+      "field_size": 16,
+      "num_heads": 4,
+      "predict_belief_map": true,
+      "num_phases": 5,
+      "num_arm_roles": 4,
+      "num_interaction_tokens": 8
+    },
+    "world_model": {
+      "hidden_dim": 64,
+      "action_dim": 14,
+      "num_support_modes": 3,
+      "num_approach_templates": 32,
+      "rollout_horizon": 3,
+      "field_size": 16,
+      "num_heads": 4,
+      "num_phases": 5,
+      "num_arm_roles": 4,
+      "num_interaction_tokens": 8,
+      "belief_map_size": 32,
+      "predict_belief_map": true,
+      "scene_bank_size": 2,
+      "belief_bank_size": 2
+    },
+    "planner": {
+      "hidden_dim": 64,
+      "num_candidates": 4,
+      "action_dim": 14,
+      "num_support_modes": 3,
+      "utility_margin": 0.1,
+      "corridor_weight": 1.0,
+      "persistence_weight": 0.5,
+      "proposal_weight": 0.5,
+      "task_progress_weight": 0.75,
+      "disturbance_weight": 0.75,
+      "reocclusion_weight": 0.5,
+      "visibility_weight": 0.25,
+      "num_heads": 4,
+      "num_layers": 2,
+      "num_phases": 5,
+      "num_arm_roles": 4,
+      "top_k": 4,
+      "belief_gain_weight": 1.0,
+      "visibility_gain_weight": 0.75,
+      "clearance_weight": 0.75,
+      "occluder_contact_weight": 0.5,
+      "grasp_affordance_weight": 0.75,
+      "support_stability_weight": 0.5,
+      "residual_weight": 0.5
+    }
+  }
+}
\ No newline at end of file