Fix null-rollout world-model ablation

Browse files

Files changed (6) hide show

.gitignore +4 -0
README.md +2 -0
code/reveal_vla_bimanual/models/policy.py +52 -17
code/reveal_vla_bimanual/train/losses.py +1 -1
results/phase_tracking.md +2 -2
tests/test_policy_topk_cascade.py +43 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+__pycache__/
+.pytest_cache/
+.cache/
+*.pyc

README.md CHANGED Viewed

@@ -116,6 +116,8 @@ Bundle uploaded from the `/workspace` runpod session dated `2026-03-25 UTC`.
 Full artifact roots are indexed in `MODEL_INDEX.md`.
 ## Raw Training Summaries
 | Run | Mean train time (s) | Mean peak GPU memory (MB) |

 Full artifact roots are indexed in `MODEL_INDEX.md`.
+Note: the stored `stage2 dummy no_world_model` row above was produced before the `2026-03-25` null-rollout ablation fix in `ElasticRevealBimanualPolicy`. The raw artifact is retained unchanged, but the run should be repeated before the row is used as a fair world-model comparison.
 ## Raw Training Summaries
 | Run | Mean train time (s) | Mean peak GPU memory (MB) |

code/reveal_vla_bimanual/models/policy.py CHANGED Viewed

@@ -485,6 +485,28 @@ class ElasticRevealBimanualPolicy(BackboneOnlyPolicy):
                 tiled[key] = self._tile_tensor(value, num_candidates)
         return tiled
     def forward(
         self,
         images: Tensor,
@@ -570,6 +592,7 @@ class ElasticRevealBimanualPolicy(BackboneOnlyPolicy):
             "reveal_state": elastic_state,
             "view_summaries": scene_output["view_summaries"],
             "geometry_summaries": scene_output["geometry_summaries"],
         }
         candidate_chunks = candidate_chunks_override
@@ -607,34 +630,45 @@ class ElasticRevealBimanualPolicy(BackboneOnlyPolicy):
         batch_size = candidate_chunks.shape[0]
         batch_indices = torch.arange(batch_size, device=candidate_chunks.device).unsqueeze(-1)
         topk_candidates = candidate_chunks[batch_indices, shortlist_indices]
         outputs["planner_topk_candidates"] = topk_candidates
         if proposal_logits is not None:
             topk_proposal_logits = proposal_logits.gather(1, shortlist_indices)
         else:
             topk_proposal_logits = None
         if not use_world_model:
-            score_source = topk_proposal_logits if topk_proposal_logits is not None else -topk_candidates.square().mean(dim=(-1, -2))
-            best_local = score_source.argmax(dim=-1)
-            best_indices = shortlist_indices[torch.arange(batch_size, device=best_local.device), best_local]
-            outputs["planned_chunk"] = candidate_chunks[torch.arange(batch_size, device=best_local.device), best_indices]
-            outputs["planned_rollout"] = {}
-            outputs["planner_success_logits"] = torch.zeros_like(score_source)
-            outputs["planner_risk_values"] = torch.zeros_like(score_source)
-            outputs["planner_scores"] = score_source
-            outputs["best_candidate_indices"] = best_indices
-            outputs["utility_structured"] = score_source
-            outputs["utility_residual"] = torch.zeros_like(score_source)
-            outputs["utility_total"] = score_source
             return outputs
-        num_topk = topk_candidates.shape[1]
         flat_chunks = topk_candidates.view(batch_size * num_topk, topk_candidates.shape[2], topk_candidates.shape[3])
         tiled_scene = self._tile_tensor(scene_tokens, num_topk)
-        planning_state = elastic_state
-        if not support_mode_conditioning:
-            planning_state = dict(elastic_state)
-            planning_state["support_mode_logits"] = torch.zeros_like(elastic_state["support_mode_logits"])
         tiled_state = self._tile_state(planning_state, num_topk)
         rollout = self.world_model(
             scene_tokens=tiled_scene,
@@ -664,4 +698,5 @@ class ElasticRevealBimanualPolicy(BackboneOnlyPolicy):
         outputs["utility_residual"] = selected["utility_residual"]
         outputs["utility_total"] = selected["utility_total"]
         outputs["ranking_diagnostics"] = selected["ranking_diagnostics"]
         return outputs

                 tiled[key] = self._tile_tensor(value, num_candidates)
         return tiled
+    def _detach_state(self, state: dict[str, Tensor]) -> dict[str, Tensor]:  # Return a new dict mirroring `state` with every Tensor value detached.
+        detached: dict[str, Tensor] = {}
+        for key, value in state.items():
+            detached[key] = value.detach() if isinstance(value, Tensor) else value  # non-Tensor values are passed through unchanged
+        return detached  # `state` itself is never mutated; only the new dict is returned
+    def _repeat_rollout_tensor(self, value: Tensor, num_candidates: int, horizon: int) -> Tensor:  # Detach `value` and broadcast it over new candidate and horizon dims.
+        value = value.detach()  # cut the result off from the autograd graph of the input
+        return value.unsqueeze(1).unsqueeze(2).expand(-1, num_candidates, horizon, *value.shape[1:])  # (d0, *rest) -> (d0, num_candidates, horizon, *rest); expand yields a broadcast view rather than a copy
+    def _identity_rollout(  # Build a rollout dict in which each Tensor of `interaction_state` is repeated unchanged per candidate and step.
+        self,
+        interaction_state: dict[str, Tensor],
+        num_candidates: int,
+    ) -> dict[str, Tensor]:
+        horizon = max(1, self.config.world_model.rollout_horizon)  # clamp so the rollout always has at least one step
+        rollout: dict[str, Tensor] = {}
+        for key, value in interaction_state.items():
+            if isinstance(value, Tensor):  # non-Tensor entries are left out of the rollout entirely
+                rollout[key] = self._repeat_rollout_tensor(value, num_candidates, horizon)  # detached copy of the current value at every (candidate, step)
+        return rollout
     def forward(
         self,
         images: Tensor,
             "reveal_state": elastic_state,
             "view_summaries": scene_output["view_summaries"],
             "geometry_summaries": scene_output["geometry_summaries"],
+            "rollout_source": "none",
         }
         candidate_chunks = candidate_chunks_override
         batch_size = candidate_chunks.shape[0]
         batch_indices = torch.arange(batch_size, device=candidate_chunks.device).unsqueeze(-1)
         topk_candidates = candidate_chunks[batch_indices, shortlist_indices]
+        num_topk = topk_candidates.shape[1]
         outputs["planner_topk_candidates"] = topk_candidates
         if proposal_logits is not None:
             topk_proposal_logits = proposal_logits.gather(1, shortlist_indices)
         else:
             topk_proposal_logits = None
+        planning_state = elastic_state
+        if not support_mode_conditioning:
+            planning_state = dict(elastic_state)
+            planning_state["support_mode_logits"] = torch.zeros_like(elastic_state["support_mode_logits"])
         if not use_world_model:
+            detached_state = self._detach_state(planning_state)
+            identity_rollout = self._identity_rollout(
+                interaction_state=detached_state,
+                num_candidates=num_topk,
+            )
+            selected = self.planner.select_best(
+                initial_state=detached_state,
+                candidate_chunks=topk_candidates,
+                rollout_state=identity_rollout,
+                proposal_logits=topk_proposal_logits,
+                candidate_indices=shortlist_indices,
+            )
+            outputs["planned_rollout"] = identity_rollout
+            outputs["planned_chunk"] = selected["best_chunk"]
+            outputs["planner_success_logits"] = selected["success_logits"]
+            outputs["planner_risk_values"] = selected["risk_values"]
+            outputs["planner_scores"] = selected["utility_total"]
+            outputs["best_candidate_indices"] = selected["best_indices"]
+            outputs["utility_structured"] = selected["utility_structured"]
+            outputs["utility_residual"] = selected["utility_residual"]
+            outputs["utility_total"] = selected["utility_total"]
+            outputs["ranking_diagnostics"] = selected["ranking_diagnostics"]
+            outputs["rollout_source"] = "identity"
             return outputs
         flat_chunks = topk_candidates.view(batch_size * num_topk, topk_candidates.shape[2], topk_candidates.shape[3])
         tiled_scene = self._tile_tensor(scene_tokens, num_topk)
         tiled_state = self._tile_state(planning_state, num_topk)
         rollout = self.world_model(
             scene_tokens=tiled_scene,
         outputs["utility_residual"] = selected["utility_residual"]
         outputs["utility_total"] = selected["utility_total"]
         outputs["ranking_diagnostics"] = selected["ranking_diagnostics"]
+        outputs["rollout_source"] = "learned"
         return outputs

code/reveal_vla_bimanual/train/losses.py CHANGED Viewed

@@ -303,7 +303,7 @@ def compute_total_loss(
             + 0.01 * reveal_losses["uncertainty"]
         )
-    if model_output.get("planned_rollout") and (
         "candidate_rollout_support_mode" in batch or "rollout_support_mode" in batch
     ):
         if "candidate_rollout_support_mode" in batch:

             + 0.01 * reveal_losses["uncertainty"]
         )
+    if model_output.get("planned_rollout") and model_output.get("rollout_source", "learned") == "learned" and (
         "candidate_rollout_support_mode" in batch or "rollout_support_mode" in batch
     ):
         if "candidate_rollout_support_mode" in batch:

results/phase_tracking.md CHANGED Viewed

@@ -83,10 +83,10 @@ Date closed: `2026-03-25 UTC`
   - `short_history`: `0.5463` mean success, delta `0.0000`
 - Gate decisions:
   - hard success gate `>= 0.60`: fail
-  - `no_world_model` must hurt: fail, no success drop and no persuasive secondary metric degradation
   - full memory must stop losing to short history: hard gate passes narrowly because full equals short-history; preferred gate fails because full does not beat short-history
   - state metrics should improve over phase 1: fail, reocclusion rate increased (`0.0000 -> 0.0121`), persistence MAE worsened (`1.9553 -> 2.2358`), and calibration worsened
-- Takeaway: the expanded state/memory path did not validate on the dummy proxy benchmark. Planner classification improved, but task success and state quality did not.
 ## Phase 3

   - `short_history`: `0.5463` mean success, delta `0.0000`
 - Gate decisions:
   - hard success gate `>= 0.60`: fail
+  - `no_world_model` must hurt: not interpretable from the stored artifact alone; the recorded `no_world_model` run predates the `2026-03-25` null-rollout ablation fix and should be rerun for a fair comparison
   - full memory must stop losing to short history: hard gate passes narrowly because full equals short-history; preferred gate fails because full does not beat short-history
   - state metrics should improve over phase 1: fail, reocclusion rate increased (`0.0000 -> 0.0121`), persistence MAE worsened (`1.9553 -> 2.2358`), and calibration worsened
+- Takeaway: the expanded state/memory path did not validate on the dummy proxy benchmark. Planner classification improved, but the world-model ablation needs a post-fix rerun before it can be interpreted fairly.
 ## Phase 3

tests/test_policy_topk_cascade.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from train.trainer import build_policy
@@ -23,3 +25,44 @@ def test_policy_topk_cascade(tiny_policy_config, tiny_trainer_config, tiny_batch
     assert output["planner_topk_indices"].shape[1] == config.planner.top_k
     assert output["planned_rollout"]["target_belief_field"].shape[1] == config.planner.top_k
     assert (output["best_candidate_indices"] < config.decoder.num_candidates).all()

+import torch
 from train.trainer import build_policy
     assert output["planner_topk_indices"].shape[1] == config.planner.top_k
     assert output["planned_rollout"]["target_belief_field"].shape[1] == config.planner.top_k
     assert (output["best_candidate_indices"] < config.decoder.num_candidates).all()
+def test_policy_null_rollout_ablation_keeps_planner_interface(  # Checks the policy outputs when called with use_world_model=False and use_planner=True.
+    tiny_policy_config,
+    tiny_trainer_config,
+    tiny_batch,
+):
+    config = tiny_policy_config(num_candidates=4, top_k=2)
+    batch = tiny_batch(chunk_size=config.decoder.chunk_size)
+    policy = build_policy(config, tiny_trainer_config(policy_type="elastic_reveal"))
+    output = policy(
+        images=batch["images"],
+        depths=batch["depths"],
+        depth_valid=batch["depth_valid"],
+        camera_intrinsics=batch["camera_intrinsics"],
+        camera_extrinsics=batch["camera_extrinsics"],
+        proprio=batch["proprio"],
+        texts=batch["texts"],
+        history_images=batch["history_images"],
+        history_depths=batch["history_depths"],
+        history_depth_valid=batch["history_depth_valid"],
+        history_proprio=batch["history_proprio"],
+        history_actions=batch["history_actions"],
+        plan=True,
+        use_world_model=False,  # the ablation setting under test
+        use_planner=True,
+    )
+    rollout = output["planned_rollout"]
+    current_state = output["interaction_state"]
+    assert output["rollout_source"] == "identity"  # the output must be tagged as an identity rollout
+    assert output["planner_topk_indices"].shape[1] == config.planner.top_k
+    assert rollout["target_belief_field"].shape[1] == config.planner.top_k  # dim 1 of the rollout matches the configured top_k
+    repeated_belief = current_state["target_belief_field"].detach().unsqueeze(1).unsqueeze(2).expand_as(  # expected value: current state broadcast over dims 1 and 2 to the rollout's shape
+        rollout["target_belief_field"]
+    )
+    repeated_phase = current_state["phase_logits"].detach().unsqueeze(1).unsqueeze(2).expand_as(  # same broadcast for the phase logits
+        rollout["phase_logits"]
+    )
+    assert output["utility_total"].shape == (batch["images"].shape[0], config.planner.top_k)  # one utility value per (batch item, shortlisted candidate)
+    assert torch.allclose(rollout["target_belief_field"], repeated_belief)  # rollout entries equal the current state repeated, i.e. no predicted change
+    assert torch.allclose(rollout["phase_logits"], repeated_phase)