lsnu committed on
Commit
b1ef16c
·
verified ·
1 Parent(s): 6fa1956

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. code/reveal_vla_bimanual/models/__init__.py +24 -0
  2. code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-310.pyc +0 -0
  3. code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-311.pyc +0 -0
  4. code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-310.pyc +0 -0
  5. code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-311.pyc +0 -0
  6. code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-310.pyc +0 -0
  7. code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-311.pyc +0 -0
  8. code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-310.pyc +0 -0
  9. code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-311.pyc +0 -0
  10. code/reveal_vla_bimanual/models/__pycache__/planner.cpython-310.pyc +0 -0
  11. code/reveal_vla_bimanual/models/__pycache__/planner.cpython-311.pyc +0 -0
  12. code/reveal_vla_bimanual/models/__pycache__/policy.cpython-310.pyc +0 -0
  13. code/reveal_vla_bimanual/models/__pycache__/policy.cpython-311.pyc +0 -0
  14. code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-310.pyc +0 -0
  15. code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-311.pyc +0 -0
  16. code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-310.pyc +0 -0
  17. code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-311.pyc +0 -0
  18. code/reveal_vla_bimanual/models/action_decoder.py +68 -0
  19. code/reveal_vla_bimanual/models/backbones.py +116 -0
  20. code/reveal_vla_bimanual/models/multiview_fusion.py +57 -0
  21. code/reveal_vla_bimanual/models/planner.py +61 -0
  22. code/reveal_vla_bimanual/models/policy.py +127 -0
  23. code/reveal_vla_bimanual/models/reveal_head.py +55 -0
  24. code/reveal_vla_bimanual/models/world_model.py +70 -0
  25. code/reveal_vla_bimanual/sim_reveal/__init__.py +15 -0
  26. code/reveal_vla_bimanual/sim_reveal/__pycache__/__init__.cpython-310.pyc +0 -0
  27. code/reveal_vla_bimanual/sim_reveal/__pycache__/__init__.cpython-311.pyc +0 -0
  28. code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-310.pyc +0 -0
  29. code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-311.pyc +0 -0
  30. code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-310.pyc +0 -0
  31. code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-311.pyc +0 -0
  32. code/reveal_vla_bimanual/sim_reveal/__pycache__/generate_dataset.cpython-310.pyc +0 -0
  33. code/reveal_vla_bimanual/sim_reveal/__pycache__/generate_dataset.cpython-311.pyc +0 -0
  34. code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_smoke.cpython-310.pyc +0 -0
  35. code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_smoke.cpython-311.pyc +0 -0
  36. code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_wrapper.cpython-310.pyc +0 -0
  37. code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_wrapper.cpython-311.pyc +0 -0
  38. code/reveal_vla_bimanual/sim_reveal/__pycache__/labels.cpython-311.pyc +0 -0
  39. code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-310.pyc +0 -0
  40. code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-311.pyc +0 -0
  41. code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-310.pyc +0 -0
  42. code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-311.pyc +0 -0
  43. code/reveal_vla_bimanual/sim_reveal/__pycache__/teachers.cpython-311.pyc +0 -0
  44. code/reveal_vla_bimanual/sim_reveal/base.py +32 -0
  45. code/reveal_vla_bimanual/sim_reveal/dataset.py +137 -0
  46. code/reveal_vla_bimanual/sim_reveal/generate_dataset.py +40 -0
  47. code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py +29 -0
  48. code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py +16 -0
  49. code/reveal_vla_bimanual/sim_reveal/labels.py +61 -0
  50. code/reveal_vla_bimanual/sim_reveal/procedural_envs.py +545 -0
code/reveal_vla_bimanual/models/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Public API of the ``models`` package for the REVEAL-VLA bimanual stack.

Re-exports the component classes (backbone, multi-view fusion, action
decoder, reveal head, world model, planner) and the composed policies so
callers can import everything directly from ``models``.
"""

from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
from models.planner import PlannerConfig, RevealPlanner
from models.policy import BackboneOnlyPolicy, RevealBimanualPolicy
from models.reveal_head import RevealHeadConfig, RevealStateHead
from models.world_model import RevealWM, RevealWMConfig

# Alphabetical, mirrors the imports above.
__all__ = [
    "ACTBimanualChunkDecoder",
    "BackboneOnlyPolicy",
    "ChunkDecoderConfig",
    "FrozenVLBackbone",
    "FrozenVLBackboneConfig",
    "MultiViewFusion",
    "MultiViewFusionConfig",
    "PlannerConfig",
    "RevealBimanualPolicy",
    "RevealHeadConfig",
    "RevealPlanner",
    "RevealStateHead",
    "RevealWM",
    "RevealWMConfig",
]
code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (840 Bytes). View file
 
code/reveal_vla_bimanual/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.02 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-310.pyc ADDED
Binary file (2.64 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/action_decoder.cpython-311.pyc ADDED
Binary file (4.77 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-310.pyc ADDED
Binary file (5.04 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/backbones.cpython-311.pyc ADDED
Binary file (9.38 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-310.pyc ADDED
Binary file (2.25 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/multiview_fusion.cpython-311.pyc ADDED
Binary file (3.9 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/planner.cpython-310.pyc ADDED
Binary file (2.52 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/planner.cpython-311.pyc ADDED
Binary file (3.62 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/policy.cpython-310.pyc ADDED
Binary file (5.17 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/policy.cpython-311.pyc ADDED
Binary file (8.91 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-310.pyc ADDED
Binary file (2.04 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/reveal_head.cpython-311.pyc ADDED
Binary file (3.84 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-310.pyc ADDED
Binary file (2.48 kB). View file
 
code/reveal_vla_bimanual/models/__pycache__/world_model.cpython-311.pyc ADDED
Binary file (4.71 kB). View file
 
code/reveal_vla_bimanual/models/action_decoder.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
@dataclass
class ChunkDecoderConfig:
    """Hyper-parameters for :class:`ACTBimanualChunkDecoder`."""

    # Transformer width; must match the fused scene-token dimension.
    hidden_dim: int = 512
    num_heads: int = 8
    num_layers: int = 4
    ff_dim: int = 2048
    dropout: float = 0.1
    # Number of future action steps decoded per chunk (one learned query each).
    chunk_size: int = 8
    # Bimanual action vector width (presumably 7 DoF per arm — TODO confirm).
    action_dim: int = 14
    # Default number of stochastic chunk candidates drawn for planning.
    num_candidates: int = 8
19
+
20
+
21
class ACTBimanualChunkDecoder(nn.Module):
    """ACT-style transformer decoder from scene tokens to an action chunk.

    A fixed set of ``chunk_size`` learned queries cross-attends the fused
    scene tokens; each decoded query yields a per-step Gaussian (mean and
    clamped log-std) over the bimanual action vector, and a pooled linear
    head scores the whole chunk as a planning proposal.
    """

    def __init__(self, config: ChunkDecoderConfig) -> None:
        super().__init__()
        self.config = config
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=config.hidden_dim,
            nhead=config.num_heads,
            dim_feedforward=config.ff_dim,
            dropout=config.dropout,
            batch_first=True,
            norm_first=True,
        )
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
        # One learned query per action step in the chunk.
        self.query_embed = nn.Embedding(config.chunk_size, config.hidden_dim)
        self.action_mean = nn.Linear(config.hidden_dim, config.action_dim)
        self.action_log_std = nn.Linear(config.hidden_dim, config.action_dim)
        self.proposal_score = nn.Sequential(
            nn.LayerNorm(config.hidden_dim),
            nn.Linear(config.hidden_dim, 1),
        )

    def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
        """Decode one action chunk from ``(batch, tokens, hidden_dim)`` scene features.

        Returns decoded query tokens, per-step Gaussian parameters, and a
        scalar proposal score per batch element.
        """
        batch_size = scene_tokens.shape[0]
        query = self.query_embed.weight.unsqueeze(0).expand(batch_size, -1, -1)
        decoded = self.decoder(query, scene_tokens)
        return {
            "decoded_tokens": decoded,
            "action_mean": self.action_mean(decoded),
            # Clamp keeps sampled noise scales in a numerically safe range.
            "action_log_std": self.action_log_std(decoded).clamp(min=-5.0, max=2.0),
            "proposal_score": self.proposal_score(decoded.mean(dim=1)).squeeze(-1),
        }

    def sample_candidates(self, action_mean: Tensor, action_log_std: Tensor, num_candidates: int | None = None) -> Tensor:
        """Draw ``num_candidates`` noisy chunks around the predicted mean.

        Candidate 0 is always the deterministic mean chunk. Fix: the previous
        ``num_candidates or self.config.num_candidates`` treated an explicit
        ``0`` as "use the config default"; only an omitted/``None`` argument
        now falls back to the config value (``<= 1`` still yields just the
        mean chunk).
        """
        if num_candidates is None:
            num_candidates = self.config.num_candidates
        if num_candidates <= 1:
            return action_mean.unsqueeze(1)
        std = action_log_std.exp()
        noise = torch.randn(
            action_mean.size(0),
            num_candidates,
            action_mean.size(1),
            action_mean.size(2),
            device=action_mean.device,
            dtype=action_mean.dtype,
        )
        candidates = action_mean.unsqueeze(1) + noise * std.unsqueeze(1)
        # Slot 0 is overwritten with the exact mean so planning always
        # considers the deterministic chunk.
        candidates[:, 0] = action_mean
        return candidates
code/reveal_vla_bimanual/models/backbones.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import math
5
+ from typing import Sequence
6
+
7
+ import torch
8
+ import torch.nn.functional as F
9
+ from torch import Tensor, nn
10
+
11
+
12
@dataclass
class FrozenVLBackboneConfig:
    """Hyper-parameters for :class:`FrozenVLBackbone`."""

    # Hugging Face checkpoint name used when the real CLIP model is loaded.
    model_name: str = "openai/clip-vit-base-patch32"
    # Nominal token width; overridden by CLIP's projection_dim in real mode.
    hidden_dim: int = 512
    max_text_tokens: int = 32
    # When True, all backbone parameters get requires_grad = False.
    freeze_backbone: bool = True
    gradient_checkpointing: bool = True
    # When True, deterministic stand-ins replace the pretrained model
    # (no download, no `transformers` import).
    use_dummy_backbone: bool = False
20
+
21
+
22
class _DummyTextTokenizer:
    """Offline stand-in for a real tokenizer.

    Each character becomes the token ``min(ord(c), vocab_size - 1)``;
    sequences are truncated to ``max_length`` and right-padded with zeros,
    with a matching 0/1 attention mask.
    """

    def __init__(self, vocab_size: int = 8192, max_length: int = 32) -> None:
        self.vocab_size = vocab_size
        self.max_length = max_length

    def __call__(self, texts: Sequence[str], device: torch.device) -> dict[str, Tensor]:
        token_ids = torch.zeros((len(texts), self.max_length), dtype=torch.long, device=device)
        attention_mask = torch.zeros_like(token_ids)
        cap = self.vocab_size - 1
        for index, text in enumerate(texts):
            codes = [min(ord(symbol), cap) for symbol in text[: self.max_length]]
            if not codes:
                continue  # empty string: all-padding row, all-zero mask
            length = len(codes)
            token_ids[index, :length] = torch.tensor(codes, dtype=torch.long, device=device)
            attention_mask[index, :length] = 1
        return {"input_ids": token_ids, "attention_mask": attention_mask}
36
+
37
+
38
class FrozenVLBackbone(nn.Module):
    """Frozen CLIP-style vision-language encoder.

    With ``config.use_dummy_backbone`` set, deterministic stand-ins
    (average-pooled patches, sinusoidal character embeddings) replace the
    pretrained model so the rest of the stack can run without downloading
    weights or importing ``transformers``.
    """

    def __init__(self, config: FrozenVLBackboneConfig) -> None:
        super().__init__()
        self.config = config
        self.hidden_dim = config.hidden_dim
        self.use_dummy_backbone = config.use_dummy_backbone

        if config.use_dummy_backbone:
            # Dummy path: 16x16 average pooling stands in for the ViT patch grid.
            self.image_patch_size = 16
            self.tokenizer = _DummyTextTokenizer(max_length=config.max_text_tokens)
        else:
            # Imported lazily so dummy mode never requires `transformers`.
            from transformers import AutoTokenizer, CLIPModel

            clip_model = CLIPModel.from_pretrained(config.model_name)
            self.vision_model = clip_model.vision_model
            self.text_model = clip_model.text_model
            self.visual_projection = clip_model.visual_projection
            self.text_projection = clip_model.text_projection
            self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
            # The real model dictates the token width, overriding the config.
            self.hidden_dim = clip_model.config.projection_dim
            if config.gradient_checkpointing:
                if hasattr(self.vision_model, "gradient_checkpointing_enable"):
                    self.vision_model.gradient_checkpointing_enable()
                if hasattr(self.text_model, "gradient_checkpointing_enable"):
                    self.text_model.gradient_checkpointing_enable()

        if config.freeze_backbone:
            for parameter in self.parameters():
                parameter.requires_grad = False

    def tokenize_text(self, texts: Sequence[str], device: torch.device) -> dict[str, Tensor]:
        """Tokenize *texts* and move the resulting id/mask tensors to *device*."""
        if self.use_dummy_backbone:
            return self.tokenizer(texts, device=device)
        return self.tokenizer(
            list(texts),
            padding=True,
            truncation=True,
            max_length=self.config.max_text_tokens,
            return_tensors="pt",
        ).to(device)

    def encode_images(self, images: Tensor) -> Tensor:
        """Encode ``(batch, views, C, H, W)`` images to ``(batch, views, tokens, hidden_dim)``."""
        batch_size, num_views, channels, height, width = images.shape
        flat_images = images.reshape(batch_size * num_views, channels, height, width)
        if self.use_dummy_backbone:
            # Patch features via average pooling; append mean intensity and
            # normalized (x, y) patch coordinates, then tile up to hidden_dim.
            pooled = F.avg_pool2d(flat_images.float(), kernel_size=self.image_patch_size, stride=self.image_patch_size)
            patch_tokens = pooled.flatten(2).transpose(1, 2)
            grid_h, grid_w = pooled.shape[-2], pooled.shape[-1]
            y_coords = torch.linspace(-1.0, 1.0, steps=grid_h, device=images.device)
            x_coords = torch.linspace(-1.0, 1.0, steps=grid_w, device=images.device)
            grid_y, grid_x = torch.meshgrid(y_coords, x_coords, indexing="ij")
            coords = torch.stack([grid_x, grid_y], dim=-1).reshape(1, grid_h * grid_w, 2)
            coords = coords.expand(patch_tokens.shape[0], -1, -1)
            intensity = patch_tokens.mean(dim=-1, keepdim=True)
            base = torch.cat([patch_tokens, intensity, coords], dim=-1)
            repeat_factor = math.ceil(self.hidden_dim / base.shape[-1])
            tokens = base.repeat(1, 1, repeat_factor)[..., : self.hidden_dim]
        else:
            outputs = self.vision_model(pixel_values=flat_images)
            tokens = self.visual_projection(outputs.last_hidden_state)
        num_tokens = tokens.shape[1]
        return tokens.reshape(batch_size, num_views, num_tokens, -1)

    def encode_text(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
        """Encode token ids to ``(batch, seq, hidden_dim)`` embeddings.

        In dummy mode, padding positions (mask == 0) are zeroed out.
        """
        if self.use_dummy_backbone:
            vocab_scale = float(self.tokenizer.vocab_size - 1)
            token_values = input_ids.float() / vocab_scale
            # Fix: use ceil(hidden_dim / 2) frequencies so sin+cos always
            # covers hidden_dim channels; the previous hidden_dim // 2 left
            # odd widths one channel short (shape mismatch downstream).
            frequencies = torch.linspace(
                1.0,
                4.0,
                steps=max(1, (self.hidden_dim + 1) // 2),
                device=input_ids.device,
                dtype=token_values.dtype,
            )
            phases = token_values.unsqueeze(-1) * frequencies.view(1, 1, -1) * (2.0 * math.pi)
            embeddings = torch.cat([torch.sin(phases), torch.cos(phases)], dim=-1)[..., : self.hidden_dim]
            return embeddings * attention_mask.unsqueeze(-1).float()
        outputs = self.text_model(input_ids=input_ids, attention_mask=attention_mask)
        return self.text_projection(outputs.last_hidden_state)
code/reveal_vla_bimanual/models/multiview_fusion.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
@dataclass
class MultiViewFusionConfig:
    """Hyper-parameters for :class:`MultiViewFusion`."""

    # Token width shared with the backbone and decoder.
    hidden_dim: int = 512
    # Number of camera views expected at every forward call.
    num_cameras: int = 3
    num_layers: int = 4
    num_heads: int = 8
    ff_dim: int = 2048
    dropout: float = 0.1
    # Width of the raw proprioception vector fed to the adapter.
    proprio_dim: int = 32
    # How many fused tokens the proprioception vector is expanded into.
    proprio_tokens: int = 1
19
+
20
+
21
class MultiViewFusion(nn.Module):
    """Fuses per-camera patch tokens, proprioception, and language tokens.

    Adds a learned per-camera embedding, runs a transformer encoder over the
    concatenated view tokens, projects proprioception into token space, and
    returns ``[fused image tokens | proprio tokens | language tokens]``.
    """

    def __init__(self, config: MultiViewFusionConfig) -> None:
        super().__init__()
        self.config = config
        self.camera_embedding = nn.Embedding(config.num_cameras, config.hidden_dim)
        layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_dim,
            nhead=config.num_heads,
            dim_feedforward=config.ff_dim,
            dropout=config.dropout,
            batch_first=True,
            norm_first=True,
        )
        self.cross_view_transformer = nn.TransformerEncoder(layer, num_layers=config.num_layers)
        self.proprio_adapter = nn.Sequential(
            nn.LayerNorm(config.proprio_dim),
            nn.Linear(config.proprio_dim, config.hidden_dim * config.proprio_tokens),
            nn.GELU(),
        )

    def forward(self, image_tokens: Tensor, proprio: Tensor, language_tokens: Tensor) -> Tensor:
        """Fuse ``(B, V, T, D)`` image tokens with proprio and language tokens.

        Raises ``ValueError`` when the number of views differs from
        ``config.num_cameras``.
        """
        batch, views, tokens_per_view, dim = image_tokens.shape
        if views != self.config.num_cameras:
            raise ValueError(f"Expected {self.config.num_cameras} views, received {views}")

        view_ids = torch.arange(views, device=image_tokens.device)
        positioned = image_tokens + self.camera_embedding(view_ids).view(1, views, 1, dim)
        flattened = positioned.reshape(batch, views * tokens_per_view, dim)
        fused = self.cross_view_transformer(flattened)

        state_tokens = self.proprio_adapter(proprio).view(batch, self.config.proprio_tokens, dim)
        return torch.cat([fused, state_tokens, language_tokens], dim=1)
code/reveal_vla_bimanual/models/planner.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor
7
+
8
+
9
@dataclass
class PlannerConfig:
    """Weights for :class:`RevealPlanner`'s rollout scoring."""

    # Number of candidate chunks considered per planning step.
    num_candidates: int = 8
    # Reward terms (added to the score).
    corridor_weight: float = 1.0
    persistence_weight: float = 0.5
    proposal_weight: float = 0.5
    task_progress_weight: float = 0.75
    # Penalty terms (subtracted from the score).
    disturbance_weight: float = 0.75
    reocclusion_weight: float = 0.5
    # Bonus applied only when a belief-gain tensor is supplied.
    visibility_weight: float = 0.25
19
+
20
+
21
class RevealPlanner:
    """Scores candidate action-chunk rollouts and selects the best one.

    The score rewards open approach corridors, persistence, decoder proposal
    confidence, and a heuristic task-progress term, while penalizing
    disturbance and re-occlusion; an optional belief-gain bonus is added
    when provided.
    """

    def __init__(self, config: PlannerConfig) -> None:
        self.config = config

    def score_rollouts(
        self,
        rollout_state: dict[str, Tensor],
        proposal_scores: Tensor,
        candidate_chunks: Tensor | None = None,
        belief_gain: Tensor | None = None,
    ) -> Tensor:
        """Return one scalar score per (batch, candidate)."""
        cfg = self.config
        # Best corridor probability per support mode, averaged over horizon
        # and modes; its shortfall from 1.0 is the re-occlusion penalty.
        corridor_open = rollout_state["corridor_logits"].sigmoid().amax(dim=-1)
        corridor_term = corridor_open.mean(dim=(-1, -2))
        reocclusion_term = torch.relu(1.0 - corridor_open).mean(dim=(-1, -2))
        persistence_term = rollout_state["persistence_horizon"].mean(dim=(-1, -2))
        disturbance_term = rollout_state["disturbance_cost"].mean(dim=-1)

        progress_term = proposal_scores.new_zeros(proposal_scores.shape)
        if candidate_chunks is not None:
            # Heuristic progress from action dims 8 and 13 — presumably the
            # retrieval arm's reach and grasp channels (TODO confirm); each
            # tanh-squashed value is shifted from [-1, 1] into [0, 1].
            reach = torch.tanh(candidate_chunks[..., 8]).mean(dim=-1)
            retrieve = torch.tanh(candidate_chunks[..., 13]).amax(dim=-1)
            progress_term = 0.5 * (reach + 1.0) * 0.5 + 0.5 * (retrieve + 1.0) * 0.5

        total = (
            cfg.corridor_weight * corridor_term
            + cfg.persistence_weight * persistence_term
            + cfg.proposal_weight * proposal_scores
            + cfg.task_progress_weight * progress_term
            - cfg.disturbance_weight * disturbance_term
            - cfg.reocclusion_weight * reocclusion_term
        )
        if belief_gain is not None:
            total = total + cfg.visibility_weight * belief_gain
        return total

    def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor], proposal_scores: Tensor) -> dict[str, Tensor]:
        """Score all candidates and return scores, argmax indices, and the winning chunk."""
        scores = self.score_rollouts(rollout_state, proposal_scores, candidate_chunks=candidate_chunks)
        winners = scores.argmax(dim=-1)
        rows = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
        return {
            "scores": scores,
            "best_indices": winners,
            "best_chunk": candidate_chunks[rows, winners],
        }
code/reveal_vla_bimanual/models/policy.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Sequence
5
+
6
+ import torch
7
+ from torch import Tensor, nn
8
+
9
+ from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
10
+ from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
11
+ from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
12
+ from models.planner import PlannerConfig, RevealPlanner
13
+ from models.reveal_head import RevealHeadConfig, RevealStateHead
14
+ from models.world_model import RevealWM, RevealWMConfig
15
+
16
+
17
@dataclass
class PolicyConfig:
    """Aggregates the sub-module configs used by both policy variants."""

    backbone: FrozenVLBackboneConfig = field(default_factory=FrozenVLBackboneConfig)
    fusion: MultiViewFusionConfig = field(default_factory=MultiViewFusionConfig)
    decoder: ChunkDecoderConfig = field(default_factory=ChunkDecoderConfig)
    # The three fields below are only consumed by RevealBimanualPolicy.
    reveal_head: RevealHeadConfig = field(default_factory=RevealHeadConfig)
    world_model: RevealWMConfig = field(default_factory=RevealWMConfig)
    planner: PlannerConfig = field(default_factory=PlannerConfig)
25
+
26
+
27
class BackboneOnlyPolicy(nn.Module):
    """Baseline policy: frozen VL backbone + multi-view fusion + chunk decoder.

    No reveal head, world model, or planner — the decoder's distribution is
    the final output.
    """

    def __init__(self, config: PolicyConfig) -> None:
        super().__init__()
        self.config = config
        self.backbone = FrozenVLBackbone(config.backbone)
        self.fusion = MultiViewFusion(config.fusion)
        self.decoder = ACTBimanualChunkDecoder(config.decoder)

    def _encode_language(
        self,
        images: Tensor,
        texts: Sequence[str] | None = None,
        language_tokens: dict[str, Tensor] | None = None,
    ) -> Tensor:
        """Embed the instruction; tokenizes *texts* on the images' device when
        pre-tokenized inputs are not supplied."""
        tokens = language_tokens
        if tokens is None:
            if texts is None:
                raise ValueError("Either texts or language_tokens must be provided.")
            tokens = self.backbone.tokenize_text(texts, device=images.device)
        return self.backbone.encode_text(
            input_ids=tokens["input_ids"],
            attention_mask=tokens["attention_mask"],
        )

    def encode_scene(
        self,
        images: Tensor,
        proprio: Tensor,
        texts: Sequence[str] | None = None,
        language_tokens: dict[str, Tensor] | None = None,
    ) -> Tensor:
        """Fuse image, proprioception, and language features into scene tokens."""
        visual = self.backbone.encode_images(images)
        linguistic = self._encode_language(images, texts=texts, language_tokens=language_tokens)
        return self.fusion(image_tokens=visual, proprio=proprio, language_tokens=linguistic)

    def forward(
        self,
        images: Tensor,
        proprio: Tensor,
        texts: Sequence[str] | None = None,
        language_tokens: dict[str, Tensor] | None = None,
    ) -> dict[str, Tensor]:
        """Return the decoder outputs plus the fused scene tokens."""
        scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
        result = self.decoder(scene_tokens)
        result["scene_tokens"] = scene_tokens
        return result
72
+
73
+
74
class RevealBimanualPolicy(BackboneOnlyPolicy):
    """Full REVEAL policy: baseline encoder/decoder plus reveal-state head,
    learned world model, and rollout-scoring planner."""

    def __init__(self, config: PolicyConfig) -> None:
        super().__init__(config)
        self.reveal_head = RevealStateHead(config.reveal_head)
        self.world_model = RevealWM(config.world_model)
        self.planner = RevealPlanner(config.planner)

    def forward(
        self,
        images: Tensor,
        proprio: Tensor,
        texts: Sequence[str] | None = None,
        language_tokens: dict[str, Tensor] | None = None,
        plan: bool = True,  # when False, skip world-model rollout and planning
        support_mode_conditioning: bool = True,  # ablation: zero the support-mode logits fed to the WM
    ) -> dict[str, Tensor]:
        """Run the base policy, predict the reveal state, and (optionally)
        roll out sampled candidate chunks through the world model to pick the
        best-scoring chunk."""
        outputs = super().forward(images, proprio, texts=texts, language_tokens=language_tokens)
        reveal_state = self.reveal_head(outputs["scene_tokens"])
        outputs["reveal_state"] = reveal_state

        # Candidate chunks sampled around the decoder's Gaussian; slot 0 is
        # the deterministic mean chunk.
        candidate_chunks = self.decoder.sample_candidates(
            outputs["action_mean"],
            outputs["action_log_std"],
            num_candidates=self.config.decoder.num_candidates,
        )
        outputs["candidate_chunks"] = candidate_chunks

        if plan:
            # Fold the candidate axis into the batch so the world model sees
            # (batch * num_candidates, ...) inputs in a single call.
            batch_size, num_candidates, chunk_size, action_dim = candidate_chunks.shape
            flat_chunks = candidate_chunks.view(batch_size * num_candidates, chunk_size, action_dim)
            tiled_scene = outputs["scene_tokens"].unsqueeze(1).expand(-1, num_candidates, -1, -1)
            tiled_scene = tiled_scene.reshape(batch_size * num_candidates, outputs["scene_tokens"].shape[1], outputs["scene_tokens"].shape[2])
            planning_reveal_state = reveal_state
            if not support_mode_conditioning:
                # Shallow copy so the logged reveal_state keeps the real logits.
                planning_reveal_state = dict(reveal_state)
                planning_reveal_state["support_mode_logits"] = torch.zeros_like(reveal_state["support_mode_logits"])
            # Tile every reveal-state tensor the same way as the scene tokens.
            tiled_reveal = {
                key: value.unsqueeze(1).expand(-1, num_candidates, *value.shape[1:]).reshape(batch_size * num_candidates, *value.shape[1:])
                for key, value in planning_reveal_state.items()
            }
            rollout = self.world_model(tiled_scene, tiled_reveal, flat_chunks)
            # Restore the (batch, candidate) leading axes for the planner.
            reshaped_rollout = {
                key: value.view(batch_size, num_candidates, *value.shape[1:]) for key, value in rollout.items()
            }
            selected = self.planner.select_best(
                candidate_chunks=candidate_chunks,
                rollout_state=reshaped_rollout,
                # One proposal score per batch element, broadcast to all candidates.
                proposal_scores=outputs["proposal_score"].unsqueeze(-1).expand(-1, num_candidates),
            )
            outputs["planned_rollout"] = reshaped_rollout
            outputs["planned_chunk"] = selected["best_chunk"]
            outputs["planner_scores"] = selected["scores"]
            outputs["best_candidate_indices"] = selected["best_indices"]
        return outputs
code/reveal_vla_bimanual/models/reveal_head.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from torch import Tensor, nn
6
+
7
+
8
@dataclass
class RevealHeadConfig:
    """Hyper-parameters for :class:`RevealStateHead`."""

    hidden_dim: int = 512
    # Size of the discrete support-mode classification.
    num_support_modes: int = 3
    # Approach-corridor templates predicted per support mode.
    num_approach_templates: int = 32
    # Carried in the config but not read by the head itself (used by the WM).
    rollout_horizon: int = 5
    # Side length of the optional square belief map.
    belief_map_size: int = 32
    # When True, an extra linear head emits a (1, size, size) belief map.
    predict_belief_map: bool = False
16
+
17
+
18
class RevealStateHead(nn.Module):
    """Predicts the REVEAL latent state from mean-pooled scene tokens.

    Outputs support-mode logits, per-mode approach-corridor logits, per-mode
    persistence horizons, a scalar disturbance cost, and (optionally) a
    square belief map.
    """

    def __init__(self, config: RevealHeadConfig) -> None:
        super().__init__()
        self.config = config
        self.trunk = nn.Sequential(
            nn.LayerNorm(config.hidden_dim),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.GELU(),
        )
        self.support_mode = nn.Linear(config.hidden_dim, config.num_support_modes)
        self.corridor = nn.Linear(
            config.hidden_dim,
            config.num_support_modes * config.num_approach_templates,
        )
        self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
        self.disturbance = nn.Linear(config.hidden_dim, 1)
        # Optional head; stays None unless the config asks for a belief map.
        self.belief_map = None
        if config.predict_belief_map:
            side = config.belief_map_size
            self.belief_map = nn.Linear(config.hidden_dim, side * side)

    def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
        """Map ``(batch, tokens, hidden_dim)`` scene tokens to the reveal-state dict."""
        batch = scene_tokens.shape[0]
        hidden = self.trunk(scene_tokens.mean(dim=1))
        corridor = self.corridor(hidden).view(
            batch,
            self.config.num_support_modes,
            self.config.num_approach_templates,
        )
        result: dict[str, Tensor] = {
            "support_mode_logits": self.support_mode(hidden),
            "corridor_logits": corridor,
            "persistence_horizon": self.persistence(hidden),
            "disturbance_cost": self.disturbance(hidden).squeeze(-1),
        }
        if self.belief_map is not None:
            side = self.config.belief_map_size
            result["belief_map"] = self.belief_map(hidden).view(batch, 1, side, side)
        return result
code/reveal_vla_bimanual/models/world_model.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
@dataclass
class RevealWMConfig:
    """Hyper-parameters for :class:`RevealWM`."""

    hidden_dim: int = 512
    # Width of one action step fed to the GRU (matches the decoder's action_dim).
    action_dim: int = 14
    # Must agree with the reveal head so the flattened state widths line up.
    num_support_modes: int = 3
    num_approach_templates: int = 32
    # Nominal rollout length; the GRU actually follows the action chunk length.
    rollout_horizon: int = 5
16
+
17
+
18
class RevealWM(nn.Module):
    """GRU world model that rolls the REVEAL state forward under an action chunk.

    The initial hidden state is computed from pooled scene tokens plus the
    flattened current reveal state; each action step then advances the GRU,
    and linear heads read out the predicted per-step reveal quantities.
    """

    def __init__(self, config: RevealWMConfig) -> None:
        super().__init__()
        self.config = config
        # Flattened reveal-state width: support-mode logits + corridor logits
        # + persistence horizons + scalar disturbance cost.
        modes = config.num_support_modes
        reveal_dim = modes + modes * config.num_approach_templates + modes + 1
        self.initial = nn.Sequential(
            nn.LayerNorm(config.hidden_dim + reveal_dim),
            nn.Linear(config.hidden_dim + reveal_dim, config.hidden_dim),
            nn.GELU(),
        )
        self.action_encoder = nn.Linear(config.action_dim, config.hidden_dim)
        self.gru = nn.GRU(config.hidden_dim, config.hidden_dim, batch_first=True)
        self.support_mode = nn.Linear(config.hidden_dim, modes)
        self.corridor = nn.Linear(config.hidden_dim, modes * config.num_approach_templates)
        self.persistence = nn.Linear(config.hidden_dim, modes)
        self.disturbance = nn.Linear(config.hidden_dim, 1)

    def _flatten_reveal(self, reveal_state: dict[str, Tensor]) -> Tensor:
        """Concatenate the reveal-state dict into one per-sample feature vector."""
        parts = [
            reveal_state["support_mode_logits"],
            reveal_state["corridor_logits"].flatten(start_dim=1),
            reveal_state["persistence_horizon"],
            reveal_state["disturbance_cost"].unsqueeze(-1),
        ]
        return torch.cat(parts, dim=-1)

    def forward(self, scene_tokens: Tensor, reveal_state: dict[str, Tensor], action_chunk: Tensor) -> dict[str, Tensor]:
        """Predict per-step reveal quantities for ``(batch, steps, action_dim)`` chunks."""
        pooled = scene_tokens.mean(dim=1)
        hidden0 = self.initial(torch.cat([pooled, self._flatten_reveal(reveal_state)], dim=-1))
        rollout, _ = self.gru(self.action_encoder(action_chunk), hidden0.unsqueeze(0))
        batch, horizon, _ = rollout.shape
        corridor = self.corridor(rollout).view(
            batch,
            horizon,
            self.config.num_support_modes,
            self.config.num_approach_templates,
        )
        return {
            "support_mode_logits": self.support_mode(rollout),
            "corridor_logits": corridor,
            "persistence_horizon": self.persistence(rollout),
            "disturbance_cost": self.disturbance(rollout).squeeze(-1),
        }
code/reveal_vla_bimanual/sim_reveal/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Public API of the ``sim_reveal`` package.

Re-exports the base types, the procedural proxy environments, and the three
built-in proxy specs so callers can import them directly from ``sim_reveal``.
"""

from sim_reveal.base import RevealProxyConfig, RevealState, SupportMode
from sim_reveal.procedural_envs import ProceduralRevealEnv, available_proxy_names, make_proxy_env
from sim_reveal.proxy_specs import BAG_PROXY, CLOTH_PROXY, FOLIAGE_PROXY

# Alphabetical, mirrors the imports above.
__all__ = [
    "BAG_PROXY",
    "CLOTH_PROXY",
    "FOLIAGE_PROXY",
    "ProceduralRevealEnv",
    "RevealProxyConfig",
    "RevealState",
    "SupportMode",
    "available_proxy_names",
    "make_proxy_env",
]
code/reveal_vla_bimanual/sim_reveal/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (542 Bytes). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (653 Bytes). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-310.pyc ADDED
Binary file (1.34 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/base.cpython-311.pyc ADDED
Binary file (1.88 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (4.5 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/dataset.cpython-311.pyc ADDED
Binary file (8.42 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/generate_dataset.cpython-310.pyc ADDED
Binary file (1.37 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/generate_dataset.cpython-311.pyc ADDED
Binary file (2.44 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_smoke.cpython-310.pyc ADDED
Binary file (868 Bytes). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_smoke.cpython-311.pyc ADDED
Binary file (1.45 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_wrapper.cpython-310.pyc ADDED
Binary file (874 Bytes). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/isaac_wrapper.cpython-311.pyc ADDED
Binary file (1.22 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/labels.cpython-311.pyc ADDED
Binary file (3.57 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-310.pyc ADDED
Binary file (16.7 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/procedural_envs.cpython-311.pyc ADDED
Binary file (33.2 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-310.pyc ADDED
Binary file (922 Bytes). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/proxy_specs.cpython-311.pyc ADDED
Binary file (1.09 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/__pycache__/teachers.cpython-311.pyc ADDED
Binary file (3.68 kB). View file
 
code/reveal_vla_bimanual/sim_reveal/base.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import IntEnum
5
+
6
+ import numpy as np
7
+
8
+
9
class SupportMode(IntEnum):
    """How the supporting arm maintains the revealed opening.

    HOLD: the arm actively holds the occluder open.
    TRANSFER: the occluder has been handed off / transferred to a support.
    PASSIVE: no active support; the material is left to settle on its own.

    IntEnum so modes double as class indices into per-mode label arrays.
    """

    HOLD = 0
    TRANSFER = 1
    PASSIVE = 2
14
+
15
@dataclass
class RevealState:
    """Privileged "reveal" supervision targets for a single timestep.

    Shapes below follow the validation in ``labels.py``:
    support_mode_logits is (num_modes,), corridor_logits is
    (num_modes, num_templates), persistence_horizon is (num_modes,).
    """

    # (num_modes,) logits over SupportMode classes.
    support_mode_logits: np.ndarray
    # (num_modes, num_templates) per-template corridor feasibility logits.
    corridor_logits: np.ndarray
    # (num_modes,) expected number of future steps each mode keeps a corridor open.
    persistence_horizon: np.ndarray
    # (1,) scalar disturbance cost wrapped in an array.
    disturbance_cost: np.ndarray
    # Optional 2D belief map over the workspace; None when unavailable.
    belief_map: np.ndarray | None = None
22
+
23
+
24
@dataclass
class RevealProxyConfig:
    """Static configuration describing one procedural reveal proxy task."""

    # Unique registry key for the proxy (e.g. foliage / bag / cloth).
    name: str
    # Number of discrete template slots the actor arm can target.
    num_templates: int = 32
    # Horizon used when rolling labels forward for persistence estimates.
    rollout_horizon: int = 5
    # Episode step budget before truncation.
    max_steps: int = 80
    # Keys used to read labels out of the privileged state dict.
    disturbance_key: str = "disturbance_cost"
    success_key: str = "retrieval_success"
    # Free-form string metadata attached to the proxy.
    metadata: dict[str, str] = field(default_factory=dict)
code/reveal_vla_bimanual/sim_reveal/dataset.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Sequence
5
+
6
+ import torch
7
+ from torch import Tensor
8
+ from torch.utils.data import Dataset
9
+
10
+ from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
11
+
12
+
13
def collect_teacher_dataset(
    proxy_names: Sequence[str] | None = None,
    episodes_per_proxy: int = 32,
    resolution: int = 96,
    seed: int = 0,
    chunk_horizon: int = 8,
    rollout_horizon: int = 5,
) -> dict[str, Any]:
    """Roll out the scripted teacher in every proxy env and collect samples.

    For each (proxy, episode) pair the env is stepped with the teacher
    policy; at every step we record the teacher action chunk, the privileged
    reveal labels, and the short label rollout from
    ``teacher_chunk_and_rollout`` (which restores the env state afterwards).

    Args:
        proxy_names: Proxies to collect from; defaults to all registered ones.
        episodes_per_proxy: Number of teacher episodes per proxy.
        resolution: Square render resolution for the stored render states.
        seed: Base seed; each episode gets a distinct derived seed.
        chunk_horizon: Length of each recorded teacher action chunk.
        rollout_horizon: Length of the recorded label rollout.

    Returns:
        Bundle dict with ``samples`` (per-step dicts), a per-proxy ``summary``
        (episode/sample counts, teacher success rate) and the collection
        settings needed to rebuild a dataset.
    """
    proxy_names = tuple(proxy_names or available_proxy_names())
    samples: list[dict[str, Any]] = []
    summary: dict[str, dict[str, float]] = {}

    for proxy_offset, proxy_name in enumerate(proxy_names):
        proxy_samples = 0
        proxy_success = 0
        for episode_idx in range(episodes_per_proxy):
            # One distinct, reproducible seed per (proxy, episode) pair.
            episode_seed = seed + proxy_offset * 10_000 + episode_idx
            env = make_proxy_env(
                proxy_name=proxy_name,
                resolution=resolution,
                seed=episode_seed,
                rollout_horizon=rollout_horizon,
            )
            _, privileged_state = env.reset(seed=episode_seed)
            while True:
                action_chunk, rollout = env.teacher_chunk_and_rollout(
                    chunk_horizon=chunk_horizon,
                    rollout_horizon=rollout_horizon,
                )
                # Fetch the observation once. The previous version called
                # env.get_observation() twice per step, rendering every
                # camera view a second time just to read the "text" field.
                observation = env.get_observation(privileged_state)
                samples.append(
                    {
                        "proxy_name": proxy_name,
                        "episode_id": episode_idx,
                        "render_state": env.render_state(privileged_state),
                        "proprio": observation["proprio"].astype("float32"),
                        "language_goal": observation["text"],
                        "action_chunk": action_chunk.astype("float32"),
                        "support_mode": int(privileged_state["support_mode"]),
                        "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
                        "persistence_horizon": privileged_state["persistence_horizon"].astype("float32"),
                        "disturbance_cost": float(privileged_state["disturbance_cost"]),
                        "belief_map": privileged_state["belief_map"].astype("float32"),
                        "rollout_support_mode": rollout["rollout_support_mode"].astype("int64"),
                        "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
                        "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
                        "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
                    }
                )
                proxy_samples += 1
                _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
                if terminated:
                    proxy_success += 1
                if terminated or truncated:
                    break
        summary[proxy_name] = {
            "episodes": float(episodes_per_proxy),
            "samples": float(proxy_samples),
            "teacher_success": proxy_success / float(max(1, episodes_per_proxy)),
        }
    return {
        "resolution": resolution,
        "chunk_horizon": chunk_horizon,
        "rollout_horizon": rollout_horizon,
        "samples": samples,
        "summary": summary,
    }
78
+
79
+
80
def save_teacher_dataset(output_path: str | Path, dataset_bundle: dict[str, Any]) -> Path:
    """Serialize *dataset_bundle* with ``torch.save``.

    Parent directories are created as needed. Returns the destination path.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    torch.save(dataset_bundle, destination)
    return destination
85
+
86
+
87
def load_teacher_dataset(dataset_path: str | Path) -> dict[str, Any]:
    """Load a bundle previously written by ``save_teacher_dataset``.

    Tensors are mapped onto CPU; ``weights_only=False`` is required because
    the bundle contains arbitrary Python objects, not just tensors.
    """
    path = Path(dataset_path)
    return torch.load(path, map_location="cpu", weights_only=False)
89
+
90
+
91
class RevealOfflineDataset(Dataset[dict[str, Any]]):
    """Map-style dataset over pre-collected teacher samples.

    Camera views are re-rendered on the fly from each sample's stored
    ``render_state``, so the serialized bundle stays small.
    """

    def __init__(self, samples: Sequence[dict[str, Any]], resolution: int = 96) -> None:
        """Wrap *samples* (as produced by ``collect_teacher_dataset``)."""
        self.samples = list(samples)
        self.resolution = resolution

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, index: int) -> dict[str, Any]:
        """Render and tensorize one sample.

        Images come back as (views=3, C, H, W) float in [0, 1], views ordered
        front, wrist_left, wrist_right.
        """
        sample = self.samples[index]
        images = render_views_from_state(
            proxy_name=sample["proxy_name"],
            render_state=sample["render_state"],
            resolution=self.resolution,
        )
        # Stack (view, H, W, C) -> (view, C, H, W) and normalize to [0, 1].
        # Fixed: the original wrapped the stacked tensor in
        # torch.from_numpy(...numpy()), a pointless torch->numpy->torch
        # round trip that copied every image an extra time.
        stacked = (
            torch.stack(
                [
                    torch.from_numpy(images["front"]),
                    torch.from_numpy(images["wrist_left"]),
                    torch.from_numpy(images["wrist_right"]),
                ],
                dim=0,
            )
            .permute(0, 3, 1, 2)
            .float()
            / 255.0
        )
        return {
            "images": stacked,
            "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
            "texts": sample["language_goal"],
            "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
            "support_mode": torch.as_tensor(sample["support_mode"], dtype=torch.long),
            "corridor_feasible": torch.as_tensor(sample["corridor_feasible"], dtype=torch.float32),
            "persistence_horizon": torch.as_tensor(sample["persistence_horizon"], dtype=torch.float32),
            "disturbance_cost": torch.as_tensor(sample["disturbance_cost"], dtype=torch.float32),
            "belief_map": torch.as_tensor(sample["belief_map"], dtype=torch.float32).unsqueeze(0),
            "rollout_support_mode": torch.as_tensor(sample["rollout_support_mode"], dtype=torch.long),
            "rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
            "rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
            "rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
            "proxy_name": sample["proxy_name"],
            "episode_id": sample["episode_id"],
        }
133
+
134
+
135
def dataset_from_bundle(dataset_bundle: dict[str, Any], resolution: int | None = None) -> RevealOfflineDataset:
    """Build a ``RevealOfflineDataset`` from a saved bundle dict.

    When *resolution* is not given (or falsy), the resolution recorded in
    the bundle is used.
    """
    chosen_resolution = resolution or int(dataset_bundle["resolution"])
    return RevealOfflineDataset(dataset_bundle["samples"], resolution=chosen_resolution)
code/reveal_vla_bimanual/sim_reveal/generate_dataset.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from sim_reveal.dataset import collect_teacher_dataset, save_teacher_dataset
8
+
9
+
10
def main() -> None:
    """CLI entry point: collect a teacher dataset and save it as one bundle.

    Prints a JSON summary (output path, resolution, sample count, per-proxy
    stats) to stdout when done.
    """
    parser = argparse.ArgumentParser()
    # None means "all registered proxies" inside collect_teacher_dataset.
    parser.add_argument("--proxies", nargs="*", default=None)
    parser.add_argument("--episodes-per-proxy", type=int, default=32)
    parser.add_argument("--resolution", type=int, default=96)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--chunk-horizon", type=int, default=8)
    parser.add_argument("--rollout-horizon", type=int, default=5)
    parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
    args = parser.parse_args()

    dataset_bundle = collect_teacher_dataset(
        proxy_names=args.proxies,
        episodes_per_proxy=args.episodes_per_proxy,
        resolution=args.resolution,
        seed=args.seed,
        chunk_horizon=args.chunk_horizon,
        rollout_horizon=args.rollout_horizon,
    )
    output_path = save_teacher_dataset(Path(args.output_path), dataset_bundle)
    # Machine-readable run report for downstream tooling / logs.
    payload = {
        "output_path": str(output_path),
        "resolution": dataset_bundle["resolution"],
        "num_samples": len(dataset_bundle["samples"]),
        "summary": dataset_bundle["summary"],
    }
    print(json.dumps(payload, indent=2))


if __name__ == "__main__":
    main()
code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+
6
+ from sim_reveal.isaac_wrapper import IsaacRevealRuntime
7
+
8
+
9
def main() -> None:
    """Smoke test: boot Isaac Sim via the wrapper and report its version.

    Prints a JSON status payload; the runtime is always closed, even if the
    import or version probe fails.
    """
    parser = argparse.ArgumentParser()
    # --visible runs with the UI; default is headless.
    parser.add_argument("--visible", action="store_true")
    args = parser.parse_args()

    runtime = IsaacRevealRuntime(headless=not args.visible)
    try:
        # Imported after the runtime exists — SimulationApp is created in the
        # wrapper's __post_init__ before this module-level probe.
        import isaacsim

        payload = {
            "headless": not args.visible,
            "isaacsim_version": getattr(isaacsim, "__version__", "unknown"),
            "status": "ok",
        }
        print(json.dumps(payload, indent=2))
    finally:
        runtime.close()


if __name__ == "__main__":
    main()
code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass
class IsaacRevealRuntime:
    """Thin lifecycle wrapper around Isaac Sim's ``SimulationApp``.

    Constructing an instance starts the simulation app; call :meth:`close`
    when done. NOTE(review): if ``SimulationApp`` construction raises,
    ``_simulation_app`` is never set and ``close`` would fail — acceptable
    for a smoke-test helper, but verify before wider use.
    """

    # Run without a UI by default.
    headless: bool = True

    def __post_init__(self) -> None:
        # Imported lazily so this module can be imported on machines
        # without Isaac Sim installed.
        from isaacsim import SimulationApp

        self._simulation_app = SimulationApp({"headless": self.headless})

    def close(self) -> None:
        """Shut down the underlying SimulationApp."""
        self._simulation_app.close()
code/reveal_vla_bimanual/sim_reveal/labels.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+
7
+ from sim_reveal.base import RevealState, SupportMode
8
+
9
+
10
def privileged_state_to_reveal_labels(
    state: dict[str, Any],
    num_modes: int = 3,
    num_templates: int = 32,
    rollout_horizon: int = 5,
) -> RevealState:
    """Convert a simulator privileged-state dict into ``RevealState`` labels.

    Hard class labels are encoded as saturated logits (+4 / -4) so they can
    feed sigmoid/softmax heads directly. Shapes are validated against
    (num_modes, num_templates) and (num_modes,).

    Raises:
        ValueError: If corridor or persistence arrays have the wrong shape.
    """
    mode_index = int(state["support_mode"])
    mode_logits = np.full((num_modes,), -4.0, dtype=np.float32)
    mode_logits[mode_index] = 4.0

    feasibility = np.asarray(state["corridor_feasible"], dtype=np.float32)
    if feasibility.shape != (num_modes, num_templates):
        raise ValueError(
            f"Expected corridor_feasible shape {(num_modes, num_templates)}, got {feasibility.shape}"
        )
    feasibility_logits = np.where(feasibility > 0.5, 4.0, -4.0).astype(np.float32)

    horizon = np.asarray(state["persistence_horizon"], dtype=np.float32)
    if horizon.shape != (num_modes,):
        raise ValueError(f"Expected persistence_horizon shape {(num_modes,)}, got {horizon.shape}")
    horizon = np.clip(horizon, 0.0, float(rollout_horizon))

    cost = np.asarray([state["disturbance_cost"]], dtype=np.float32)

    belief = state.get("belief_map")
    belief = None if belief is None else np.asarray(belief, dtype=np.float32)

    return RevealState(
        support_mode_logits=mode_logits,
        corridor_logits=feasibility_logits,
        persistence_horizon=horizon,
        disturbance_cost=cost,
        belief_map=belief,
    )
44
+
45
+
46
def reocclusion_rate(corridor_open_history: np.ndarray) -> float:
    """Fraction of consecutive step pairs where an open corridor closed.

    The history is a 1D sequence of open/closed indicators (values > 0.5
    count as open). Histories with fewer than two entries yield 0.0.

    Raises:
        ValueError: If the input is not one-dimensional.
    """
    history = np.asarray(corridor_open_history, dtype=np.float32)
    if history.ndim != 1:
        raise ValueError("corridor_open_history must be 1D.")
    if history.size < 2:
        return 0.0
    was_open = history[:-1] > 0.5
    now_closed = history[1:] <= 0.5
    return float((was_open & now_closed).mean())
54
+
55
+
56
def infer_support_mode_from_flags(holding: bool, transferred: bool) -> SupportMode:
    """Map boolean contact flags to a ``SupportMode``.

    HOLD takes priority over TRANSFER; with neither flag set the mode is
    PASSIVE.
    """
    if holding:
        return SupportMode.HOLD
    return SupportMode.TRANSFER if transferred else SupportMode.PASSIVE
code/reveal_vla_bimanual/sim_reveal/procedural_envs.py ADDED
@@ -0,0 +1,545 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+
8
+ from sim_reveal.base import RevealProxyConfig, SupportMode
9
+ from sim_reveal.proxy_specs import BAG_PROXY, CLOTH_PROXY, FOLIAGE_PROXY
10
+
11
+
12
@dataclass(frozen=True)
class ProxyDynamics:
    """Scalar dynamics coefficients for one procedural proxy environment."""

    # Per-step opening decay under each SupportMode.
    hold_decay: float
    transfer_decay: float
    passive_decay: float
    # How strongly open/close commands add to the disturbance level.
    disturbance_gain: float
    # Per-step fraction by which disturbance settles back down.
    settle_rate: float
    # Target opening the teacher tries to reach before committing to a mode.
    desired_opening: float
    # Support mode the scripted teacher prefers once the opening is ready.
    preferred_mode: SupportMode
    # Corridor-width multipliers for TRANSFER / PASSIVE relative to HOLD (1.0).
    transfer_support_factor: float
    passive_support_factor: float
    # Additive offset in the visibility computation.
    visibility_bias: float
    # Minimum visibility required before a retrieval can succeed.
    retrieve_visibility_threshold: float
    # Base RGB color used when rendering this proxy's views.
    palette: tuple[float, float, float]
26
+
27
+
28
# Registry of proxy task configs, keyed by proxy name.
PROXY_CONFIGS: dict[str, RevealProxyConfig] = {
    FOLIAGE_PROXY.name: FOLIAGE_PROXY,
    BAG_PROXY.name: BAG_PROXY,
    CLOTH_PROXY.name: CLOTH_PROXY,
}

# Hand-tuned scalar dynamics per proxy. Each proxy favors a different
# support mode: foliage must be actively held, a bag mouth stays open
# after transfer, cloth mostly stays where it is left (passive).
PROXY_DYNAMICS: dict[str, ProxyDynamics] = {
    FOLIAGE_PROXY.name: ProxyDynamics(
        hold_decay=0.02,
        transfer_decay=0.07,
        passive_decay=0.15,
        disturbance_gain=0.06,
        settle_rate=0.03,
        desired_opening=0.60,
        preferred_mode=SupportMode.HOLD,
        transfer_support_factor=0.76,
        passive_support_factor=0.42,
        visibility_bias=0.03,
        retrieve_visibility_threshold=0.42,
        palette=(0.16, 0.30, 0.12),
    ),
    BAG_PROXY.name: ProxyDynamics(
        hold_decay=0.04,
        transfer_decay=0.03,
        passive_decay=0.12,
        disturbance_gain=0.05,
        settle_rate=0.02,
        desired_opening=0.68,
        preferred_mode=SupportMode.TRANSFER,
        transfer_support_factor=0.96,
        passive_support_factor=0.55,
        visibility_bias=0.06,
        retrieve_visibility_threshold=0.48,
        palette=(0.26, 0.17, 0.10),
    ),
    CLOTH_PROXY.name: ProxyDynamics(
        hold_decay=0.03,
        transfer_decay=0.05,
        passive_decay=0.04,
        disturbance_gain=0.04,
        settle_rate=0.04,
        desired_opening=0.50,
        preferred_mode=SupportMode.PASSIVE,
        transfer_support_factor=0.82,
        passive_support_factor=0.90,
        visibility_bias=0.08,
        retrieve_visibility_threshold=0.38,
        palette=(0.24, 0.24, 0.29),
    ),
}

# Natural-language goal string attached to observations for each proxy.
PROXY_GOALS = {
    FOLIAGE_PROXY.name: "create a gap in the foliage and retrieve the target",
    BAG_PROXY.name: "open the bag mouth and retrieve the target object",
    CLOTH_PROXY.name: "lift the top layer enough to retrieve the hidden object",
}
84
+
85
+
86
def available_proxy_names() -> tuple[str, ...]:
    """Return the names of all registered procedural proxy environments."""
    return tuple(PROXY_CONFIGS)
88
+
89
+
90
def make_proxy_env(
    proxy_name: str,
    resolution: int = 96,
    seed: int = 0,
    num_templates: int = 32,
    rollout_horizon: int = 5,
    max_steps: int | None = None,
) -> "ProceduralRevealEnv":
    """Factory wrapper: construct a ``ProceduralRevealEnv`` for *proxy_name*.

    All arguments are forwarded unchanged; ``max_steps=None`` lets the env
    fall back to the proxy config's step budget.
    """
    return ProceduralRevealEnv(
        proxy_name=proxy_name,
        resolution=resolution,
        seed=seed,
        num_templates=num_templates,
        rollout_horizon=rollout_horizon,
        max_steps=max_steps,
    )
106
+
107
+
108
class ProceduralRevealEnv:
    """Procedural two-arm "reveal and retrieve" proxy environment.

    A scalar ``opening`` models how far the occluder (foliage gap, bag
    mouth, cloth layer) is held open and ``disturbance`` models how much
    the scene has been perturbed. A scripted actor arm makes progress
    toward a target template slot whenever a feasible corridor covers it;
    the episode succeeds when the target is retrieved. All dynamics are
    first-order scalar updates, so episodes are cheap and reproducible
    from the seed.

    Action layout (14 channels, inferred from teacher_action/step usage):
    [0] open/close command, [1] transfer flag, [2] passive flag,
    [6] hold flag, [7] target template (normalized to [-1, 1]),
    [8] actor reach effort, [13] retrieve command. Remaining channels are
    unused by this proxy.
    """

    # Camera order used when stacking rendered views into observations.
    camera_names = ("front", "wrist_left", "wrist_right")

    def __init__(
        self,
        proxy_name: str,
        resolution: int = 96,
        seed: int = 0,
        num_templates: int = 32,
        rollout_horizon: int = 5,
        max_steps: int | None = None,
    ) -> None:
        """Create the env for *proxy_name* and immediately reset it.

        Raises:
            KeyError: If *proxy_name* is not a registered proxy.
        """
        if proxy_name not in PROXY_CONFIGS:
            raise KeyError(f"Unknown proxy: {proxy_name}")
        self.proxy = PROXY_CONFIGS[proxy_name]
        self.dynamics = PROXY_DYNAMICS[proxy_name]
        self.proxy_name = proxy_name
        self.resolution = resolution
        self.num_templates = num_templates
        self.rollout_horizon = rollout_horizon
        # Fall back to the proxy config's step budget when not overridden.
        self.max_steps = max_steps or self.proxy.max_steps
        self.rng = np.random.default_rng(seed)
        self.reset(seed=seed)

    def clone_state(self) -> dict[str, Any]:
        """Snapshot all mutable episode state (used by teacher lookahead)."""
        return {
            "step_count": self.step_count,
            "opening": self.opening,
            "disturbance": self.disturbance,
            "target_template": self.target_template,
            "target_depth": self.target_depth,
            "holding": self.holding,
            "transferred": self.transferred,
            "retrieved": self.retrieved,
            "actor_progress": self.actor_progress,
            "last_actor_template": self.last_actor_template,
            "visibility_trace": list(self.visibility_trace),
            "corridor_trace": list(self.corridor_trace),
        }

    def restore_state(self, state: dict[str, Any]) -> None:
        """Restore a snapshot produced by :meth:`clone_state`."""
        self.step_count = int(state["step_count"])
        self.opening = float(state["opening"])
        self.disturbance = float(state["disturbance"])
        self.target_template = int(state["target_template"])
        self.target_depth = float(state["target_depth"])
        self.holding = bool(state["holding"])
        self.transferred = bool(state["transferred"])
        self.retrieved = bool(state["retrieved"])
        self.actor_progress = float(state["actor_progress"])
        self.last_actor_template = int(state["last_actor_template"])
        self.visibility_trace = list(state["visibility_trace"])
        self.corridor_trace = list(state["corridor_trace"])

    def reset(self, seed: int | None = None) -> tuple[dict[str, Any], dict[str, Any]]:
        """Randomize a fresh episode; return (observation, privileged_state)."""
        if seed is not None:
            self.rng = np.random.default_rng(seed)
        self.step_count = 0
        self.opening = float(self.rng.uniform(0.08, 0.22))
        self.disturbance = float(self.rng.uniform(0.02, 0.12))
        # Keep the target away from the template edges.
        self.target_template = int(self.rng.integers(4, self.num_templates - 4))
        self.target_depth = float(self.rng.uniform(0.15, 0.45))
        self.holding = False
        self.transferred = False
        self.retrieved = False
        self.actor_progress = 0.0
        self.last_actor_template = self.target_template
        privileged_state = self.get_privileged_state()
        self.visibility_trace = [float(privileged_state["visibility"])]
        # Records whether any corridor cell is open under the current mode.
        self.corridor_trace = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
        return self.get_observation(privileged_state), privileged_state

    def _normalized_template(self, template_index: int) -> float:
        """Map a template index onto [-1, 1] for the action encoding."""
        return (template_index / float(self.num_templates - 1)) * 2.0 - 1.0

    def _current_support_mode(self) -> SupportMode:
        """Derive the current SupportMode from the holding/transferred flags."""
        if self.holding:
            return SupportMode.HOLD
        if self.transferred:
            return SupportMode.TRANSFER
        return SupportMode.PASSIVE

    def _mode_from_action(self, action: np.ndarray) -> SupportMode:
        """Decode the commanded support mode from action channels 6/1/2.

        Scores are tanh-squashed to [0, 1]; HOLD wins ties, and TRANSFER
        additionally requires the opening to already be at least 0.32.
        """
        hold_score = (np.tanh(float(action[6])) + 1.0) * 0.5
        transfer_score = (np.tanh(float(action[1])) + 1.0) * 0.5
        passive_score = (np.tanh(float(action[2])) + 1.0) * 0.5
        if hold_score >= max(transfer_score, passive_score):
            return SupportMode.HOLD
        if transfer_score >= passive_score and self.opening >= 0.32:
            return SupportMode.TRANSFER
        return SupportMode.PASSIVE

    def _visibility(self, opening: float | None = None, disturbance: float | None = None) -> float:
        """Target visibility in [0, 1]; defaults to the current env state."""
        opening = self.opening if opening is None else float(opening)
        disturbance = self.disturbance if disturbance is None else float(disturbance)
        visibility = (
            1.35 * opening
            - 0.58 * disturbance
            - 0.25 * self.target_depth
            + self.dynamics.visibility_bias
        )
        return float(np.clip(visibility, 0.0, 1.0))

    def _mode_factor(self, mode: SupportMode) -> float:
        """Corridor-width multiplier for *mode* (HOLD is the 1.0 baseline)."""
        if mode == SupportMode.HOLD:
            return 1.0
        if mode == SupportMode.TRANSFER:
            return self.dynamics.transfer_support_factor
        return self.dynamics.passive_support_factor

    def _mode_decay(self, mode: SupportMode) -> float:
        """Per-step opening decay for *mode* from the proxy dynamics."""
        if mode == SupportMode.HOLD:
            return self.dynamics.hold_decay
        if mode == SupportMode.TRANSFER:
            return self.dynamics.transfer_decay
        return self.dynamics.passive_decay

    def _corridor_for_mode(
        self,
        mode: SupportMode,
        opening: float | None = None,
        disturbance: float | None = None,
    ) -> np.ndarray:
        """Binary (num_templates,) mask of reachable slots under *mode*.

        The corridor is a band of ``width`` slots on each side of the
        target; it collapses to all zeros when the effective opening is too
        small or visibility drops below 70% of the retrieval threshold.
        """
        opening = self.opening if opening is None else float(opening)
        disturbance = self.disturbance if disturbance is None else float(disturbance)
        visibility = self._visibility(opening, disturbance)
        effective = opening * self._mode_factor(mode) - 0.35 * disturbance - 0.18 * self.target_depth
        width = int(np.floor(max(0.0, effective) * 8.0))
        corridor = np.zeros((self.num_templates,), dtype=np.float32)
        if visibility < self.dynamics.retrieve_visibility_threshold * 0.7 or width <= 0:
            return corridor
        low = max(0, self.target_template - width)
        high = min(self.num_templates, self.target_template + width + 1)
        corridor[low:high] = 1.0
        return corridor

    def _persistence_for_mode(self, mode: SupportMode) -> float:
        """Count how many future steps *mode* keeps a corridor open.

        Rolls the scalar opening/disturbance dynamics forward up to
        ``rollout_horizon`` steps without mutating the env; HOLD also gets
        a small per-step opening boost (the arm keeps working).
        """
        opening = self.opening
        disturbance = self.disturbance
        persisted = 0.0
        for _ in range(self.rollout_horizon):
            if self._corridor_for_mode(mode, opening, disturbance).any():
                persisted += 1.0
            else:
                break
            opening = float(np.clip(opening - self._mode_decay(mode) + (0.035 if mode == SupportMode.HOLD else 0.0), 0.0, 1.0))
            disturbance = float(np.clip(disturbance * (1.0 - self.dynamics.settle_rate), 0.0, 1.0))
        return persisted

    def _belief_map(self, visibility: float) -> np.ndarray:
        """32x32 Gaussian blob over the target location, scaled by visibility.

        Lower visibility widens the blob (more positional uncertainty) and
        dims it (lower peak mass).
        """
        side = 32
        x = np.linspace(0.0, 1.0, side, dtype=np.float32)
        y = np.linspace(0.0, 1.0, side, dtype=np.float32)
        yy, xx = np.meshgrid(y, x, indexing="ij")
        center_x = self.target_template / float(self.num_templates - 1)
        center_y = 0.72 - 0.25 * self.target_depth
        sigma = 0.08 + 0.05 * (1.0 - visibility)
        belief = np.exp(-(((xx - center_x) ** 2) + ((yy - center_y) ** 2)) / (2.0 * sigma**2))
        belief *= visibility
        return belief.astype(np.float32)

    def get_privileged_state(self) -> dict[str, Any]:
        """Assemble the full privileged label dict for the current state."""
        support_mode = int(self._current_support_mode())
        # (num_modes, num_templates): corridor masks for every mode.
        corridor = np.stack(
            [self._corridor_for_mode(mode) for mode in SupportMode],
            axis=0,
        )
        persistence = np.asarray([self._persistence_for_mode(mode) for mode in SupportMode], dtype=np.float32)
        visibility = self._visibility()
        # Over-opening past the desired amount is itself penalized as disturbance.
        disturbance_cost = float(np.clip(self.disturbance + 0.08 * max(0.0, self.opening - self.dynamics.desired_opening), 0.0, 1.0))
        return {
            "support_mode": support_mode,
            "corridor_feasible": corridor,
            "persistence_horizon": persistence,
            "disturbance_cost": disturbance_cost,
            "belief_map": self._belief_map(visibility),
            "visibility": visibility,
            "retrieval_success": bool(self.retrieved),
            "target_template": self.target_template,
        }

    def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
        """Minimal serializable dict from which camera views can be re-rendered."""
        privileged_state = privileged_state or self.get_privileged_state()
        current_mode = int(privileged_state["support_mode"])
        return {
            "opening": float(self.opening),
            "disturbance": float(self.disturbance),
            "target_template": int(self.target_template),
            "support_mode": current_mode,
            "visibility": float(privileged_state["visibility"]),
            "actor_template": int(self.last_actor_template),
            "actor_progress": float(self.actor_progress),
            "corridor_current": privileged_state["corridor_feasible"][current_mode].astype(np.float32),
            "step_fraction": float(self.step_count / max(1, self.max_steps)),
        }

    def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
        """32-dim proprioceptive feature vector (indices 15..31 stay zero)."""
        mode = privileged_state["support_mode"]
        features = np.zeros((32,), dtype=np.float32)
        features[0] = self.opening
        features[1] = self.disturbance
        features[2] = privileged_state["visibility"]
        # One-hot support mode at indices 3..5.
        features[3 + mode] = 1.0
        features[6] = self.target_template / float(self.num_templates - 1)
        features[7] = self.last_actor_template / float(self.num_templates - 1)
        features[8] = self.step_count / float(max(1, self.max_steps))
        # Per-mode persistence, normalized by the rollout horizon.
        features[9:12] = privileged_state["persistence_horizon"] / float(self.rollout_horizon)
        features[12] = float(privileged_state["corridor_feasible"][mode].any())
        features[13] = float(self.retrieved)
        features[14] = self.actor_progress
        return features

    def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
        """Render cameras and package the policy-facing observation dict."""
        privileged_state = privileged_state or self.get_privileged_state()
        render_state = self.render_state(privileged_state)
        images = render_views_from_state(
            proxy_name=self.proxy_name,
            render_state=render_state,
            resolution=self.resolution,
            num_templates=self.num_templates,
        )
        return {
            "images": np.stack([images[camera] for camera in self.camera_names], axis=0),
            "proprio": self._proprio(privileged_state),
            "text": PROXY_GOALS[self.proxy_name],
            "camera_names": self.camera_names,
            "render_state": render_state,
        }

    def teacher_action(self) -> np.ndarray:
        """Scripted expert: open first, then hold/commit, then retrieve.

        Strategy: keep opening under HOLD until ``desired_opening`` is
        reached; switch to the proxy's preferred mode once that mode would
        persist for at least 2 steps; command retrieval only when the
        corridor covers the target, visibility is high enough, and the
        actor has made sufficient progress.
        """
        privileged_state = self.get_privileged_state()
        preferred_mode = self.dynamics.preferred_mode
        if self.opening < self.dynamics.desired_opening:
            chosen_mode = SupportMode.HOLD
            open_cmd = 0.95
        elif privileged_state["persistence_horizon"][preferred_mode] >= 2.0:
            chosen_mode = preferred_mode
            open_cmd = 0.12
        else:
            chosen_mode = SupportMode.HOLD
            open_cmd = 0.30

        corridor = privileged_state["corridor_feasible"][int(chosen_mode)]
        actor_ready = bool(corridor[self.target_template] > 0.5)
        retrieve = (
            actor_ready
            and privileged_state["visibility"] >= self.dynamics.retrieve_visibility_threshold
            and self.actor_progress >= 0.55
        )
        action = np.zeros((14,), dtype=np.float32)
        action[0] = np.float32(open_cmd)
        action[1] = np.float32(1.0 if chosen_mode == SupportMode.TRANSFER else -1.0)
        action[2] = np.float32(1.0 if chosen_mode == SupportMode.PASSIVE else -1.0)
        action[6] = np.float32(1.0 if chosen_mode == SupportMode.HOLD else -1.0)
        action[7] = np.float32(self._normalized_template(self.target_template))
        action[8] = np.float32(1.0 if actor_ready else 0.2)
        action[13] = np.float32(1.0 if retrieve else -1.0)
        return action

    def teacher_chunk_and_rollout(
        self,
        chunk_horizon: int = 8,
        rollout_horizon: int | None = None,
    ) -> tuple[np.ndarray, dict[str, np.ndarray]]:
        """Simulate the teacher forward and record actions plus label rollout.

        Steps the env up to *chunk_horizon* teacher actions, collecting the
        first *rollout_horizon* privileged labels along the way, then
        restores the pre-call snapshot — the env is left unchanged. Short
        episodes are padded: actions with zeros, labels with the (restored-
        state-adjacent) final state's labels.
        """
        rollout_horizon = rollout_horizon or self.rollout_horizon
        snapshot = self.clone_state()
        action_chunk: list[np.ndarray] = []
        rollout_support_mode = []
        rollout_corridor = []
        rollout_persistence = []
        rollout_disturbance = []
        for step in range(chunk_horizon):
            action = self.teacher_action()
            action_chunk.append(action)
            _, _, terminated, truncated, privileged_state = self.step(action)
            if step < rollout_horizon:
                rollout_support_mode.append(privileged_state["support_mode"])
                rollout_corridor.append(privileged_state["corridor_feasible"])
                rollout_persistence.append(privileged_state["persistence_horizon"])
                rollout_disturbance.append(privileged_state["disturbance_cost"])
            if terminated or truncated:
                break
        # Pad to fixed chunk / rollout lengths for batched training.
        while len(action_chunk) < chunk_horizon:
            action_chunk.append(np.zeros((14,), dtype=np.float32))
        while len(rollout_support_mode) < rollout_horizon:
            rollout_support_mode.append(int(self._current_support_mode()))
            rollout_corridor.append(self.get_privileged_state()["corridor_feasible"])
            rollout_persistence.append(self.get_privileged_state()["persistence_horizon"])
            rollout_disturbance.append(self.get_privileged_state()["disturbance_cost"])
        self.restore_state(snapshot)
        return np.stack(action_chunk, axis=0).astype(np.float32), {
            "rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
            "rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
            "rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
            "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
        }

    def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
        """Advance one step; returns (obs, reward, terminated, truncated, privileged).

        Order matters here: mode flags are updated first, then the
        opening/disturbance scalars, then actor progress (which can trigger
        a corridor "shock" collapse when persistence is low), and finally
        the retrieval check.
        """
        action = np.asarray(action, dtype=np.float32)
        mode = self._mode_from_action(action)
        self.holding = mode == SupportMode.HOLD
        self.transferred = mode == SupportMode.TRANSFER
        open_cmd = float(np.clip(action[0], -1.0, 1.0))
        actor_reach = float((np.tanh(float(action[8])) + 1.0) * 0.5)
        retrieve_cmd = float((np.tanh(float(action[13])) + 1.0) * 0.5)
        # Decode the commanded template slot from [-1, 1] back to an index.
        self.last_actor_template = int(
            np.clip(
                round(((float(np.clip(action[7], -1.0, 1.0)) + 1.0) * 0.5) * (self.num_templates - 1)),
                0,
                self.num_templates - 1,
            )
        )

        support_bonus = {SupportMode.HOLD: 0.08, SupportMode.TRANSFER: 0.04, SupportMode.PASSIVE: 0.0}[mode]
        closure = self._mode_decay(mode)
        self.opening = float(
            np.clip(
                self.opening + 0.16 * open_cmd + support_bonus - closure - 0.05 * self.disturbance,
                0.0,
                1.0,
            )
        )
        # Disturbance grows with commanded motion and over-opening, settles otherwise.
        self.disturbance = float(
            np.clip(
                self.disturbance
                + self.dynamics.disturbance_gain * abs(open_cmd)
                + 0.025 * actor_reach
                + 0.05 * max(0.0, self.opening - self.dynamics.desired_opening)
                - self.dynamics.settle_rate,
                0.0,
                1.0,
            )
        )

        self.step_count += 1
        privileged_state = self.get_privileged_state()
        corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]
        if corridor[self.last_actor_template] > 0.5 and actor_reach >= 0.55:
            # Progress scales with how long the current mode will keep the
            # corridor open; reaching through a fragile corridor also
            # "shocks" the opening partially closed.
            persistence_ratio = privileged_state["persistence_horizon"][privileged_state["support_mode"]] / float(
                max(1, self.rollout_horizon)
            )
            self.actor_progress = float(np.clip(self.actor_progress + 0.55 * persistence_ratio, 0.0, 1.0))
            shock = 0.16 * max(0.0, 0.8 - persistence_ratio)
            if shock > 0.0:
                self.opening = float(np.clip(self.opening - shock, 0.0, 1.0))
                # Re-derive labels after the shock changed the opening.
                privileged_state = self.get_privileged_state()
                corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]
        else:
            # Reaching without a corridor (or too timidly) loses progress.
            self.actor_progress = float(np.clip(self.actor_progress - 0.20, 0.0, 1.0))
        success = bool(
            retrieve_cmd >= 0.55
            and self.actor_progress >= 0.80
            and corridor[self.last_actor_template] > 0.5
            and privileged_state["visibility"] >= self.dynamics.retrieve_visibility_threshold
            and self.disturbance < 0.9
        )
        if success:
            self.retrieved = True
            privileged_state["retrieval_success"] = True

        self.visibility_trace.append(float(privileged_state["visibility"]))
        self.corridor_trace.append(float(corridor.any()))

        # Sparse success reward plus a small visibility-vs-disturbance shaping term.
        reward = 1.0 if success else (0.08 * privileged_state["visibility"] - 0.03 * privileged_state["disturbance_cost"])
        terminated = bool(self.retrieved)
        truncated = bool(self.step_count >= self.max_steps)
        return self.get_observation(privileged_state), float(reward), terminated, truncated, privileged_state
475
+
476
+
477
def render_views_from_state(
    proxy_name: str,
    render_state: dict[str, Any],
    resolution: int,
    num_templates: int = 32,
) -> dict[str, np.ndarray]:
    """Render the three synthetic camera views for a proxy environment state.

    Args:
        proxy_name: Key into ``PROXY_DYNAMICS`` selecting the palette/dynamics.
        render_state: Flat dict of scalars/arrays describing the current state.
            Keys read here: ``opening``, ``disturbance``, ``target_template``,
            ``support_mode``, ``visibility``, ``actor_template``,
            ``actor_progress``, ``corridor_current``, ``step_fraction``.
        resolution: Height and width (square) of each rendered image, in pixels.
        num_templates: Number of template columns in the wrist-right view.

    Returns:
        Dict with keys ``"front"``, ``"wrist_left"``, ``"wrist_right"``, each a
        ``(resolution, resolution, 3)`` uint8 RGB image.
    """
    dynamics = PROXY_DYNAMICS[proxy_name]
    opening = float(render_state["opening"])
    disturbance = float(render_state["disturbance"])
    target_template = int(render_state["target_template"])
    support_mode = int(render_state["support_mode"])
    visibility = float(render_state["visibility"])
    actor_template = int(render_state["actor_template"])
    actor_progress = float(render_state["actor_progress"])
    corridor_current = np.asarray(render_state["corridor_current"], dtype=np.float32)
    step_fraction = float(render_state["step_fraction"])

    height = width = resolution
    # Base canvas tinted by the proxy's palette (assumed RGB triple in [0, 1] —
    # TODO confirm against PROXY_DYNAMICS definitions).
    base = np.ones((height, width, 3), dtype=np.float32)
    base *= np.asarray(dynamics.palette, dtype=np.float32)

    # Normalized pixel coordinate grids; xx varies along width, yy along height.
    x = np.linspace(0.0, 1.0, width, dtype=np.float32)
    y = np.linspace(0.0, 1.0, height, dtype=np.float32)
    yy, xx = np.meshgrid(y, x, indexing="ij")
    # Horizontal position of the target template's gap; widens with `opening`.
    center_x = target_template / float(max(1, num_templates - 1))
    gap_width = 0.04 + 0.18 * opening
    gap_mask = np.abs(xx - center_x) <= gap_width
    stripe_mask = (np.sin(xx * np.pi * 18.0) > 0.2).astype(np.float32)

    # Front view: striped backdrop, brightened gap, target blob, blue tint that
    # grows with disturbance and episode progress.
    front = base.copy()
    front[..., 1] += 0.22 * stripe_mask
    front[..., 0] += 0.07 * stripe_mask
    front[gap_mask, :] = np.clip(front[gap_mask, :] + np.asarray([0.18, 0.18, 0.18], dtype=np.float32), 0.0, 1.0)
    target_mask = ((xx - center_x) ** 2 + (yy - 0.76) ** 2) <= (0.03 + 0.015 * visibility) ** 2
    front[target_mask, 0] = np.clip(front[target_mask, 0] + 0.55 * visibility, 0.0, 1.0)
    front[target_mask, 1] *= 0.55
    front[..., 2] = np.clip(front[..., 2] + 0.18 * disturbance + 0.05 * step_fraction, 0.0, 1.0)
    # BUGFIX: the stripe additions above are not clipped; with a bright palette
    # channel values can exceed 1.0, and `(x * 255).astype(np.uint8)` wraps
    # modulo 256 (e.g. 1.12 -> ~29), corrupting pixels. Clip the whole image.
    front = np.clip(front, 0.0, 1.0)

    # Wrist-left view: left third = gripper opening bar, middle third =
    # disturbance bar, right third = solid support-mode color.
    wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
    open_rows = int(opening * height)
    wrist_left[height - open_rows :, : width // 3, 1] = 0.75
    wrist_left[height - int(disturbance * height) :, width // 3 : (2 * width) // 3, 0] = 0.85
    mode_colors = {
        SupportMode.HOLD: np.asarray([0.92, 0.82, 0.16], dtype=np.float32),
        SupportMode.TRANSFER: np.asarray([0.16, 0.78, 0.92], dtype=np.float32),
        SupportMode.PASSIVE: np.asarray([0.86, 0.86, 0.86], dtype=np.float32),
    }
    wrist_left[:, (2 * width) // 3 :, :] = mode_colors[SupportMode(support_mode)]

    # Wrist-right view: one vertical band per template; green = corridor
    # feasible, red = target template, blue = actor template. Top band encodes
    # visibility, bottom band encodes actor progress.
    wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
    template_edges = np.linspace(0, width, num_templates + 1, dtype=np.int32)
    for template_idx in range(num_templates):
        col_start = template_edges[template_idx]
        col_end = template_edges[template_idx + 1]
        if corridor_current[template_idx] > 0.5:
            wrist_right[:, col_start:col_end, 1] = 0.70
        if template_idx == target_template:
            wrist_right[:, col_start:col_end, 0] = 0.78
        if template_idx == actor_template:
            wrist_right[:, col_start:col_end, 2] = 0.90
    wrist_right[: max(1, int(visibility * height)), :, :] += 0.10
    wrist_right[height - max(1, int(actor_progress * height)) :, :, 2] += 0.12
    wrist_right = np.clip(wrist_right, 0.0, 1.0)

    # Defensive clip on all views before quantization to keep uint8 conversion
    # from wrapping (no-op for arrays already in [0, 1]).
    return {
        "front": (np.clip(front, 0.0, 1.0) * 255.0).astype(np.uint8),
        "wrist_left": (np.clip(wrist_left, 0.0, 1.0) * 255.0).astype(np.uint8),
        "wrist_right": (np.clip(wrist_right, 0.0, 1.0) * 255.0).astype(np.uint8),
    }