lsnu committed on Apr 1

Commit

bfb9665

verified ·

1 Parent(s): 5fbeceb

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/command.txt +1 -0
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/stdout.txt +121 -0
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/summary.json +35 -0
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/stderr.txt +35 -0
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/summary.json +41 -0
artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.json +0 -0
artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.md +17 -0
artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.json +0 -0
artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_only_live/active/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.md +17 -0
artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.json +0 -0
artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.md +17 -0
artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.json +0 -0
artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.md +17 -0
code/VLAarchtests2_code/CHANGE_AND_TEST_LOG.md +221 -0
code/VLAarchtests2_code/MODEL_AND_ARTIFACT_INDEX.md +59 -0
code/VLAarchtests2_code/README.md +301 -0
code/VLAarchtests2_code/RESULTS_RAW.md +178 -0
code/VLAarchtests2_code/VLAarchtests/MODEL_INDEX.md +81 -0
code/VLAarchtests2_code/VLAarchtests/README.md +172 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json +2702 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/ablations_v7/ablations.md +92 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json +67 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md +27 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md +189 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/controller_sweep_v7/controller_sweep.md +96 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.json +49 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md +54 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.json +718 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.md +65 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proxy_diagnostics_v7/proxy_diagnostics.json +16 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.md +38 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.json +42 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md +16 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.json +0 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.md +17 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.json +566 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.md +17 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.json +0 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.md +17 -0

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/command.txt ADDED Viewed

	@@ -0,0 +1 @@

+ /workspace/envs/rlbench/bin/python /workspace/third_party/AnyBimanual/eval.py method=PERACT_BC framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live framework.start_seed=0 framework.eval_type=60000 framework.eval_episodes=25 framework.eval_envs=1 framework.gpu=0 rlbench.task_name=perlf_release_dual_push_buttons_smoke1 rlbench.tasks=[dual_push_buttons] rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root rlbench.headless=True rlbench.gripper_mode=BimanualDiscrete rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning rlbench.action_mode=BimanualMoveArmThenGripper

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/stdout.txt ADDED Viewed

	@@ -0,0 +1,121 @@

+[2026-03-31 23:32:28,801][root][INFO] -
+method:
+  name: PERACT_BC
+  agent_type: leader_follower
+  robot_name: bimanual
+  image_crop_size: 64
+  bounds_offset:
+  - 0.15
+  voxel_sizes:
+  - 100
+  include_prev_layer: false
+  num_latents: 2048
+  latent_dim: 512
+  transformer_depth: 6
+  transformer_iterations: 1
+  cross_heads: 1
+  cross_dim_head: 64
+  latent_heads: 8
+  latent_dim_head: 64
+  pos_encoding_with_lang: true
+  conv_downsample: true
+  lang_fusion_type: seq
+  voxel_patch_size: 5
+  voxel_patch_stride: 5
+  final_dim: 64
+  low_dim_size: 4
+  input_dropout: 0.1
+  attn_dropout: 0.1
+  decoder_dropout: 0.0
+  lr: 0.0005
+  lr_scheduler: false
+  num_warmup_steps: 3000
+  optimizer: lamb
+  lambda_weight_l2: 1.0e-06
+  trans_loss_weight: 1.0
+  rot_loss_weight: 1.0
+  grip_loss_weight: 1.0
+  collision_loss_weight: 1.0
+  rotation_resolution: 5
+  activation: lrelu
+  norm: None
+  crop_augmentation: true
+  transform_augmentation:
+    apply_se3: true
+    aug_xyz:
+    - 0.125
+    - 0.125
+    - 0.125
+    aug_rpy:
+    - 0.0
+    - 0.0
+    - 45.0
+    aug_rot_resolution: ${method.rotation_resolution}
+  demo_augmentation: true
+  demo_augmentation_every_n: 10
+  no_skip_connection: false
+  no_perceiver: false
+  no_language: false
+  keypoint_method: heuristic
+rlbench:
+  task_name: perlf_release_dual_push_buttons_smoke1
+  tasks:
+  - dual_push_buttons
+  demo_path: /workspace/baselines/AnyBimanual_subset3_demo_root
+  episode_length: 25
+  cameras:
+  - over_shoulder_left
+  - over_shoulder_right
+  - overhead
+  - wrist_right
+  - wrist_left
+  - front
+  camera_resolution:
+  - 256
+  - 256
+  scene_bounds:
+  - -0.3
+  - -0.5
+  - 0.6
+  - 0.7
+  - 0.5
+  - 1.6
+  include_lang_goal_in_obs: true
+  time_in_state: true
+  headless: true
+  gripper_mode: BimanualDiscrete
+  arm_action_mode: BimanualEndEffectorPoseViaPlanning
+  action_mode: BimanualMoveArmThenGripper
+framework:
+  tensorboard_logging: true
+  csv_logging: true
+  gpu: 0
+  logdir: /workspace/baselines/AnyBimanual_release_eval_live
+  start_seed: 0
+  record_every_n: 5
+  eval_envs: 1
+  eval_from_eps_number: 0
+  eval_episodes: 25
+  eval_type: 60000
+  eval_save_metrics: true
+cinematic_recorder:
+  enabled: false
+  camera_resolution:
+  - 1280
+  - 720
+  fps: 30
+  rotate_speed: 0.005
+  save_path: /tmp/videos/
+[2026-03-31 23:32:28,811][root][INFO] - Using env device cuda:0.
+[2026-03-31 23:32:28,812][root][INFO] - Evaluating seed 0.
+[2026-03-31 23:32:28,812][root][INFO] - Using method PERACT_BC with type leader_follower
+Weight: [60000]
+[03/31/26 23:33:05] INFO     INFO:root:eval_env:  _independent_env_runner.py:131
+                             Launching env.
+                    INFO     INFO:root:Agent      _independent_env_runner.py:134
+                             information:
+                    INFO     INFO:root:<yarr.agen _independent_env_runner.py:135
+                             ts.agent.LeaderFollo
+                             werAgent object at
+                             0x77975ba9b970>

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/summary.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "adapter_mode": "trunk_only",
+  "task_name": "perlf_release_dual_push_buttons_smoke1",
+  "tasks": [
+    "dual_push_buttons"
+  ],
+  "task_families": [
+    "generic"
+  ],
+  "passthrough_only": true,
+  "passthrough_reason": "generic_task_family",
+  "episodes_requested": 25,
+  "episode_scores": [],
+  "mean_score": 0.0,
+  "final_score": null,
+  "subprocess_returncode": 0,
+  "command": [
+    "/workspace/envs/rlbench/bin/python",
+    "/workspace/third_party/AnyBimanual/eval.py",
+    "method=PERACT_BC",
+    "framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live",
+    "framework.start_seed=0",
+    "framework.eval_type=60000",
+    "framework.eval_episodes=25",
+    "framework.eval_envs=1",
+    "framework.gpu=0",
+    "rlbench.task_name=perlf_release_dual_push_buttons_smoke1",
+    "rlbench.tasks=[dual_push_buttons]",
+    "rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root",
+    "rlbench.headless=True",
+    "rlbench.gripper_mode=BimanualDiscrete",
+    "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
+    "rlbench.action_mode=BimanualMoveArmThenGripper"
+  ]
+}

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/stderr.txt ADDED Viewed

	@@ -0,0 +1,35 @@

+/workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
+The version_base parameter is not specified.
+Please specify a compatability version level, or None.
+Will assume defaults for version 1.1
+  @hydra.main(config_name="eval", config_path="conf")
+/workspace/envs/rlbench/lib/python3.10/site-packages/hydra/_internal/defaults_list.py:251: UserWarning: In 'eval': Defaults list is missing `_self_`. See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/default_composition_order for more information
+  warnings.warn(msg, UserWarning)
+/workspace/envs/rlbench/lib/python3.10/site-packages/hydra/core/default_element.py:124: UserWarning: In 'method/PERACT_BC': Usage of deprecated keyword in package header '# @package _group_'.
+See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/changes_to_package_header for more information
+  deprecation_warning(
+/workspace/envs/rlbench/lib/python3.10/site-packages/hydra/_internal/hydra.py:119: UserWarning: Future Hydra versions will no longer change working directory at job runtime by default.
+See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for more information.
+  ret = run_job(
+/workspace/envs/rlbench/lib/python3.10/site-packages/wandb/apis/public.py:3109: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+  from pkg_resources import parse_version
+/workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
+The version_base parameter is not specified.
+Please specify a compatability version level, or None.
+Will assume defaults for version 1.1
+  @hydra.main(config_name="eval", config_path="conf")
+/workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
+The version_base parameter is not specified.
+Please specify a compatability version level, or None.
+Will assume defaults for version 1.1
+  @hydra.main(config_name="eval", config_path="conf")
+/workspace/envs/rlbench/lib/python3.10/site-packages/wandb/apis/public.py:3109: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+  from pkg_resources import parse_version
+/workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
+The version_base parameter is not specified.
+Please specify a compatability version level, or None.
+Will assume defaults for version 1.1
+  @hydra.main(config_name="eval", config_path="conf")
+/workspace/third_party/YARR/yarr/utils/rollout_generator.py:73: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1716905979055/work/torch/csrc/utils/tensor_new.cpp:274.)
+  prepped_data = {k: torch.tensor([v], device=self._env_device) for k, v in obs_history.items()}
+[W CudaIPCTypes.cpp:16] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]

artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/summary.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "adapter_mode": "trunk_only",
+  "task_name": "perlf_release_dual_push_buttons_smoke1",
+  "tasks": [
+    "dual_push_buttons"
+  ],
+  "task_families": [
+    "generic"
+  ],
+  "passthrough_only": true,
+  "passthrough_reason": "generic_task_family",
+  "episodes_requested": 5,
+  "episode_scores": [
+    0.0,
+    0.0,
+    100.0,
+    0.0,
+    0.0,
+    20.0
+  ],
+  "mean_score": 20.0,
+  "subprocess_returncode": 0,
+  "command": [
+    "/workspace/envs/rlbench/bin/python",
+    "/workspace/third_party/AnyBimanual/eval.py",
+    "method=PERACT_BC",
+    "framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live",
+    "framework.start_seed=0",
+    "framework.eval_type=60000",
+    "framework.eval_episodes=5",
+    "framework.eval_envs=1",
+    "framework.gpu=0",
+    "rlbench.task_name=perlf_release_dual_push_buttons_smoke1",
+    "rlbench.tasks=[dual_push_buttons]",
+    "rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root",
+    "rlbench.headless=True",
+    "rlbench.gripper_mode=BimanualDiscrete",
+    "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
+    "rlbench.action_mode=BimanualMoveArmThenGripper"
+  ]
+}

artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 0.160
+- corridor_availability: 0.002
+- reocclusion_rate: 0.002
+- disturbance_cost: 0.676
+- premature_retrieve_rate: 0.835
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.000
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 0.160
+- corridor_availability: 0.002
+- reocclusion_rate: 0.002
+- disturbance_cost: 0.676
+- premature_retrieve_rate: 0.835
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.046
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 0.160
+- corridor_availability: 0.002
+- reocclusion_rate: 0.002
+- disturbance_cost: 0.676
+- premature_retrieve_rate: 0.835
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.000
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: candidate0
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 0.160
+- corridor_availability: 0.002
+- reocclusion_rate: 0.002
+- disturbance_cost: 0.676
+- premature_retrieve_rate: 0.835
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.046
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 0.160
+- corridor_availability: 0.002
+- reocclusion_rate: 0.002
+- disturbance_cost: 0.676
+- premature_retrieve_rate: 0.835
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.046
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: oracle
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
+- episodes: 24.000
+- mean_success: 0.000
+- visibility_integral: 10.234
+- corridor_availability: 0.173
+- reocclusion_rate: 0.008
+- disturbance_cost: 0.655
+- premature_retrieve_rate: 0.811
+- reocclusion_after_reveal_rate: 0.250
+- planner_regret: 0.000
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_only_live/active/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
+- episodes: 300.000
+- mean_success: 0.000
+- visibility_integral: 0.135
+- corridor_availability: 0.000
+- reocclusion_rate: 0.000
+- disturbance_cost: 0.675
+- premature_retrieve_rate: 0.825
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.001
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt
+- episodes: 72.000
+- mean_success: 0.000
+- visibility_integral: 2.161
+- corridor_availability: 0.029
+- reocclusion_rate: 0.020
+- disturbance_cost: 0.746
+- premature_retrieve_rate: 0.363
+- reocclusion_after_reveal_rate: 0.250
+- planner_regret: 0.010
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## adapter
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt
+- episodes: 72.000
+- mean_success: 0.000
+- visibility_integral: 2.161
+- corridor_availability: 0.029
+- reocclusion_rate: 0.020
+- disturbance_cost: 0.746
+- premature_retrieve_rate: 0.363
+- reocclusion_after_reveal_rate: 0.250
+- planner_regret: 0.021
+- foliage_success: 0.000
+- bag_success: 0.000
+- cloth_success: 0.000

artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## patched
+- controller: model
+- checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/checkpoint_best.pt
+- episodes: 36.000
+- mean_success: 0.667
+- visibility_integral: 19.950
+- corridor_availability: 0.797
+- reocclusion_rate: 0.003
+- disturbance_cost: 0.284
+- premature_retrieve_rate: 0.372
+- reocclusion_after_reveal_rate: 0.222
+- planner_regret: 0.159
+- foliage_success: 0.667
+- bag_success: 0.750
+- cloth_success: 0.583

code/VLAarchtests2_code/CHANGE_AND_TEST_LOG.md ADDED Viewed

	@@ -0,0 +1,221 @@

+# Change And Test Log
+This file records the main code changes and executed test commands copied into this repo. Result statements below are raw command outcomes only.
+## Previous Repo Work Included Here
+Copied from `history/VLAarchtests_previous_README.md`:
+- core model, memory, planner, and dataset changes under:
+  - `VLAarchtests/code/reveal_vla_bimanual/models/`
+  - `VLAarchtests/code/reveal_vla_bimanual/train/losses.py`
+  - `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/`
+  - `VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py`
+- training and eval paths under:
+  - `VLAarchtests/code/reveal_vla_bimanual/train/`
+  - `VLAarchtests/code/reveal_vla_bimanual/eval/`
+- earlier test suite under:
+  - `VLAarchtests/tests/`
+## Current Session File Changes
+### Core reveal/proxy path
+- `VLAarchtests/code/reveal_vla_bimanual/models/policy.py`
+- `VLAarchtests/code/reveal_vla_bimanual/models/action_decoder.py`
+- `VLAarchtests/code/reveal_vla_bimanual/models/backbones.py`
+- `VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py`
+- `VLAarchtests/code/reveal_vla_bimanual/train/losses.py`
+- `VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_reveal_benchmark.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/summarize_anybimanual_overlap_eval.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_proposal_alignment_diagnostics.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_knn_eval.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_knn_task_sweep.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_retargeted_demo_eval.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py`
+- `VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py`
+- `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py`
+- `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py`
+- `VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py`
+### Training/eval wrappers and configs
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_task_routed_proxy_eval.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_bag_selector_iter9.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_eval.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_retargeted_demo_eval.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter6.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter7.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter9_bag.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_100demo_fair_step1_full.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_unfreeze_top2_seed17.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_finetune_seed17.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_finetune_weighted_seed17.yaml`
+- `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17.yaml`
+- `environment/reconstruct_anybimanual_overlap_replay.sh`
+### Test additions or updates
+- `VLAarchtests/tests/test_eval_toggle_paths_work.py`
+- `VLAarchtests/tests/test_task_routed_model_eval.py`
+- `VLAarchtests/tests/test_anybimanual_resume_logic.py`
+- `VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py`
+- `VLAarchtests/tests/test_candidate_ranking_loss.py`
+- `VLAarchtests/tests/test_compose_task_routed_proxy_summary.py`
+- `VLAarchtests/tests/test_build_task_specialized_episode_specs.py`
+- `VLAarchtests/tests/test_proposal_mode_names_label_base_action.py`
+- `VLAarchtests/tests/test_proxy_scripted_bench.py`
+- `VLAarchtests/tests/test_rvt_backbone_forward.py`
+- `VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py`
+- `VLAarchtests/tests/test_rlbench_init_checkpoint.py`
+- `VLAarchtests/tests/test_rlbench_pickle_bootstrap.py`
+- `VLAarchtests/tests/test_rlbench_task_resolver_aliases.py`
+- `VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
+- `VLAarchtests/tests/test_dual_push_retarget_utils.py`
+- `VLAarchtests/tests/test_dual_push_full_arch_utils.py`
+### Third-party baseline path changes
+- `third_party/AnyBimanual/third_party/YARR/yarr/runners/offline_train_runner.py`
+- `third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py`
+- `third_party/AnyBimanual/agents/peract_bc/launch_utils.py`
+- `third_party/AnyBimanual/agents/peract_bc/qattention_peract_bc_agent.py`
+- `third_party/AnyBimanual/agents/peract_bimanual/qattention_peract_bc_agent.py`
+## Current Session Test Commands
+Executed commands recorded in the workspace:
+- `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/models/action_decoder.py /workspace/VLAarchtests/tests/test_proposal_mode_names_label_base_action.py`
+- `PYTHONPATH=/workspace/VLAarchtests/code/reveal_vla_bimanual pytest -q /workspace/VLAarchtests/tests/test_proposal_mode_names_label_base_action.py /workspace/VLAarchtests/tests/test_candidate_ranking_loss.py /workspace/VLAarchtests/tests/test_compose_task_routed_proxy_summary.py /workspace/VLAarchtests/tests/test_build_task_specialized_episode_specs.py`
+  - result: `11 passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py`
+  - result: `2 passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_task_routed_model_eval.py /workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py`
+  - result: `4 passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_rvt_backbone_forward.py /workspace/VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py /workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py /workspace/VLAarchtests/tests/test_rlbench_init_checkpoint.py /workspace/VLAarchtests/tests/test_rlbench_pickle_bootstrap.py /workspace/VLAarchtests/tests/test_rlbench_task_resolver_aliases.py /workspace/VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
+  - result: `passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_build_task_specialized_episode_specs.py /workspace/VLAarchtests/tests/test_candidate_ranking_loss.py /workspace/VLAarchtests/tests/test_compose_task_routed_proxy_summary.py`
+  - result: `10 passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_rlbench_knn_eval_scene_kwargs.py`
+  - result: `passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py`
+  - result: `6 passed`
+- `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
+  - result: `9 passed`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_bag_selector_iter9.sh`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_task_routed_proxy_eval.sh`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_retargeted_demo_eval.sh`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
+- `PYTHONPATH=/workspace/third_party/AnyBimanual/third_party/YARR pytest -q /workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py`
+  - result: `4 passed`
+- `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py /workspace/VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py /workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py /workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
+  - result: `passed`
+- `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_retargeted_demo_eval.py`
+  - result: `passed`
+- `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
+  - result: `passed`
+## Current Session Generated Reports
+Current-session report roots staged in this repo:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/`
+- `VLAarchtests/artifacts/reports/sprint_v7_followup/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
+- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
+- `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/`
+- `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
+## HF Packaging Notes
+Raw packaging changes applied to the staged HF export:
+- `baselines/AnyBimanual_overlap_replay/multi/` was reshaped from one flat directory into shard subdirectories:
+  - `00000-04999/`
+  - `05000-09999/`
+  - `10000-14999/`
+- file count after reshape: `14034`
+- reconstruction helper added at:
+  - `environment/reconstruct_anybimanual_overlap_replay.sh`
+- exact Hub rejection error before the reshape:
+  - `Your push was rejected because it contains too many files per directory. Each directory in your git repo can only contain up to 10000 files. Offending directories: /baselines/AnyBimanual_overlap_replay/multi/`
+## Current Session Logs
+Main logs staged in this repo:
+- `reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log`
+- `reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log`
+- `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+- `reports/anybimanual_subset3_overlap_resume1000_summary.log`
+- `reports/task_routed_proxy_v1_rerun.log`
+- `reports/run_bag_selector_iter9_prebuild.log`
+- `reports/anybimanual_release_subset3_eval_ep5.log`
+- `reports/rvt_overlap_branch_fixedbounds_20260330_chain.sh`
+- `reports/dual_push_full_arch_hybrid_iter6_scene_ep5.log`
+- `reports/dual_push_full_arch_hybrid_iter6_backbone_ep2_r005.log`
+## Official Overlap Eval Final Raw Outputs
+Sources:
+- `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
+Raw values:
+- step `1000`
+- local mean success `0.16`
+- `coordinated_push_box`: success `0.0`, return `0.0`
+- `coordinated_lift_ball`: success `0.0`, return `0.0`
+- `dual_push_buttons`: success `0.48`, return `12.0`
+## General-Task Anchor Raw Outputs
+Sources:
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
+Raw values:
+- public AnyBimanual release, step `60000`: success `0.96`, return `24.0`, length `21.56`
+- local official single-task eval, step `60000`, `25` episodes: success `0.96`, return `24.0`, length `21.84`
+- local clip backbone-only result: success `0.0`, return `0.0`
+- local elastic reveal proxy iter6 result: success `0.0`, return `0.0`
+- local RVT frozen fixed-bounds result: success `0.0`, return `0.0`
+## Dual-Push Branch Raw Outputs
+Sources:
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
+Raw values:
+- demo replay through `absolute_action_from_delta`: mean success `0.8`, mean return `0.8`
+- retargeted demo with checkpoint backbone retrieval and vision-only button localization, `5` episodes: mean success `1.0`, mean return `1.0`
+- elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization, `1` episode: mean success `1.0`, mean return `1.0`
+- full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint, `1` episode: mean success `1.0`, mean return `1.0`, steps `116`, path recoveries `0`, noop fallbacks `0`

code/VLAarchtests2_code/MODEL_AND_ARTIFACT_INDEX.md ADDED Viewed

	@@ -0,0 +1,59 @@

+# Model And Artifact Index
+Main staged roots:
+- `VLAarchtests/code/reveal_vla_bimanual/`
+- `VLAarchtests/tests/`
+- `VLAarchtests/artifacts/`
+- `third_party/AnyBimanual/`
+- `baselines/`
+- `outputs/`
+- `reports/`
+- `handoff/instructions4.md`
+- `history/VLAarchtests_previous_README.md`
+Key current-session report roots:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/`
+- `VLAarchtests/artifacts/reports/sprint_v7_followup/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
+- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
+- `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/`
+- `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
+Key current-session run/log roots:
+- `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/`
+- `baselines/AnyBimanual_release_eval_anchor/perlf_release_dual_push_buttons_ep25/`
+- `baselines/AnyBimanual_overlap_replay/`
+- `outputs/rlbench_true_baselines/`
+- `outputs/rlbench_dual_push/`
+- `outputs/rlbench_rvt_branch/`
+- `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+- `reports/anybimanual_subset3_overlap_resume1000_summary.log`
+- `reports/anybimanual_release_subset3_eval_ep5.log`
+- `reports/dual_push_full_arch_probe_iter6_scene_ep1/`
+- `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/`
+- `reports/dual_push_nonzero_branch_20260330/`
+- `reports/run_bag_selector_iter9_prebuild.log`
+- `reports/task_routed_proxy_v1_rerun.log`
+- `environment/reconstruct_anybimanual_overlap_replay.sh`
+Key final official overlap summary files:
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
+HF export packaging note:
+- `baselines/AnyBimanual_overlap_replay/multi/` is sharded into subdirectories in this repo copy.

code/VLAarchtests2_code/README.md ADDED Viewed

	@@ -0,0 +1,301 @@

+# VLAarchtests2
+Bundle staged from `/workspace` on `2026-03-31 UTC`.
+This repo is the follow-on organization repo to `lsnu/VLAarchtests`. It includes:
+- current code under `VLAarchtests/`
+- current third-party baseline code under `third_party/`
+- current baseline runs, replay artifacts, demo roots, and released checkpoint material under `baselines/`
+- current training outputs and checkpoints under `outputs/`
+- current logs under `reports/`
+- environment recreation files under `environment/`
+- raw results and change/test logs at the repo root
+- the previous repo README under `history/VLAarchtests_previous_README.md`
+- the active handoff file under `handoff/instructions4.md`
+## Top-Level Contents
+- `VLAarchtests/`
+  - code, tests, configs, generated configs, reports, checkpoints, and proxy datasets from the current RunPod workspace
+- `third_party/AnyBimanual/`
+  - local AnyBimanual checkout used for the official overlap baseline branch, including local compatibility patches
+- `baselines/`
+  - released AnyBimanual checkpoint material
+  - overlap replay artifacts
+    - HF export packaging note: `baselines/AnyBimanual_overlap_replay/multi/` is sharded into subdirectories to satisfy the Hub `10000 files per directory` limit
+  - overlap run directories
+  - local subset3 demo roots used by the overlap branch
+- `outputs/`
+  - RLBench training outputs and checkpoints used by the current anchor, RVT, dual-push, and elastic-controller branches
+- `reports/`
+  - training and evaluation logs copied from `/workspace/reports`
+- `environment/`
+  - machine snapshot, package lists, and setup helpers
+- `history/`
+  - copied previous-repo README
+- `handoff/`
+  - active sprint instruction file
+- `RESULTS_RAW.md`
+  - raw result tables and final official overlap eval outputs
+- `CHANGE_AND_TEST_LOG.md`
+  - file-level change log and executed test commands
+- `MODEL_AND_ARTIFACT_INDEX.md`
+  - staged directory map with main artifact roots
+## Previous Repo Coverage
+The earlier `lsnu/VLAarchtests` repo covered the `2026-03-25/26` work. Its README is copied verbatim at:
+- `history/VLAarchtests_previous_README.md`
+Previous-repo items explicitly referenced there include:
+- compact, spatial, compact-phase, and spatial-phase proxy branches
+- earlier RLBench direct-policy and kNN runs
+- environment recreation files
+- prior raw result tables
+## Current Session Additions
+Current-session folders added or expanded in this repo include:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/`
+- `VLAarchtests/artifacts/reports/sprint_v7_followup/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
+- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
+- `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
+- `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
+## Raw Results Snapshot
+### Proxy sprint v7
+Source:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
+Raw values:
+- base model mean success: `0.28`
+- base per-task: foliage `0.39`, bag `0.31`, cloth `0.14`
+- random mean success: `0.43333333333333335`
+- candidate0 mean success: `0.2`
+- oracle mean success: `0.4066666666666667`
+- scripted mean success: `1.0`
+### Eval-time ablations
+Source:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
+Raw values:
+- `no_planner`: `0.2`
+- `no_memory`: `0.3233333333333333`
+- `no_task_conditioning`: `0.28`
+- `no_geometry`: `0.27`
+- `no_camera_pose`: `0.29333333333333333`
+### Selector checkpoints
+Sources:
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/full_fixed_default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md`
+Raw values:
+- `iter6` mean success: `0.4566666666666667`
+  - foliage `0.46`, bag `0.4`, cloth `0.51`
+- `iter7` mean success: `0.4666666666666666`
+  - foliage `0.4`, bag `0.41`, cloth `0.59`
+- `iter8` bag-only fixed slice: `0.41`
+- routed controller mean success: `0.48666666666666664`
+  - routing rule: `foliage -> iter6`, `bag -> iter8`, `cloth -> iter8`
+  - per-task: foliage `0.46`, bag `0.41`, cloth `0.59`
+### Real baseline compare on proxy suite
+Source:
+- `VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.json`
+Raw values:
+- `baseline_rgbd_stage3` mean success: `0.31`
+  - foliage `0.21`, bag `0.15`, cloth `0.57`
+- `iter5_selector` mean success: `0.45`
+  - foliage `0.44`, bag `0.4`, cloth `0.51`
+### RLBench recovered push-box comparator
+Sources:
+- `reports/rlbench_general_debug/rlbench_push_box_fair_step1_final_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
+- `reports/rlbench_general_debug/rlbench_push_box_historical_step1_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
+Raw values:
+- current fair-step1 final mean success: `0.7`
+- current fair-step1 final successes:
+  - `[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]`
+- historical push-box control mean success: `0.4`
+- historical push-box control successes:
+  - `[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]`
+### Official AnyBimanual overlap branch
+Sources:
+- `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log`
+- `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+Raw train milestones:
+- global step `300`: loss `40.91718`
+- global step `400`: loss `33.26684`
+- global step `500`: loss `36.07054`
+- global step `600`: loss `35.32345`
+- global step `700`: loss `28.50959`
+- global step `800`: loss `23.60169`
+- global step `900`: loss `15.28901`
+- run reached `weights/1000` and training exited cleanly
+Raw eval outputs:
+- source log: `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+- summary files:
+  - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md`
+  - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
+- local last complete step: `1000`
+- local mean success: `0.16`
+- local per-task success:
+  - `coordinated_push_box`: `0.0`
+  - `coordinated_lift_ball`: `0.0`
+  - `dual_push_buttons`: `0.48`
+- local per-task return:
+  - `coordinated_push_box`: `0.0`
+  - `coordinated_lift_ball`: `0.0`
+  - `dual_push_buttons`: `12.0`
+- public best overlap step in the local summary: `60000`
+- public best mean success in the local summary: `0.6933333333333334`
+### Validated general-task anchor: `dual_push_buttons`
+Sources:
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
+- `baselines/AnyBimanual_release_eval_anchor/perlf_release_dual_push_buttons_ep25/PERACT_BC/seed0/eval_data.csv`
+Raw values:
+- public AnyBimanual release, step `60000`: success `0.96`, return `24.0`, length `21.56`
+- local official single-task eval, step `60000`, `25` episodes: success `0.96`, return `24.0`, length `21.84`
+- local clip backbone-only result on same task: success `0.0`, return `0.0`
+- local elastic reveal proxy iter6 result on same task: success `0.0`, return `0.0`
+- local RVT frozen fixed-bounds result on same task: success `0.0`, return `0.0`
+### RVT overlap branch
+Sources:
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md`
+Raw values:
+- frozen RVT stage1 train summary:
+  - `outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/summary.json`
+  - final train total `0.043179353826920445`
+  - final val total `0.039591669984665984`
+- frozen RVT overlap eval: mean success `0.0`
+- frozen fixed-bounds RVT overlap eval: mean success `0.0`
+- gate values for both branches:
+  - local AnyBimanual overlap floor used for the gate: `0.16`
+  - stage2 run flag: `false`
+### Dual-push non-privileged retarget branch
+Sources:
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
+Raw values:
+- demo replay through `absolute_action_from_delta`:
+  - `reports/dual_push_nonzero_branch_20260330/demo_replay/replay_summary.json`
+  - mean success `0.8`
+  - mean return `0.8`
+- retargeted demo with checkpoint backbone retrieval and vision-only button localization, `1` episode:
+  - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep1/summary.json`
+  - mean success `1.0`
+  - mean return `1.0`
+- retargeted demo with checkpoint backbone retrieval and vision-only button localization, `5` episodes:
+  - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep5/summary.json`
+  - mean success `1.0`
+  - mean return `1.0`
+### Dual-push full-architecture hybrid branch
+Sources:
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
+- `reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
+- `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
+Raw values:
+- elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization:
+  - `1` episode
+  - mean success `1.0`
+  - mean return `1.0`
+  - steps `94`
+  - retrieved episode index `11`
+  - retrieval similarity `0.9998629689216614`
+- full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint:
+  - `1` episode
+  - mean success `1.0`
+  - mean return `1.0`
+  - steps `116`
+  - path recoveries `0`
+  - noop fallbacks `0`
+  - first selected mode `residual::maintain_opening`
+  - last selected mode `residual::base_action`
+## Environment Recreation
+Environment files are under `environment/`, including:
+- `environment/setup_same_hardware.sh`
+- `environment/runtime_env_vars.sh`
+- `environment/reconstruct_anybimanual_overlap_replay.sh`
+- `environment/hardware_snapshot.txt`
+- `environment/env_list.txt`
+- `environment/base_python.txt`
+- `environment/base_pip_freeze.txt`
+- `environment/rlbench_python.txt`
+- `environment/rlbench_pip_freeze.txt`
+## Notes On Result Presentation
+This repo-level README and the new root docs intentionally keep result text raw, reporting only:
+- file paths
+- exact commands
+- exact numeric outputs
+- exact partial status for in-flight runs
+Interpretive material already present inside older staged artifacts remains preserved as part of the historical workspace contents.

code/VLAarchtests2_code/RESULTS_RAW.md ADDED Viewed

	@@ -0,0 +1,178 @@

+# Results Raw
+This file records exact values and exact partial statuses without additional conclusions.
+## Proxy Sprint v7 Main Table
+Source:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
+| Item | Raw values |
+| --- | --- |
+| base_model | mean success `0.28`; foliage `0.39`; bag `0.31`; cloth `0.14` |
+| random | mean success `0.43333333333333335`; foliage `0.41`; bag `0.37`; cloth `0.52` |
+| candidate0 | mean success `0.2`; foliage `0.24`; bag `0.22`; cloth `0.14` |
+| oracle | mean success `0.4066666666666667`; foliage `0.5`; bag `0.42`; cloth `0.3` |
+| scripted | mean success `1.0`; foliage `1.0`; bag `1.0`; cloth `1.0` |
+## Proxy Sprint v7 Ablation Table
+Source:
+- `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
+| Item | Raw values |
+| --- | --- |
+| no_planner | `0.2` |
+| no_memory | `0.3233333333333333` |
+| no_task_conditioning | `0.28` |
+| no_geometry | `0.27` |
+| no_camera_pose | `0.29333333333333333` |
+## Selector Table
+Sources:
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/full_fixed_default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json`
+- `VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md`
+| Item | Raw values |
+| --- | --- |
+| iter6 | mean success `0.4566666666666667`; foliage `0.46`; bag `0.4`; cloth `0.51` |
+| iter7 | mean success `0.4666666666666666`; foliage `0.4`; bag `0.41`; cloth `0.59` |
+| iter8 bag fixed slice | mean success `0.41`; nominal `0.45`; high_reocclusion `0.4`; camera_perturbation `0.5`; one_sided_slip `0.25` |
+| routed controller | mean success `0.48666666666666664`; route `foliage -> iter6`, `bag -> iter8`, `cloth -> iter8`; foliage `0.46`; bag `0.41`; cloth `0.59` |
+## Proxy Baseline Compare Table
+Source:
+- `VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.json`
+| Item | Raw values |
+| --- | --- |
+| baseline_rgbd_stage3 | mean success `0.31`; foliage `0.21`; bag `0.15`; cloth `0.57` |
+| iter5_selector | mean success `0.45`; foliage `0.44`; bag `0.4`; cloth `0.51` |
+## RLBench Recovered Push-Box Comparator
+Sources:
+- `reports/rlbench_general_debug/rlbench_push_box_fair_step1_final_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
+- `reports/rlbench_general_debug/rlbench_push_box_historical_step1_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
+| Item | Raw values |
+| --- | --- |
+| current fair-step1 final | mean success `0.7`; mean return `0.7`; successes `[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]` |
+| historical push-box control | mean success `0.4`; mean return `0.4`; successes `[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]` |
+## Official AnyBimanual Overlap Training Milestones
+Sources:
+- `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md`
+| Global step | Raw values |
+| --- | --- |
+| 300 | loss `40.91718`; sample time `0.093029`; step time `14.0686` |
+| 400 | loss `33.26684`; sample time `0.073085`; step time `14.3032` |
+| 500 | loss `36.07054`; sample time `0.048558`; step time `11.1376` |
+| 600 | loss `35.32345`; sample time `0.040642`; step time `9.7719` |
+| 700 | loss `28.50959`; sample time `0.057937`; step time `10.9347` |
+| 800 | loss `23.60169`; sample time `0.032697`; step time `11.8652` |
+| 900 | loss `15.28901`; sample time `0.051232`; step time `11.5073` |
+| 1000 checkpoint | train reached `weights/1000` and exited cleanly |
+## Official AnyBimanual Overlap Eval Final Output
+Sources:
+- `reports/anybimanual_subset3_overlap_resume1000_eval.log`
+- `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
+| Item | Raw values |
+| --- | --- |
+| local last complete step | `1000` |
+| local mean success | `0.16` |
+| coordinated_push_box | success `0.0`; return `0.0`; final score log line `0.0` |
+| coordinated_lift_ball | success `0.0`; return `0.0`; final score log line `0.0` |
+| dual_push_buttons | success `0.48`; return `12.0`; final score log line `12.0` |
+| public best overlap step in local summary | step `60000`; mean success `0.6933333333333334` |
+| public best overlap per-task success | coordinated_push_box `0.8`; coordinated_lift_ball `0.32`; dual_push_buttons `0.96` |
+| delta vs public best mean success | `-0.5333333333333333` |
+| delta vs public best per-task success | coordinated_push_box `-0.8`; coordinated_lift_ball `-0.32`; dual_push_buttons `-0.48` |
+## Validated General-Task Anchor: dual_push_buttons
+Source:
+- `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
+| Item | Raw values |
+| --- | --- |
+| public AnyBimanual release | step `60000`; success `0.96`; return `24.0`; length `21.56` |
+| local official single-task eval | step `60000`; episodes `25`; success `0.96`; return `24.0`; length `21.84` |
+| local clip backbone-only | success `0.0`; return `0.0`; path `reports/true_baseline_compare_subset3_v1/rlbench_subset3_backbone_only_clip_100demo_fair_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
+| local elastic reveal proxy iter6 | success `0.0`; return `0.0`; path `reports/true_baseline_compare_subset3_v1/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
+| local RVT hybrid frozen fixed-bounds | success `0.0`; return `0.0`; path `reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
+## RVT Overlap Branch
+Sources:
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
+- `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md`
+| Item | Raw values |
+| --- | --- |
+| frozen RVT stage1 train | checkpoint `outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt`; final train total `0.043179353826920445`; final val total `0.039591669984665984`; train seconds `2261.2839448451996` |
+| frozen RVT overlap eval | mean success `0.0`; push_box `0.0`; lift_ball `0.0`; dual_push_buttons `0.0` |
+| frozen fixed-bounds RVT overlap eval | mean success `0.0`; push_box `0.0`; lift_ball `0.0`; dual_push_buttons `0.0` |
+| local overlap floor used for gate | `0.16` |
+| stage2 run flag | `false` |
+## Dual-Push Nonzero Branch
+Source:
+- `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
+| Item | Raw values |
+| --- | --- |
+| direct rollout smoke planning | `5` episodes; `25` steps; mean success `0.0`; path `reports/dual_push_nonzero_branch_20260330/smoke_planning/rollout_eval.json` |
+| controller sweep planning_c4 | `0.0` |
+| controller sweep ik_c1 | `0.0` |
+| controller sweep planning_c1_s05 | `0.0` |
+| kNN top-1 planning | `5` episodes; `25` steps; mean success `0.0` |
+| weighted rollout smoke planning | `5` episodes; `25` steps; mean success `0.0` |
+| demo replay through absolute_action_from_delta | mean success `0.8`; mean return `0.8`; successful demo step counts `89`, `112`, `93`, `112` |
+| weighted kNN top-1 planning length120 | `2` episodes; mean success `0.0` |
+| chunk8 probe IK length120 | `1` episode; success `0.0`; return `0.0`; path recoveries `119`; noop fallbacks `1` |
+| retargeted demo task_state smoke | `2` episodes; mean success `1.0`; mean return `1.0` |
+| retargeted demo checkpoint-backbone ep5 | `5` episodes; mean success `1.0`; mean return `1.0` |
+| retargeted demo checkpoint-backbone vision ep1 | `1` episode; mean success `1.0`; mean return `1.0` |
+| retargeted demo checkpoint-backbone vision ep5 | `5` episodes; mean success `1.0`; mean return `1.0` |
+## Dual-Push Full-Architecture Hybrid
+Sources:
+- `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
+- `reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
+- `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
+| Item | Raw values |
+| --- | --- |
+| elastic checkpoint retargeted-demo probe | `1` episode; mean success `1.0`; mean return `1.0`; steps `94`; retrieved episode index `11`; retrieval similarity `0.9998629689216614` |
+| full-architecture hybrid eval | `1` episode; mean success `1.0`; mean return `1.0`; steps `116`; path recoveries `0`; noop fallbacks `0`; first selected mode `residual::maintain_opening`; last selected mode `residual::base_action` |
+## Previous Repo Raw Results
+Previous raw tables are preserved in:
+- `history/VLAarchtests_previous_README.md`

code/VLAarchtests2_code/VLAarchtests/MODEL_INDEX.md ADDED Viewed

	@@ -0,0 +1,81 @@

+# Model Index
+## 2026-03-25/26 Additions
+### Handoff Proxy Checkpoints
+| Run | Checkpoint | Summary | Report |
+| --- | --- | --- | --- |
+| spatial handoff | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_seed17/summary.json` | `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` |
+| compact handoff | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_seed17/summary.json` | `artifacts/reports/reveal_handoff_compact_train_probe/reveal_benchmark.json` |
+| compact-phase handoff | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/summary.json` | `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` |
+| spatial-phase handoff | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase_seed17/summary.json` | `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` |
+### RLBench Current Checkpoints
+| Run | Checkpoint | Related files |
+| --- | --- | --- |
+| subset3 valid9 | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_valid9/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_valid9/checkpoint_stable.pt` |
+| subset3 common23 | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_common23/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_common23/checkpoint_stable.pt` |
+| lift-ball wide | `artifacts/outputs/rlbench_current/rlbench_lift_ball_backbone_only_clip_current_wide/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_lift_ball_backbone_only_clip_current_wide/checkpoint_stable.pt` |
+| push-box step1 | `artifacts/outputs/rlbench_current/rlbench_push_box_backbone_only_clip_step1/checkpoint_best.pt` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json`, `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` |
+### RLBench Result Files
+| Artifact | File |
+| --- | --- |
+| lift-ball wide, one-step replanning | `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` |
+| push-box step1, one-step replanning | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` |
+| push-box step1, one-step replanning, `delta_scale=0.05` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` |
+| push-box kNN, `episodes=1` | `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` |
+| push-box kNN, `episodes=5`, `top_k=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` |
+| push-box kNN, `episodes=5`, `top_k=1`, dense bank | `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` |
+## R3D Proxy Runs
+| Run | Config | Seed | Checkpoint | Summary | Benchmark | Diagnostics |
+| --- | --- | ---: | --- | --- | --- | --- |
+| stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 13 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json` |
+| stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 14 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json` |
+| stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 15 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json` |
+| stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 21 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/diagnostics_full/proxy_diagnostics.json` |
+| stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 22 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/diagnostics_full/proxy_diagnostics.json` |
+| stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 23 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/diagnostics_full/proxy_diagnostics.json` |
+| stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 7 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json` |
+| stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 8 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json` |
+| stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 9 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json` |
+| stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 11 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/diagnostics_full/proxy_diagnostics.json` |
+| stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 12 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/diagnostics_full/proxy_diagnostics.json` |
+| stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 13 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/diagnostics_full/proxy_diagnostics.json` |
+| stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 17 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/diagnostics_full/proxy_diagnostics.json` |
+| stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 18 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/diagnostics_full/proxy_diagnostics.json` |
+| stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 19 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/diagnostics_full/proxy_diagnostics.json` |
+## Ablation Benchmark Files
+| Ablation | File |
+| --- | --- |
+| stage1 dummy `no_planner` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json` |
+| stage1 dummy `no_role_symmetry` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json` |
+| stage2 dummy `no_world_model` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.json` |
+| stage2 dummy `no_world_model` pre-fix backup | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark_pre_null_rollout_fix.json` |
+| stage2 dummy `short_history` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.json` |
+| stage3 clip RGB-D `no_depth` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.json` |
+Only one seed per configuration is listed above; equivalent ablation files exist under the other seed directories.
+## Integration Artifacts
+| Artifact | File |
+| --- | --- |
+| RLBench import/config smoke | `artifacts/outputs/r3d/rlbench_smokes/smoke_test_output.txt` |
+| RLBench `open_drawer` launch smoke | `artifacts/outputs/r3d/rlbench_smokes/launch_smoke_open_drawer.txt` |
+| RLBench `open_drawer` rollout | `artifacts/outputs/r3d/rlbench_open_drawer_r3d_rollout/rollout_eval.json` |
+| PerAct2 13-task launch smoke summary | `artifacts/outputs/r3d/peract2_13_launch_smoke/launch_smoke_summary.json` |
+## Historical References
+| File | Purpose |
+| --- | --- |
+| `regression/baselines.md` | historical baseline metrics from the downloaded snapshot |
+| `results/phase_tracking.md` | phase-by-phase acceptance tracking |

code/VLAarchtests2_code/VLAarchtests/README.md ADDED Viewed

	@@ -0,0 +1,172 @@

+---
+tags:
+  - robotics
+  - vision-language-action
+  - bimanual-manipulation
+  - rlbench
+  - rgbd
+---
+# VLAarchtests
+Bundle uploaded from the `/workspace` directory of RunPod sessions dated `2026-03-25 UTC` and `2026-03-26 UTC`.
+## Top-Level Contents
+- `code/reveal_vla_bimanual/`
+  - project code used for the proxy and RLBench runs in this bundle
+- `artifacts/data/reveal_proxy/`
+  - proxy dataset bundles used by the handoff runs
+- `artifacts/outputs/r3d/`
+  - R3D proxy outputs from an earlier upload, already present in the bundle
+- `artifacts/outputs/r3d_handoff/`
+  - handoff proxy checkpoints
+- `artifacts/outputs/r3d_handoff_phase/`
+  - phase-supervised handoff proxy checkpoints
+- `artifacts/outputs/rlbench_current/`
+  - RLBench checkpoints from the current session
+- `artifacts/reports/`
+  - proxy and RLBench result files copied from `/workspace/reports`
+- `environment/`
+  - same-machine setup files and validation helpers
+- `tests/`
+  - local test suite
+- `handoff/instructions.md`
+  - instruction file used for the handoff work
+- `MODEL_INDEX.md`
+  - checkpoint and result index
+- `results/session_results_20260326.md`
+  - raw result tables for the `2026-03-25/26` work
+## Code Added Or Updated
+### Core model, memory, planner, and dataset paths
+- `code/reveal_vla_bimanual/models/backbones.py`
+- `code/reveal_vla_bimanual/models/multiview_fusion.py`
+- `code/reveal_vla_bimanual/models/observation_memory.py`
+- `code/reveal_vla_bimanual/models/reveal_head.py`
+- `code/reveal_vla_bimanual/models/world_model.py`
+- `code/reveal_vla_bimanual/models/action_decoder.py`
+- `code/reveal_vla_bimanual/models/planner.py`
+- `code/reveal_vla_bimanual/models/policy.py`
+- `code/reveal_vla_bimanual/train/losses.py`
+- `code/reveal_vla_bimanual/sim_reveal/dataset.py`
+- `code/reveal_vla_bimanual/sim_reveal/procedural_envs.py`
+- `code/reveal_vla_bimanual/sim_rlbench/dataset.py`
+### Training and evaluation paths
+- `code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
+- `code/reveal_vla_bimanual/eval/run_reveal_benchmark.py`
+- `code/reveal_vla_bimanual/eval/run_ablations.py`
+- `code/reveal_vla_bimanual/eval/run_teacher_audit.py`
+- `code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py`
+- `code/reveal_vla_bimanual/eval/run_rlbench_knn_eval.py`
+### Added or updated training configs
+- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact.yaml`
+- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial.yaml`
+- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase.yaml`
+- `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase.yaml`
+- `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_valid9.yaml`
+- `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_common23.yaml`
+- `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_current_wide.yaml`
+- `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_step1.yaml`
+- `code/reveal_vla_bimanual/train/configs/rlbench_push_box_backbone_only_clip_step1.yaml`
+### Test files
+The staged `tests/` directory contains `32` test modules plus `conftest.py`, including:
+- geometry and camera rotation coverage
+- phase-label and candidate-ranking coverage
+- planner gradient-flow and reocclusion gating coverage
+- world-model null-rollout, field-consistency, and task-adapter coverage
+- proxy scripted benchmark and teacher-audit coverage
+## Verification
+- local test command:
+  - `PYTHONPATH=/workspace/VLAarchtests_work/code/reveal_vla_bimanual python -m pytest -q /workspace/VLAarchtests_work/tests`
+- result:
+  - `33 passed`
+## Raw Result Files
+### Proxy and handoff results
+- `artifacts/reports/reveal_smoke_mod/reveal_benchmark.json`
+- `artifacts/reports/reveal_smoke_nogeom/reveal_benchmark.json`
+- `artifacts/reports/reveal_smoke_noplanner/reveal_benchmark.json`
+- `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json`
+- `artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json`
+- `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json`
+- `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json`
+- `artifacts/reports/reveal_phase_ablations_compact/ablations.json`
+- `artifacts/reports/reveal_teacher_audit_serious/teacher_audit.json`
+### RLBench result files
+- `artifacts/reports/rlbench_dual_buttons_baseline_len100_ep1_ik_rescale/rollout_eval.json`
+- `artifacts/reports/rlbench_dual_buttons_common23_len100_ep1_ik_rescale/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_common23_len100_ep1_ik_rescale/rollout_eval.json`
+- `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json`
+- `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json`
+## Raw Result Tables
+### Proxy serious runs
+| Artifact | File | Raw values |
+| --- | --- | --- |
+| spatial handoff vs released baseline | `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.2167` |
+| spatial-trained checkpoint with compact world model vs released baseline | `artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.5200` |
+| compact-phase vs released baseline | `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, compact-phase mean success `0.5133` |
+| spatial-phase with compact world model vs released baseline | `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` | baseline mean success `0.5833`, spatial-phase compact-world-model mean success `0.4933` |
+### Proxy ablations
+| Artifact | File | Raw values |
+| --- | --- | --- |
+| compact-phase ablations | `artifacts/reports/reveal_phase_ablations_compact/ablations.json` | full `0.5133`, `no_geometry` `0.5133`, `no_spatial_memory` `0.4967`, `compact_world_model` `0.5133`, `no_planner` `0.4333`, `gaussian_candidates_only` `0.4667`, `no_task_head` `0.5133`, `no_support_mode_conditioning` `0.5133` |
+### RLBench direct-policy runs
+| Artifact | File | Raw values |
+| --- | --- | --- |
+| lift-ball wide checkpoint, one-step replanning | `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[148]`, noop fallbacks `[11]` |
+| push-box step-1 checkpoint, one-step replanning | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[177]`, noop fallbacks `[0]` |
+| push-box step-1 checkpoint, one-step replanning, `delta_scale=0.05` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[180]`, noop fallbacks `[0]` |
+### RLBench retrieval runs
+| Artifact | File | Raw values |
+| --- | --- | --- |
+| push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=1` | `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` | mean success `1.0`, mean return `1.0`, bank size `2815` |
+| push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` | successes `[0.0, 1.0, 0.0, 0.0, 0.0]`, mean success `0.2`, bank size `2815` |
+| push-box kNN, `bank_stride=1`, `top_k=1`, `time_window=4`, `episodes=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` | successes `[0.0, 0.0, 1.0, 1.0, 0.0]`, mean success `0.4`, bank size `11259` |
+## Environment Recreation Files
+- `environment/setup_same_machine.sh`
+- `environment/validate_same_machine.sh`
+- `environment/run_peract2_13_rollouts.sh`
+- `environment/runtime_env_vars.sh`
+- `environment/hardware_snapshot.txt`
+- `environment/glxinfo_B.txt`
+- `environment/upstream_revisions.txt`
+- `environment/system_packages_same_machine.txt`
+- `environment/rlbench_env_export.yaml`
+- `environment/rlbench_env_explicit.txt`
+- `environment/rlbench_pip_freeze.txt`
+- `environment/reveal_env_export.yaml`
+- `environment/reveal_env_explicit.txt`
+- `environment/reveal_pip_freeze.txt`
+Detailed raw tables for the `2026-03-25/26` work are in `results/session_results_20260326.md`.

code/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json ADDED Viewed

	@@ -0,0 +1,2702 @@

+[
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 0,
+    "seed": 0
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 1,
+    "seed": 1000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 2,
+    "seed": 2000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 3,
+    "seed": 3000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 4,
+    "seed": 4000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 5,
+    "seed": 5000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 6,
+    "seed": 6000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 7,
+    "seed": 7000
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 8,
+    "seed": 1
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 9,
+    "seed": 1001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 10,
+    "seed": 2001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 11,
+    "seed": 3001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 12,
+    "seed": 4001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 13,
+    "seed": 5001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 14,
+    "seed": 6001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 15,
+    "seed": 7001
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 16,
+    "seed": 2
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 17,
+    "seed": 1002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 18,
+    "seed": 2002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 19,
+    "seed": 3002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 20,
+    "seed": 4002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 21,
+    "seed": 5002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 22,
+    "seed": 6002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 23,
+    "seed": 7002
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 24,
+    "seed": 3
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 25,
+    "seed": 1003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 26,
+    "seed": 2003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 27,
+    "seed": 3003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 28,
+    "seed": 4003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 29,
+    "seed": 5003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 30,
+    "seed": 6003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 31,
+    "seed": 7003
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 32,
+    "seed": 4
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 33,
+    "seed": 1004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 34,
+    "seed": 2004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 35,
+    "seed": 3004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 36,
+    "seed": 4004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 37,
+    "seed": 5004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 38,
+    "seed": 6004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 39,
+    "seed": 7004
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 40,
+    "seed": 5
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 41,
+    "seed": 1005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 42,
+    "seed": 2005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 43,
+    "seed": 3005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 44,
+    "seed": 4005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 45,
+    "seed": 5005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 46,
+    "seed": 6005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 47,
+    "seed": 7005
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 48,
+    "seed": 6
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 49,
+    "seed": 1006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 50,
+    "seed": 2006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 51,
+    "seed": 3006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 52,
+    "seed": 4006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 53,
+    "seed": 5006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 54,
+    "seed": 6006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 55,
+    "seed": 7006
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 56,
+    "seed": 7
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 57,
+    "seed": 1007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 58,
+    "seed": 2007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 59,
+    "seed": 3007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 60,
+    "seed": 4007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 61,
+    "seed": 5007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 62,
+    "seed": 6007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 63,
+    "seed": 7007
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 64,
+    "seed": 8
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 65,
+    "seed": 1008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 66,
+    "seed": 2008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 67,
+    "seed": 3008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 68,
+    "seed": 4008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 69,
+    "seed": 5008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 70,
+    "seed": 6008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 71,
+    "seed": 7008
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 72,
+    "seed": 9
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 73,
+    "seed": 1009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 74,
+    "seed": 2009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 75,
+    "seed": 3009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 76,
+    "seed": 4009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "medium",
+    "episode_index": 77,
+    "seed": 5009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 78,
+    "seed": 6009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "tight_corridor_high_collateral",
+    "difficulty_bin": "hard",
+    "episode_index": 79,
+    "seed": 7009
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 80,
+    "seed": 10
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 81,
+    "seed": 2010
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 82,
+    "seed": 11
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 83,
+    "seed": 2011
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 84,
+    "seed": 12
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 85,
+    "seed": 2012
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 86,
+    "seed": 13
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 87,
+    "seed": 2013
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 88,
+    "seed": 14
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 89,
+    "seed": 2014
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 90,
+    "seed": 15
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 91,
+    "seed": 2015
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 92,
+    "seed": 16
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 93,
+    "seed": 2016
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 94,
+    "seed": 17
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 95,
+    "seed": 2017
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 96,
+    "seed": 18
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 97,
+    "seed": 2018
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 98,
+    "seed": 19
+  },
+  {
+    "proxy_name": "foliage_proxy",
+    "task_name": "foliage",
+    "task_id": 0,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 99,
+    "seed": 2019
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 0,
+    "seed": 100000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 1,
+    "seed": 101000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 2,
+    "seed": 102000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 3,
+    "seed": 103000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 4,
+    "seed": 104000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 5,
+    "seed": 105000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 6,
+    "seed": 106000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 7,
+    "seed": 107000
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 8,
+    "seed": 100001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 9,
+    "seed": 101001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 10,
+    "seed": 102001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 11,
+    "seed": 103001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 12,
+    "seed": 104001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 13,
+    "seed": 105001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 14,
+    "seed": 106001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 15,
+    "seed": 107001
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 16,
+    "seed": 100002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 17,
+    "seed": 101002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 18,
+    "seed": 102002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 19,
+    "seed": 103002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 20,
+    "seed": 104002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 21,
+    "seed": 105002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 22,
+    "seed": 106002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 23,
+    "seed": 107002
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 24,
+    "seed": 100003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 25,
+    "seed": 101003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 26,
+    "seed": 102003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 27,
+    "seed": 103003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 28,
+    "seed": 104003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 29,
+    "seed": 105003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 30,
+    "seed": 106003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 31,
+    "seed": 107003
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 32,
+    "seed": 100004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 33,
+    "seed": 101004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 34,
+    "seed": 102004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 35,
+    "seed": 103004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 36,
+    "seed": 104004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 37,
+    "seed": 105004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 38,
+    "seed": 106004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 39,
+    "seed": 107004
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 40,
+    "seed": 100005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 41,
+    "seed": 101005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 42,
+    "seed": 102005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 43,
+    "seed": 103005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 44,
+    "seed": 104005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 45,
+    "seed": 105005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 46,
+    "seed": 106005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 47,
+    "seed": 107005
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 48,
+    "seed": 100006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 49,
+    "seed": 101006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 50,
+    "seed": 102006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 51,
+    "seed": 103006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 52,
+    "seed": 104006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 53,
+    "seed": 105006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 54,
+    "seed": 106006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 55,
+    "seed": 107006
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 56,
+    "seed": 100007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 57,
+    "seed": 101007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 58,
+    "seed": 102007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 59,
+    "seed": 103007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 60,
+    "seed": 104007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 61,
+    "seed": 105007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 62,
+    "seed": 106007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 63,
+    "seed": 107007
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 64,
+    "seed": 100008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 65,
+    "seed": 101008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 66,
+    "seed": 102008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 67,
+    "seed": 103008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 68,
+    "seed": 104008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 69,
+    "seed": 105008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 70,
+    "seed": 106008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 71,
+    "seed": 107008
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 72,
+    "seed": 100009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 73,
+    "seed": 101009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 74,
+    "seed": 102009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 75,
+    "seed": 103009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 76,
+    "seed": 104009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "medium",
+    "episode_index": 77,
+    "seed": 105009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 78,
+    "seed": 106009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "one_sided_slip",
+    "difficulty_bin": "hard",
+    "episode_index": 79,
+    "seed": 107009
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 80,
+    "seed": 100010
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 81,
+    "seed": 102010
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 82,
+    "seed": 100011
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 83,
+    "seed": 102011
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 84,
+    "seed": 100012
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 85,
+    "seed": 102012
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 86,
+    "seed": 100013
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 87,
+    "seed": 102013
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 88,
+    "seed": 100014
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 89,
+    "seed": 102014
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 90,
+    "seed": 100015
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 91,
+    "seed": 102015
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 92,
+    "seed": 100016
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 93,
+    "seed": 102016
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 94,
+    "seed": 100017
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 95,
+    "seed": 102017
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 96,
+    "seed": 100018
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 97,
+    "seed": 102018
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 98,
+    "seed": 100019
+  },
+  {
+    "proxy_name": "bag_proxy",
+    "task_name": "bag",
+    "task_id": 1,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 99,
+    "seed": 102019
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 0,
+    "seed": 200000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 1,
+    "seed": 201000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 2,
+    "seed": 202000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 3,
+    "seed": 203000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 4,
+    "seed": 204000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 5,
+    "seed": 205000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 6,
+    "seed": 206000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 7,
+    "seed": 207000
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 8,
+    "seed": 200001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 9,
+    "seed": 201001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 10,
+    "seed": 202001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 11,
+    "seed": 203001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 12,
+    "seed": 204001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 13,
+    "seed": 205001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 14,
+    "seed": 206001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 15,
+    "seed": 207001
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 16,
+    "seed": 200002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 17,
+    "seed": 201002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 18,
+    "seed": 202002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 19,
+    "seed": 203002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 20,
+    "seed": 204002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 21,
+    "seed": 205002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 22,
+    "seed": 206002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 23,
+    "seed": 207002
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 24,
+    "seed": 200003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 25,
+    "seed": 201003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 26,
+    "seed": 202003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 27,
+    "seed": 203003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 28,
+    "seed": 204003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 29,
+    "seed": 205003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 30,
+    "seed": 206003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 31,
+    "seed": 207003
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 32,
+    "seed": 200004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 33,
+    "seed": 201004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 34,
+    "seed": 202004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 35,
+    "seed": 203004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 36,
+    "seed": 204004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 37,
+    "seed": 205004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 38,
+    "seed": 206004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 39,
+    "seed": 207004
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 40,
+    "seed": 200005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 41,
+    "seed": 201005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 42,
+    "seed": 202005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 43,
+    "seed": 203005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 44,
+    "seed": 204005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 45,
+    "seed": 205005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 46,
+    "seed": 206005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 47,
+    "seed": 207005
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 48,
+    "seed": 200006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 49,
+    "seed": 201006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 50,
+    "seed": 202006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 51,
+    "seed": 203006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 52,
+    "seed": 204006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 53,
+    "seed": 205006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 54,
+    "seed": 206006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 55,
+    "seed": 207006
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 56,
+    "seed": 200007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 57,
+    "seed": 201007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 58,
+    "seed": 202007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 59,
+    "seed": 203007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 60,
+    "seed": 204007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 61,
+    "seed": 205007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 62,
+    "seed": 206007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 63,
+    "seed": 207007
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 64,
+    "seed": 200008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 65,
+    "seed": 201008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 66,
+    "seed": 202008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 67,
+    "seed": 203008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 68,
+    "seed": 204008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 69,
+    "seed": 205008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 70,
+    "seed": 206008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 71,
+    "seed": 207008
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 72,
+    "seed": 200009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "medium",
+    "episode_index": 73,
+    "seed": 201009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 74,
+    "seed": 202009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "high_reocclusion",
+    "difficulty_bin": "hard",
+    "episode_index": 75,
+    "seed": 203009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "medium",
+    "episode_index": 76,
+    "seed": 204009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "medium",
+    "episode_index": 77,
+    "seed": 205009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "camera_perturbation",
+    "difficulty_bin": "hard",
+    "episode_index": 78,
+    "seed": 206009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "fold_sensitive_long_persistence",
+    "difficulty_bin": "hard",
+    "episode_index": 79,
+    "seed": 207009
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 80,
+    "seed": 200010
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 81,
+    "seed": 202010
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 82,
+    "seed": 200011
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 83,
+    "seed": 202011
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 84,
+    "seed": 200012
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 85,
+    "seed": 202012
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 86,
+    "seed": 200013
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 87,
+    "seed": 202013
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 88,
+    "seed": 200014
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 89,
+    "seed": 202014
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 90,
+    "seed": 200015
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 91,
+    "seed": 202015
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 92,
+    "seed": 200016
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 93,
+    "seed": 202016
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 94,
+    "seed": 200017
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 95,
+    "seed": 202017
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 96,
+    "seed": 200018
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 97,
+    "seed": 202018
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "medium",
+    "episode_index": 98,
+    "seed": 200019
+  },
+  {
+    "proxy_name": "cloth_proxy",
+    "task_name": "cloth",
+    "task_id": 2,
+    "stress_slice": "nominal",
+    "difficulty_bin": "hard",
+    "episode_index": 99,
+    "seed": 202019
+  }
+]

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/ablations_v7/ablations.md ADDED Viewed

	@@ -0,0 +1,92 @@

+# Reveal Ablations
+## full_model
+- mean_success: 0.280
+- visibility_integral: 31.968
+- corridor_availability: 0.675
+- reocclusion_rate: 0.014
+- disturbance_cost: 0.393
+- premature_retrieve_rate: 0.125
+- reocclusion_after_reveal_rate: 0.547
+- planner_regret: 0.091
+- chunk_commit_steps: 0.000
+## no_planner
+- mean_success: 0.200
+- visibility_integral: 6.978
+- corridor_availability: 0.230
+- reocclusion_rate: 0.020
+- disturbance_cost: 0.110
+- premature_retrieve_rate: 0.151
+- reocclusion_after_reveal_rate: 0.800
+- planner_regret: 0.041
+- chunk_commit_steps: 0.000
+- paired_paired_episodes_vs_full_model: 300.000
+- paired_success_delta_vs_full_model: -0.080
+- paired_visibility_delta_vs_full_model: -24.990
+- paired_reocclusion_delta_vs_full_model: 0.006
+- paired_disturbance_delta_vs_full_model: -0.283
+## no_spatial_memory
+- mean_success: 0.323
+- visibility_integral: 37.043
+- corridor_availability: 0.825
+- reocclusion_rate: 0.015
+- disturbance_cost: 0.441
+- premature_retrieve_rate: 0.112
+- reocclusion_after_reveal_rate: 0.260
+- planner_regret: 0.163
+- chunk_commit_steps: 0.000
+- paired_paired_episodes_vs_full_model: 300.000
+- paired_success_delta_vs_full_model: 0.043
+- paired_visibility_delta_vs_full_model: 5.075
+- paired_reocclusion_delta_vs_full_model: 0.001
+- paired_disturbance_delta_vs_full_model: 0.048
+## no_task_head
+- mean_success: 0.280
+- visibility_integral: 31.965
+- corridor_availability: 0.675
+- reocclusion_rate: 0.014
+- disturbance_cost: 0.393
+- premature_retrieve_rate: 0.125
+- reocclusion_after_reveal_rate: 0.547
+- planner_regret: 0.091
+- chunk_commit_steps: 0.000
+- paired_paired_episodes_vs_full_model: 300.000
+- paired_success_delta_vs_full_model: 0.000
+- paired_visibility_delta_vs_full_model: -0.003
+- paired_reocclusion_delta_vs_full_model: 0.000
+- paired_disturbance_delta_vs_full_model: 0.000
+## no_geometry
+- mean_success: 0.270
+- visibility_integral: 32.415
+- corridor_availability: 0.675
+- reocclusion_rate: 0.013
+- disturbance_cost: 0.398
+- premature_retrieve_rate: 0.127
+- reocclusion_after_reveal_rate: 0.550
+- planner_regret: 0.091
+- chunk_commit_steps: 0.000
+- paired_paired_episodes_vs_full_model: 300.000
+- paired_success_delta_vs_full_model: -0.010
+- paired_visibility_delta_vs_full_model: 0.446
+- paired_reocclusion_delta_vs_full_model: -0.001
+- paired_disturbance_delta_vs_full_model: 0.004
+## no_camera_pose
+- mean_success: 0.293
+- visibility_integral: 31.640
+- corridor_availability: 0.681
+- reocclusion_rate: 0.017
+- disturbance_cost: 0.389
+- premature_retrieve_rate: 0.126
+- reocclusion_after_reveal_rate: 0.543
+- planner_regret: 0.092
+- chunk_commit_steps: 0.000
+- paired_paired_episodes_vs_full_model: 300.000
+- paired_success_delta_vs_full_model: 0.013
+- paired_visibility_delta_vs_full_model: -0.329
+- paired_reocclusion_delta_vs_full_model: 0.003
+- paired_disturbance_delta_vs_full_model: -0.004

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+  "local_eval_csv": "/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/eval_data.csv",
+  "public_eval_csv": "/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv",
+  "overlap_tasks": [
+    "coordinated_push_box",
+    "coordinated_lift_ball",
+    "dual_push_buttons"
+  ],
+  "local_last_complete_step": {
+    "step": 1000,
+    "eval_episodes": 25,
+    "per_task_return": {
+      "coordinated_push_box": 0.0,
+      "coordinated_lift_ball": 0.0,
+      "dual_push_buttons": 12.0
+    },
+    "per_task_success": {
+      "coordinated_push_box": 0.0,
+      "coordinated_lift_ball": 0.0,
+      "dual_push_buttons": 0.48
+    },
+    "per_task_length": {
+      "coordinated_push_box": 25.0,
+      "coordinated_lift_ball": 25.0,
+      "dual_push_buttons": 23.12
+    },
+    "per_task_total_transitions": {
+      "coordinated_push_box": 625.0,
+      "coordinated_lift_ball": 1250.0,
+      "dual_push_buttons": 1828.0
+    },
+    "mean_success": 0.16
+  },
+  "public_best_overlap_step": {
+    "step": 60000,
+    "eval_episodes": 25,
+    "per_task_return": {
+      "coordinated_push_box": 20.0,
+      "coordinated_lift_ball": 8.0,
+      "dual_push_buttons": 24.0
+    },
+    "per_task_success": {
+      "coordinated_push_box": 0.8,
+      "coordinated_lift_ball": 0.32,
+      "dual_push_buttons": 0.96
+    },
+    "per_task_length": {
+      "coordinated_push_box": 25.0,
+      "coordinated_lift_ball": 23.24,
+      "dual_push_buttons": 21.56
+    },
+    "per_task_total_transitions": {
+      "coordinated_push_box": 3693.0,
+      "coordinated_lift_ball": 2443.0,
+      "dual_push_buttons": 4857.0
+    },
+    "mean_success": 0.6933333333333334
+  },
+  "delta_vs_public_best": {
+    "mean_success_delta": -0.5333333333333333,
+    "per_task_success_delta": {
+      "coordinated_push_box": -0.8,
+      "coordinated_lift_ball": -0.32,
+      "dual_push_buttons": -0.48
+    }
+  }
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md ADDED Viewed

	@@ -0,0 +1,27 @@

+# AnyBimanual Overlap Eval Summary
+- Local eval CSV: `/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/eval_data.csv`
+- Public eval CSV: `/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv`
+## Local Last Complete Step
+- step: `1000`
+- mean_success: `0.160`
+- coordinated_push_box: success=`0.000`, return=`0.0`
+- coordinated_lift_ball: success=`0.000`, return=`0.0`
+- dual_push_buttons: success=`0.480`, return=`12.0`
+## Public Best Overlap Step
+- step: `60000`
+- mean_success: `0.693`
+- coordinated_push_box: success=`0.800`, return=`20.0`
+- coordinated_lift_ball: success=`0.320`, return=`8.0`
+- dual_push_buttons: success=`0.960`, return=`24.0`
+## Delta Vs Public Best
+- mean_success_delta: `-0.533`
+- coordinated_push_box: `-0.800`
+- coordinated_lift_ball: `-0.320`
+- dual_push_buttons: `-0.480`

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md ADDED Viewed

	@@ -0,0 +1,189 @@

+# AnyBimanual Overlap Baseline Status 2026-03-30
+## Goal
+Establish a fair official-baseline comparison path on the exact 3-task subset3 overlap used for local RLBench general-task checks:
+- `coordinated_push_box`
+- `coordinated_lift_ball`
+- `dual_push_buttons`
+The purpose is not to replace the reveal proxy benchmark as the main selector for the custom foliage / bag / cloth architecture. The purpose is to get a credible general-task baseline anchor on an official external method.
+## Code Changes
+- Added overlap-train wrapper:
+  - [run_anybimanual_subset3_overlap_train.sh](/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh)
+- Added overlap-eval wrapper:
+  - [run_anybimanual_subset3_overlap_eval.sh](/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_eval.sh)
+- Patched AnyBimanual offline runner to:
+  - accept `ANYBIMANUAL_PRETRAINED_WEIGHTS_DIR` instead of a hardcoded placeholder path
+  - guard unconditional `wandb` logging in the q-attention agents
+  - lazily import `pandas` in the eval-resume helper
+  - prefer existing local checkpoints over release weights when `framework.load_existing_weights=True`
+  - always save a final checkpoint at `framework.training_iterations` when the loop exits without already writing that step
+- Added a lightweight checkpoint-init helper:
+  - [weight_init_utils.py](/workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py)
+- Added targeted resume-logic coverage:
+  - [test_anybimanual_resume_logic.py](/workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py)
+- Added overlap-eval summary parser:
+  - [summarize_anybimanual_overlap_eval.py](/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_anybimanual_overlap_eval.py)
+- Added targeted summary-parser coverage:
+  - [test_anybimanual_overlap_eval_summary.py](/workspace/VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py)
+Wrapper responsibilities:
+- activate `/workspace/envs/rlbench`
+- keep all outputs under `/workspace`
+- point AnyBimanual at the local subset3 overlap demo root
+- use the exact overlap task list
+- avoid the upstream tmux launcher path
+## Public Reference
+Local official release artifact:
+- [eval_data.csv](/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv)
+Best overlap-relevant released step in the local CSV:
+- step `60000`
+- `coordinated_push_box`: `20/25 = 0.80`
+- `coordinated_lift_ball`: `8/25 = 0.32`
+- `dual_push_buttons`: `24/25 = 0.96`
+- overlap-task mean: `0.693`
+These are the external target-line numbers for this branch.
+## Completed Tests
+- Released AnyBimanual checkpoint live sanity on local overlap subset, `1` episode per task:
+  - [anybimanual_live_subset3_ep1.log](/workspace/reports/true_baseline_compare_subset3_v1/anybimanual_live_subset3_ep1.log)
+  - Result: `0.0` on all three tasks in that local subset sanity.
+  - Interpretation: released checkpoint plus local overlap subset is not an apples-to-apples comparison by itself.
+- Official overlap smoke train launch, `200` iterations target:
+  - log: [anybimanual_subset3_overlap_smoke200_train.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_train.log)
+  - run root: [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200)
+  - replay root: [/workspace/baselines/AnyBimanual_overlap_replay](/workspace/baselines/AnyBimanual_overlap_replay)
+- Resume-path verification after the runner patch:
+  - `python -m py_compile /workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/offline_train_runner.py /workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py`
+  - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
+  - `PYTHONPATH=/workspace/third_party/AnyBimanual/third_party/YARR pytest -q /workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py`
+  - Result: `4 passed`
+- Off-by-one checkpoint diagnosis:
+  - The first smoke run did complete `200` updates but wrote only `weights/0`, because the upstream save condition tests the loop index `i` of a `range(start_iter, training_iterations)` loop, so `i` ends at `199` and never reaches the step-`200` save boundary.
+  - Preserved failed smoke log:
+    - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log)
+  - This was the reason the eval watcher never fired on the first pass.
+- Fixed smoke retry, `200` iterations with `save_freq=50`:
+  - live train log:
+    - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log)
+  - run root:
+    - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0)
+  - result:
+    - the run finished cleanly and wrote `weights/50`, `weights/100`, `weights/150`, and `weights/200`
+    - `training.log` shows `# Starting training from weights: 0 to 200`
+- Partial `200`-step overlap eval on the official path:
+  - log:
+    - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_eval.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_eval.log)
+  - observed result before termination:
+    - `coordinated_push_box`: `0/25 = 0.0`
+    - `coordinated_lift_ball`: `0/18 = 0.0`
+    - at least one explicit infeasible-waypoint episode on `coordinated_lift_ball`
+  - interpretation:
+    - the `200`-step checkpoint is decisively below the useful comparison floor
+    - the eval was intentionally stopped early to reclaim wall-clock for the first potentially meaningful resumed checkpoint
+- Resume-to-`1000` launch validation:
+  - live train session now runs with:
+    - `framework.load_existing_weights=True`
+    - `framework.training_iterations=1000`
+  - `training.log` now shows:
+    - `# Starting training from weights: 200 to 1000`
+  - this confirms the patched resume path is working in the real official-baseline run, not just in unit tests
+- First resumed progress block from the live train session:
+  - the stdout progress stream reached resumed step `100/800`, which corresponds to global train step `300`
+  - logged metrics at that point:
+    - loss `40.91718`
+    - sample time `0.093029`
+    - step time `14.0686`
+  - interpretation:
+    - the resumed official baseline is now past pure startup and is performing real SGD on the reused replay
+    - there is no post-resume checkpoint on disk yet because the next save boundary is `weights/400`
+- First post-resume checkpoint milestone:
+  - the run reached resumed step `200/800`, which corresponds to global train step `400`
+  - logged metrics at that point:
+    - loss `33.26684`
+    - sample time `0.073085`
+    - step time `14.3032`
+  - checkpoint now exists on disk at:
+    - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/weights/400](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/weights/400)
+  - interpretation:
+    - the official overlap baseline is not flatlining during resume
+    - loss has dropped meaningfully from the step-`300` block, so keeping the `400 -> 1000` leg alive is justified
+- Post-train eval and summary chain:
+  - the waiting watcher now performs these steps in order:
+    - wait for the live `200 -> 1000` train PID to exit
+    - run the official overlap eval at `EVAL_TYPE=last`
+    - summarize the resulting `seed0/eval_data.csv` against the public release `eval_data.csv`
+    - then rerun the direct task-routed proxy benchmark wrapper on the fixed sprint benchmark
+    - then run the bag-only selector specialization wrapper and compose a candidate routed summary
+  - eval log target:
+    - [/workspace/reports/anybimanual_subset3_overlap_resume1000_eval.log](/workspace/reports/anybimanual_subset3_overlap_resume1000_eval.log)
+  - summary log target:
+    - [/workspace/reports/anybimanual_subset3_overlap_resume1000_summary.log](/workspace/reports/anybimanual_subset3_overlap_resume1000_summary.log)
+  - summary output target:
+    - [/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary](/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary)
+  - routed proxy rerun log target:
+    - [/workspace/reports/task_routed_proxy_v1_rerun.log](/workspace/reports/task_routed_proxy_v1_rerun.log)
+  - routed proxy rerun output target:
+    - [/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1_rerun](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1_rerun)
+  - bag specialization log target:
+    - [/workspace/reports/run_bag_selector_iter9.log](/workspace/reports/run_bag_selector_iter9.log)
+  - bag specialization report target:
+    - [/workspace/VLAarchtests/artifacts/reports/selector_finetune_v7_iter9_bag](/workspace/VLAarchtests/artifacts/reports/selector_finetune_v7_iter9_bag)
+  - bag-routed candidate summary target:
+    - [/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v2_candidate](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v2_candidate)
+## Live Status At Record Time
+- The first replay build is complete and reused locally from:
+  - `/workspace/baselines/AnyBimanual_overlap_replay/multi`
+- The current live official-baseline leg is the resumed train:
+  - run root:
+    - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0)
+  - training log:
+    - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log)
+  - state at record time:
+    - resumed from `weights/200`
+    - training toward `weights/1000`
+    - latest live progress sample reached roughly `203/800` resumed steps
+    - `weights/400` now exists on disk
+    - running on the reused local replay instead of rebuilding data
+    - paired watcher is waiting to run eval and then write the overlap comparison summary automatically
+- The strongest current custom-task proxy controller remains:
+  - [task_routed_proxy_v1/summary.md](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md)
+  - mean success `0.4867`
+  - foliage `0.46`, bag `0.41`, cloth `0.59`
+## Current Interpretation
+- The most useful near-term internal general-task signal still comes from the recovered push-box comparator, where the current fair-step1 line beat the historical internal control.
+- The official external-baseline path is now real, reproducible, and past the replay-build stage.
+- The new resume fix now matters empirically as well as operationally:
+  - the real run is continuing from `200` to `1000` inside the same experiment directory
+  - no replay rebuild and no reset to the public release weights were needed
+- The `200`-step smoke point is useful only as a failure-floor check. It is not a competitive baseline point.
+- The next meaningful milestone for this branch is:
+  - completed `1000`-step resumed checkpoint,
+  - then immediate overlap eval on that checkpoint,
+  - then automatic summary against the public AnyBimanual overlap line,
+  - then decide whether the official baseline branch needs more budget or whether the local architecture already has enough evidence to move into direct head-to-head reporting.

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/controller_sweep_v7/controller_sweep.md ADDED Viewed

	@@ -0,0 +1,96 @@

+# Reveal Controller Sweep
+## model
+- controller: model
+- episodes: 300.000
+- mean_success: 0.280
+- visibility_integral: 31.968
+- corridor_availability: 0.675
+- reocclusion_rate: 0.014
+- disturbance_cost: 0.393
+- premature_retrieve_rate: 0.125
+- reocclusion_after_reveal_rate: 0.547
+- planner_regret: 0.091
+- foliage_success: 0.390
+- bag_success: 0.310
+- cloth_success: 0.140
+## candidate0
+- controller: candidate0
+- episodes: 300.000
+- mean_success: 0.200
+- visibility_integral: 6.978
+- corridor_availability: 0.230
+- reocclusion_rate: 0.020
+- disturbance_cost: 0.110
+- premature_retrieve_rate: 0.151
+- reocclusion_after_reveal_rate: 0.800
+- planner_regret: 0.041
+- foliage_success: 0.240
+- bag_success: 0.220
+- cloth_success: 0.140
+- paired_paired_episodes_vs_model: 300.000
+- paired_success_delta_vs_model: -0.080
+- paired_visibility_delta_vs_model: -24.990
+- paired_reocclusion_delta_vs_model: 0.006
+- paired_disturbance_delta_vs_model: -0.283
+## random
+- controller: random
+- episodes: 300.000
+- mean_success: 0.433
+- visibility_integral: 19.548
+- corridor_availability: 0.536
+- reocclusion_rate: 0.024
+- disturbance_cost: 0.334
+- premature_retrieve_rate: 0.182
+- reocclusion_after_reveal_rate: 0.673
+- planner_regret: 0.135
+- foliage_success: 0.410
+- bag_success: 0.370
+- cloth_success: 0.520
+- paired_paired_episodes_vs_model: 300.000
+- paired_success_delta_vs_model: 0.153
+- paired_visibility_delta_vs_model: -12.421
+- paired_reocclusion_delta_vs_model: 0.010
+- paired_disturbance_delta_vs_model: -0.059
+## oracle
+- controller: oracle
+- episodes: 300.000
+- mean_success: 0.407
+- visibility_integral: 24.971
+- corridor_availability: 0.698
+- reocclusion_rate: 0.004
+- disturbance_cost: 0.121
+- premature_retrieve_rate: 0.143
+- reocclusion_after_reveal_rate: 0.523
+- planner_regret: 0.000
+- foliage_success: 0.500
+- bag_success: 0.420
+- cloth_success: 0.300
+- paired_paired_episodes_vs_model: 300.000
+- paired_success_delta_vs_model: 0.127
+- paired_visibility_delta_vs_model: -6.998
+- paired_reocclusion_delta_vs_model: -0.011
+- paired_disturbance_delta_vs_model: -0.272
+## scripted
+- controller: scripted
+- episodes: 300.000
+- mean_success: 1.000
+- visibility_integral: 1.691
+- corridor_availability: 0.665
+- reocclusion_rate: 0.000
+- disturbance_cost: 0.161
+- premature_retrieve_rate: 0.000
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.000
+- foliage_success: 1.000
+- bag_success: 1.000
+- cloth_success: 1.000
+- paired_paired_episodes_vs_model: 300.000
+- paired_success_delta_vs_model: 0.720
+- paired_visibility_delta_vs_model: -30.277
+- paired_reocclusion_delta_vs_model: -0.014
+- paired_disturbance_delta_vs_model: -0.232

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "task": "bimanual_dual_push_buttons",
+  "date": "2026-03-31",
+  "code_changes": [
+    "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py",
+    "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py",
+    "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py",
+    "/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh",
+    "/workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py",
+    "/workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py"
+  ],
+  "verification": [
+    "pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py",
+    "python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py",
+    "bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh"
+  ],
+  "results": {
+    "elastic_scene_retarget_probe_1x120": {
+      "path": "/workspace/reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json",
+      "checkpoint": "/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt",
+      "mean_success": 1.0,
+      "mean_return": 1.0,
+      "retrieved_episode_index": 11,
+      "retrieval_similarity": 0.9998629689216614
+    },
+    "full_arch_hybrid_iter6_backbone_1x120": {
+      "path": "/workspace/reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json",
+      "controller_checkpoint": "/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt",
+      "retrieval_checkpoint": "/workspace/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt",
+      "mean_success": 1.0,
+      "mean_return": 1.0,
+      "steps": 116,
+      "path_recoveries": 0,
+      "noop_fallbacks": 0,
+      "active_modules": [
+        "multiview fusion",
+        "observation memory",
+        "learned action decoder",
+        "learned selector shortlist",
+        "world model",
+        "planner",
+        "geometry tokens",
+        "camera-pose tokens"
+      ],
+      "first_selected_mode": "residual::maintain_opening",
+      "last_selected_mode": "residual::base_action"
+    }
+  }
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md ADDED Viewed

	@@ -0,0 +1,54 @@

+# Dual Push Full-Architecture Hybrid
+- Task: `bimanual_dual_push_buttons`
+- Date: `2026-03-31`
+## Code Changes
+- Added delta conversion from absolute retargeted poses:
+  - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py`
+- Added hybrid candidate composition utilities:
+  - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py`
+- Added full-architecture dual-push hybrid evaluator:
+  - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
+- Added reproducible wrapper:
+  - `/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
+- Added regression coverage:
+  - `/workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py`
+  - `/workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
+## Verification
+- `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
+- `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
+- `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
+## Raw Results
+- Elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization, `1` episode, `120` steps:
+  - `/workspace/reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
+  - checkpoint: `/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt`
+  - mean success: `1.0`
+  - mean return: `1.0`
+  - retrieved episode index: `11`
+  - retrieval similarity: `0.9998629689216614`
+- Full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint, vision-only button localization, `1` episode, `120` steps:
+  - `/workspace/reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
+  - controller checkpoint: `/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt`
+  - retrieval checkpoint: `/workspace/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt`
+  - active modules:
+    - multiview fusion
+    - observation memory
+    - learned action decoder
+    - learned selector shortlist
+    - world model
+    - planner
+    - geometry tokens
+    - camera-pose tokens
+  - mean success: `1.0`
+  - mean return: `1.0`
+  - steps: `116`
+  - path recoveries: `0`
+  - noop fallbacks: `0`
+  - first selected mode: `residual::maintain_opening`
+  - last selected mode: `residual::base_action`

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.json ADDED Viewed

	@@ -0,0 +1,718 @@

+{
+  "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
+  "dataset_path": "/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt",
+  "teacher_macro_names_by_task": {
+    "foliage": [
+      "teacher",
+      "pin_canopy",
+      "maintain_gap",
+      "premature_retrieve",
+      "reveal_with_release",
+      "wrong_side_reveal",
+      "foliage_immediate_reocclusion",
+      "over_disturbance"
+    ],
+    "bag": [
+      "teacher",
+      "widen_mouth",
+      "maintain_mouth",
+      "premature_retrieve",
+      "reveal_with_release",
+      "wrong_edge_reveal",
+      "pin_left_rim",
+      "bag_fabric_probe"
+    ],
+    "cloth": [
+      "teacher",
+      "lift_edge",
+      "stabilize_fold",
+      "premature_retrieve",
+      "reveal_with_release",
+      "cloth_lift_high",
+      "wrong_layer_reveal",
+      "delayed_actor_entry"
+    ]
+  },
+  "overall": {
+    "samples": 131,
+    "teacher_oracle_top1_accuracy": 0.9694656488549618,
+    "proposal_teacher_utility_spearman": 0.9828442390658302,
+    "slotwise_reconstruction_mse": [
+      0.02260977029800415,
+      0.13588657975196838,
+      0.18361078202724457,
+      0.21640430390834808,
+      0.12281869351863861,
+      0.19468815624713898,
+      0.14250919222831726,
+      0.2174699604511261
+    ],
+    "slotwise_best_match_mse": [
+      0.02260977029800415,
+      0.07509409636259079,
+      0.06449330598115921,
+      0.082102470099926,
+      0.07563762366771698,
+      0.07175964117050171,
+      0.0797775611281395,
+      0.07993023842573166
+    ],
+    "diagonal_reconstruction_mse": 0.15449969470500946,
+    "best_match_reconstruction_mse": 0.06892558932304382,
+    "teacher_slot_coverage_rate": [
+      1.0,
+      0.07633587718009949,
+      0.0,
+      0.0,
+      0.1679389327764511,
+      0.0,
+      0.007633587811142206,
+      0.0
+    ],
+    "proposal_slot_teacher_confusion": [
+      [
+        131,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ],
+      [
+        122,
+        9,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ],
+      [
+        130,
+        1,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ],
+      [
+        131,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ],
+      [
+        109,
+        0,
+        0,
+        0,
+        22,
+        0,
+        0,
+        0
+      ],
+      [
+        128,
+        3,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ],
+      [
+        118,
+        4,
+        0,
+        0,
+        8,
+        0,
+        1,
+        0
+      ],
+      [
+        131,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0,
+        0
+      ]
+    ],
+    "proposal_slot_best_teacher_slot": [
+      {
+        "proposal_slot": 0,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 1.0,
+        "support": 131
+      },
+      {
+        "proposal_slot": 1,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 0.9312977099236641,
+        "support": 131
+      },
+      {
+        "proposal_slot": 2,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 0.9923664122137404,
+        "support": 131
+      },
+      {
+        "proposal_slot": 3,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 1.0,
+        "support": 131
+      },
+      {
+        "proposal_slot": 4,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 0.8320610687022901,
+        "support": 131
+      },
+      {
+        "proposal_slot": 5,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 0.9770992366412213,
+        "support": 131
+      },
+      {
+        "proposal_slot": 6,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 0.9007633587786259,
+        "support": 131
+      },
+      {
+        "proposal_slot": 7,
+        "best_teacher_slot": 0,
+        "best_teacher_slot_rate": 1.0,
+        "support": 131
+      }
+    ],
+    "proposal_candidate_pairwise_l2": 2.6658203583637268
+  },
+  "by_task": {
+    "foliage": {
+      "samples": 43,
+      "teacher_oracle_top1_accuracy": 1.0,
+      "proposal_teacher_utility_spearman": 0.9880235200593535,
+      "slotwise_reconstruction_mse": [
+        0.019004767760634422,
+        0.1270177662372589,
+        0.13285410404205322,
+        0.22256922721862793,
+        0.11697744578123093,
+        0.18430863320827484,
+        0.1230543926358223,
+        0.23497486114501953
+      ],
+      "slotwise_best_match_mse": [
+        0.019004767760634422,
+        0.07327625900506973,
+        0.05797513201832771,
+        0.07962016016244888,
+        0.07355079799890518,
+        0.0724499523639679,
+        0.07063580304384232,
+        0.08340194821357727
+      ],
+      "diagonal_reconstruction_mse": 0.14509515464305878,
+      "best_match_reconstruction_mse": 0.06623934954404831,
+      "teacher_slot_coverage_rate": [
+        1.0,
+        0.11627907305955887,
+        0.0,
+        0.0,
+        0.1860465109348297,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "proposal_slot_teacher_confusion": [
+        [
+          43,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          38,
+          5,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          42,
+          1,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          43,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          35,
+          0,
+          0,
+          0,
+          8,
+          0,
+          0,
+          0
+        ],
+        [
+          40,
+          3,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          35,
+          0,
+          0,
+          0,
+          8,
+          0,
+          0,
+          0
+        ],
+        [
+          43,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ]
+      ],
+      "proposal_slot_best_teacher_slot": [
+        {
+          "proposal_slot": 0,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 43
+        },
+        {
+          "proposal_slot": 1,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.8837209302325582,
+          "support": 43
+        },
+        {
+          "proposal_slot": 2,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.9767441860465116,
+          "support": 43
+        },
+        {
+          "proposal_slot": 3,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 43
+        },
+        {
+          "proposal_slot": 4,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.813953488372093,
+          "support": 43
+        },
+        {
+          "proposal_slot": 5,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.9302325581395349,
+          "support": 43
+        },
+        {
+          "proposal_slot": 6,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.813953488372093,
+          "support": 43
+        },
+        {
+          "proposal_slot": 7,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 43
+        }
+      ],
+      "proposal_candidate_pairwise_l2": 2.5722522181133893
+    },
+    "bag": {
+      "samples": 48,
+      "teacher_oracle_top1_accuracy": 1.0,
+      "proposal_teacher_utility_spearman": 1.0,
+      "slotwise_reconstruction_mse": [
+        0.008700483478605747,
+        0.13847370445728302,
+        0.2201945185661316,
+        0.2279948592185974,
+        0.1253373622894287,
+        0.1859302669763565,
+        0.12765681743621826,
+        0.22562821209430695
+      ],
+      "slotwise_best_match_mse": [
+        0.008700483478605747,
+        0.060360122472047806,
+        0.05706041678786278,
+        0.07241564244031906,
+        0.061498433351516724,
+        0.05977138876914978,
+        0.06826537102460861,
+        0.06816806644201279
+      ],
+      "diagonal_reconstruction_mse": 0.15748952329158783,
+      "best_match_reconstruction_mse": 0.05702998861670494,
+      "teacher_slot_coverage_rate": [
+        1.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0
+      ],
+      "proposal_slot_teacher_confusion": [
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          48,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ]
+      ],
+      "proposal_slot_best_teacher_slot": [
+        {
+          "proposal_slot": 0,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 1,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 2,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 3,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 4,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 5,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 6,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        },
+        {
+          "proposal_slot": 7,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 48
+        }
+      ],
+      "proposal_candidate_pairwise_l2": 2.8185487488905587
+    },
+    "cloth": {
+      "samples": 40,
+      "teacher_oracle_top1_accuracy": 0.9,
+      "proposal_teacher_utility_spearman": 0.9566895988767886,
+      "slotwise_reconstruction_mse": [
+        0.04317628592252731,
+        0.14231613278388977,
+        0.19427374005317688,
+        0.19586828351020813,
+        0.12607567012310028,
+        0.2163556069135666,
+        0.18124601244926453,
+        0.1888623684644699
+      ],
+      "slotwise_best_match_mse": [
+        0.04317628592252731,
+        0.09472905099391937,
+        0.08041983097791672,
+        0.0963950902223587,
+        0.09484796226024628,
+        0.08540350198745728,
+        0.10341956466436386,
+        0.09031279385089874
+      ],
+      "diagonal_reconstruction_mse": 0.16102175414562225,
+      "best_match_reconstruction_mse": 0.08608800917863846,
+      "teacher_slot_coverage_rate": [
+        1.0,
+        0.125,
+        0.0,
+        0.0,
+        0.3499999940395355,
+        0.0,
+        0.02500000037252903,
+        0.0
+      ],
+      "proposal_slot_teacher_confusion": [
+        [
+          40,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          36,
+          4,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          40,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          40,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          26,
+          0,
+          0,
+          0,
+          14,
+          0,
+          0,
+          0
+        ],
+        [
+          40,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ],
+        [
+          35,
+          4,
+          0,
+          0,
+          0,
+          0,
+          1,
+          0
+        ],
+        [
+          40,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ]
+      ],
+      "proposal_slot_best_teacher_slot": [
+        {
+          "proposal_slot": 0,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 40
+        },
+        {
+          "proposal_slot": 1,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.9,
+          "support": 40
+        },
+        {
+          "proposal_slot": 2,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 40
+        },
+        {
+          "proposal_slot": 3,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 40
+        },
+        {
+          "proposal_slot": 4,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.65,
+          "support": 40
+        },
+        {
+          "proposal_slot": 5,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 40
+        },
+        {
+          "proposal_slot": 6,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 0.875,
+          "support": 40
+        },
+        {
+          "proposal_slot": 7,
+          "best_teacher_slot": 0,
+          "best_teacher_slot_rate": 1.0,
+          "support": 40
+        }
+      ],
+      "proposal_candidate_pairwise_l2": 2.583132040500641
+    }
+  }
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.md ADDED Viewed

	@@ -0,0 +1,65 @@

+# Proposal Alignment Diagnostics
+- checkpoint: `/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt`
+- dataset: `/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt`
+- samples: 131
+## Overall
+- teacher-oracle top1 accuracy: 0.969
+- proposal/teacher utility spearman: 0.983
+- diagonal reconstruction MSE: 0.1545
+- best-match reconstruction MSE: 0.0689
+- proposal candidate pairwise L2: 2.6658
+## By Task
+### foliage
+- samples: 43
+- teacher-oracle top1 accuracy: 1.000
+- proposal/teacher utility spearman: 0.988
+- diagonal reconstruction MSE: 0.1451
+- best-match reconstruction MSE: 0.0662
+- proposal candidate pairwise L2: 2.5723
+- dominant slot alignment:
+  proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 1 -> teacher slot 0 (teacher), rate 0.884
+  proposal slot 2 -> teacher slot 0 (teacher), rate 0.977
+  proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 4 -> teacher slot 0 (teacher), rate 0.814
+  proposal slot 5 -> teacher slot 0 (teacher), rate 0.930
+  proposal slot 6 -> teacher slot 0 (teacher), rate 0.814
+  proposal slot 7 -> teacher slot 0 (teacher), rate 1.000
+### bag
+- samples: 48
+- teacher-oracle top1 accuracy: 1.000
+- proposal/teacher utility spearman: 1.000
+- diagonal reconstruction MSE: 0.1575
+- best-match reconstruction MSE: 0.0570
+- proposal candidate pairwise L2: 2.8185
+- dominant slot alignment:
+  proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 1 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 2 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 4 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 5 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 6 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 7 -> teacher slot 0 (teacher), rate 1.000
+### cloth
+- samples: 40
+- teacher-oracle top1 accuracy: 0.900
+- proposal/teacher utility spearman: 0.957
+- diagonal reconstruction MSE: 0.1610
+- best-match reconstruction MSE: 0.0861
+- proposal candidate pairwise L2: 2.5831
+- dominant slot alignment:
+  proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 1 -> teacher slot 0 (teacher), rate 0.900
+  proposal slot 2 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 4 -> teacher slot 0 (teacher), rate 0.650
+  proposal slot 5 -> teacher slot 0 (teacher), rate 1.000
+  proposal slot 6 -> teacher slot 0 (teacher), rate 0.875
+  proposal slot 7 -> teacher slot 0 (teacher), rate 1.000

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proxy_diagnostics_v7/proxy_diagnostics.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "planner_top1_accuracy": 1.0,
+  "planner_regret": 0.0,
+  "planner_score_utility_spearman": 0.5312977433204651,
+  "risk_calibration_mse": 0.009349034167826176,
+  "role_collapse_rate": 0.0,
+  "proposal_diversity": 0.13560470938682556,
+  "left_right_equivariance_error": 0.0006655894565697321,
+  "belief_calibration_brier": 0.0033960030414164066,
+  "reocclusion_calibration_brier": 0.23249056935310364,
+  "support_stability_mae": 0.031876545399427414,
+  "clearance_auc": 0.9635915460321444,
+  "memory_write_rate": 0.013606361113488674,
+  "memory_saturation": 0.24346540868282318,
+  "num_samples": 131
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,38 @@

+# Reveal Proxy Benchmark
+## baseline_rgbd_stage3
+- controller: model
+- checkpoint: /workspace/VLAarchtests_hf_outputs/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt
+- episodes: 300.000
+- mean_success: 0.310
+- visibility_integral: 16.799
+- corridor_availability: 0.440
+- reocclusion_rate: 0.034
+- disturbance_cost: 0.171
+- premature_retrieve_rate: 0.423
+- reocclusion_after_reveal_rate: 0.533
+- planner_regret: 0.025
+- foliage_success: 0.210
+- bag_success: 0.150
+- cloth_success: 0.570
+## iter5_selector
+- controller: model
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter5_seed17/checkpoint_best.pt
+- episodes: 300.000
+- mean_success: 0.450
+- visibility_integral: 40.492
+- corridor_availability: 0.880
+- reocclusion_rate: 0.006
+- disturbance_cost: 0.454
+- premature_retrieve_rate: 0.107
+- reocclusion_after_reveal_rate: 0.260
+- planner_regret: 0.116
+- foliage_success: 0.440
+- bag_success: 0.400
+- cloth_success: 0.510
+- paired_paired_episodes_vs_baseline_rgbd_stage3: 300.000
+- paired_success_delta_vs_baseline_rgbd_stage3: 0.140
+- paired_visibility_delta_vs_baseline_rgbd_stage3: 23.692
+- paired_reocclusion_delta_vs_baseline_rgbd_stage3: -0.028
+- paired_disturbance_delta_vs_baseline_rgbd_stage3: 0.283

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "anybimanual_local_overlap_floor": {
+    "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
+    "step": 1000,
+    "mean_success": 0.16,
+    "per_task_success": {
+      "coordinated_push_box": 0.0,
+      "coordinated_lift_ball": 0.0,
+      "dual_push_buttons": 0.48
+    }
+  },
+  "anybimanual_public_best_overlap": {
+    "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
+    "step": 60000,
+    "mean_success": 0.6933333333333334,
+    "per_task_success": {
+      "coordinated_push_box": 0.8,
+      "coordinated_lift_ball": 0.32,
+      "dual_push_buttons": 0.96
+    }
+  },
+  "stage1_frozen": {
+    "path": "/workspace/reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/rollout_eval.json",
+    "checkpoint": "/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17/checkpoint_best.pt",
+    "mean_success": 0.0,
+    "per_task_success": {
+      "bimanual_push_box": 0.0,
+      "bimanual_lift_ball": 0.0,
+      "bimanual_dual_push_buttons": 0.0
+    },
+    "per_task_return": {
+      "bimanual_push_box": 0.0,
+      "bimanual_lift_ball": 0.0,
+      "bimanual_dual_push_buttons": 0.0
+    }
+  },
+  "stage2_unfreeze_top2": null,
+  "gates": {
+    "stage1_clears_local_floor": false,
+    "stage2_clears_local_floor": false
+  }
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md ADDED Viewed

	@@ -0,0 +1,16 @@

+# RVT Overlap Branch Summary
+## External Anchors
+- anybimanual_local_overlap_floor: step=`1000`, mean_success=`0.160`
+- anybimanual_public_best_overlap: step=`60000`, mean_success=`0.693`
+## RVT Runs
+- stage1_frozen: mean_success=`0.000`, path=`/workspace/reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/rollout_eval.json`
+- stage2_unfreeze_top2: `not_run`
+## Gates
+- stage1_clears_local_floor: `False`
+- stage2_clears_local_floor: `False`

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## scripted
+- controller: scripted
+- checkpoint: none
+- episodes: 300.000
+- mean_success: 1.000
+- visibility_integral: 1.691
+- corridor_availability: 0.665
+- reocclusion_rate: 0.000
+- disturbance_cost: 0.161
+- premature_retrieve_rate: 0.000
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.000
+- foliage_success: 1.000
+- bag_success: 1.000
+- cloth_success: 1.000

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.json ADDED Viewed

	@@ -0,0 +1,566 @@

+{
+  "benchmark_config": {
+    "benchmark_mode": "smoke",
+    "controller": "scripted",
+    "ablation": null,
+    "proxies": [
+      "foliage_proxy",
+      "bag_proxy",
+      "cloth_proxy"
+    ],
+    "chunk_commit_steps": 0,
+    "benchmark_spec_path": "/workspace/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json",
+    "episodes": 6,
+    "base_config": "/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_base.yaml"
+  },
+  "scripted": {
+    "per_task_success": {
+      "foliage": 1.0,
+      "bag": 1.0,
+      "cloth": 1.0
+    },
+    "mean_success": 1.0,
+    "per_stress_success": {
+      "nominal": 1.0
+    },
+    "per_difficulty_success": {
+      "medium": 1.0
+    },
+    "task_stress_success": {
+      "foliage": {
+        "nominal": 1.0
+      },
+      "bag": {
+        "nominal": 1.0
+      },
+      "cloth": {
+        "nominal": 1.0
+      }
+    },
+    "visibility_integral": 1.7259430487950642,
+    "corridor_availability": 0.7083333432674408,
+    "reocclusion_rate": 0.0,
+    "disturbance_cost": 0.1390303730945823,
+    "premature_retrieve_rate": 0.0,
+    "reocclusion_after_reveal_rate": 0.0,
+    "planner_regret": 0.0,
+    "task_specific_metrics": {
+      "gap_width": 0.04750262860390331,
+      "damage_proxy": 0.1390303730945823,
+      "actor_feasibility_floor": 0.0,
+      "mouth_aperture": 0.2505193983474954,
+      "hold_persistence": 0.9572114593639115,
+      "rim_slip_rate": 0.3306301846990871,
+      "insertable_corridor": 1.0,
+      "fold_preservation": 0.2847869507479139,
+      "layer_separation_quality": 0.1776304892024866,
+      "top_layer_stability": 0.9572114593639115,
+      "lift_too_high_rate": 0.0
+    },
+    "checkpoint": null,
+    "shape_mismatch_keys": [],
+    "episode_records": [
+      {
+        "proxy_name": "foliage_proxy",
+        "task_name": "foliage",
+        "task_id": 0,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 0,
+        "episode_index": 0,
+        "controller": "scripted",
+        "resolved_task_name": "foliage",
+        "success": 1.0,
+        "visibility_integral": 1.4501793384552002,
+        "corridor_availability": 0.6666666865348816,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.09915829507599976,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.1280654333918135,
+        "damage_proxy": 0.09915829507599976,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.0,
+        "hold_persistence": 0.9918578793641878,
+        "rim_slip_rate": 0.36158549250089017,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.0,
+        "layer_separation_quality": 0.0,
+        "top_layer_stability": 0.9918578793641878,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.48582570303966377,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.033686798271736935,
+            "disturbance_cost": 0.09624799506210555,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "foliage",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.7367230437799538,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.05106967923093351,
+            "disturbance_cost": 0.14694608592714536,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "foliage",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "foliage_proxy",
+          "task_name": "foliage",
+          "task_id": 0,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      },
+      {
+        "proxy_name": "foliage_proxy",
+        "task_name": "foliage",
+        "task_id": 0,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 1,
+        "episode_index": 1,
+        "controller": "scripted",
+        "resolved_task_name": "foliage",
+        "success": 1.0,
+        "visibility_integral": 1.9396257400512695,
+        "corridor_availability": 0.75,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.19006885791579062,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.1569503382316063,
+        "damage_proxy": 0.19006885791579062,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.0,
+        "hold_persistence": 0.9690937340076613,
+        "rim_slip_rate": 0.30506012136432714,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.0,
+        "layer_separation_quality": 0.0,
+        "top_layer_stability": 0.9690937340076613,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.3652986439816591,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.05512812322244198,
+            "disturbance_cost": 0.15750892349269138,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "foliage",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.6124352318551509,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.07228509740440894,
+            "disturbance_cost": 0.2065288497268827,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "foliage",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 3,
+            "visibility": 0.8506532259033559,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.09282726483045785,
+            "disturbance_cost": 0.2806959257742542,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "foliage",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "foliage_proxy",
+          "task_name": "foliage",
+          "task_id": 0,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      },
+      {
+        "proxy_name": "bag_proxy",
+        "task_name": "bag",
+        "task_id": 1,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 10000,
+        "episode_index": 0,
+        "controller": "scripted",
+        "resolved_task_name": "bag",
+        "success": 1.0,
+        "visibility_integral": 2.222443103790283,
+        "corridor_availability": 0.75,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.13910771782752218,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.0,
+        "damage_proxy": 0.13910771782752218,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.741060188586711,
+        "hold_persistence": 0.9364245639602276,
+        "rim_slip_rate": 0.2922986490934589,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.0,
+        "layer_separation_quality": 0.0,
+        "top_layer_stability": 0.9364245639602276,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.44712191590268047,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.03871004013416583,
+            "disturbance_cost": 0.11060011466904525,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.6701348483026268,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.05604201435785605,
+            "disturbance_cost": 0.16012004102244587,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 3,
+            "visibility": 0.888034440204704,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.0744425418818137,
+            "disturbance_cost": 0.2175777918921189,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "bag_proxy",
+          "task_name": "bag",
+          "task_id": 1,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      },
+      {
+        "proxy_name": "bag_proxy",
+        "task_name": "bag",
+        "task_id": 1,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 10001,
+        "episode_index": 1,
+        "controller": "scripted",
+        "resolved_task_name": "bag",
+        "success": 1.0,
+        "visibility_integral": 2.2613422870635986,
+        "corridor_availability": 0.75,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.15830948550161264,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.0,
+        "damage_proxy": 0.15830948550161264,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.7620562014982616,
+        "hold_persistence": 0.9274478323003021,
+        "rim_slip_rate": 0.297251848429244,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.0,
+        "layer_separation_quality": 0.0,
+        "top_layer_stability": 0.9274478323003021,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.45762398304566504,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.0451918291732286,
+            "disturbance_cost": 0.1291195119235103,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.6793868561309351,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.06252380339691882,
+            "disturbance_cost": 0.17863943827691092,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 3,
+            "visibility": 0.8954275043439008,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.08129176114682861,
+            "disturbance_cost": 0.23882667082508555,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "bag",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "bag_proxy",
+          "task_name": "bag",
+          "task_id": 1,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      },
+      {
+        "proxy_name": "cloth_proxy",
+        "task_name": "cloth",
+        "task_id": 2,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 20000,
+        "episode_index": 0,
+        "controller": "scripted",
+        "resolved_task_name": "cloth",
+        "success": 1.0,
+        "visibility_integral": 1.315584659576416,
+        "corridor_availability": 0.6666666865348816,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.12976740198650083,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.0,
+        "damage_proxy": 0.12976740198650083,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.0,
+        "hold_persistence": 0.956672084038026,
+        "rim_slip_rate": 0.36507675439044956,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.8460247236628703,
+        "layer_separation_quality": 0.5584220826257553,
+        "top_layer_stability": 0.956672084038026,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.44091287317149475,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.043451471119726,
+            "disturbance_cost": 0.12414706034207429,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "cloth",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.6914333836220553,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.051778792051622516,
+            "disturbance_cost": 0.15397527633712968,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "cloth",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "cloth_proxy",
+          "task_name": "cloth",
+          "task_id": 2,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      },
+      {
+        "proxy_name": "cloth_proxy",
+        "task_name": "cloth",
+        "task_id": 2,
+        "stress_slice": "nominal",
+        "difficulty_bin": "medium",
+        "seed": 20001,
+        "episode_index": 1,
+        "controller": "scripted",
+        "resolved_task_name": "cloth",
+        "success": 1.0,
+        "visibility_integral": 1.1664831638336182,
+        "corridor_availability": 0.6666666865348816,
+        "reocclusion_rate": 0.0,
+        "disturbance_cost": 0.11777048026006785,
+        "premature_retrieve_rate": 0.0,
+        "reocclusion_after_reveal": 0.0,
+        "planner_regret": 0.0,
+        "gap_width": 0.0,
+        "damage_proxy": 0.11777048026006785,
+        "actor_feasibility_floor": 0.0,
+        "mouth_aperture": 0.0,
+        "hold_persistence": 0.9617726625130647,
+        "rim_slip_rate": 0.3625082424161528,
+        "insertable_corridor": 1.0,
+        "fold_preservation": 0.8626969808246128,
+        "layer_separation_quality": 0.5073608525891643,
+        "top_layer_stability": 0.9617726625130647,
+        "lift_too_high_rate": 0.0,
+        "step_trace": [
+          {
+            "step_index": 1,
+            "visibility": 0.39069084905965384,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.04007073221665362,
+            "disturbance_cost": 0.11448780633329608,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "cloth",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          },
+          {
+            "step_index": 2,
+            "visibility": 0.6434277983206992,
+            "support_mode": 0,
+            "corridor_available": 1.0,
+            "reocclusion_risk": 0.047453995031804946,
+            "disturbance_cost": 0.13730301917538718,
+            "chosen_candidate_family": "scripted_teacher",
+            "chosen_candidate_index": 0,
+            "task_name": "cloth",
+            "stress_slice": "nominal",
+            "difficulty_bin": "medium"
+          }
+        ],
+        "chosen_candidate_families": [
+          "scripted_teacher"
+        ],
+        "episode_metadata": {
+          "proxy_name": "cloth_proxy",
+          "task_name": "cloth",
+          "task_id": 2,
+          "stress_slice": "nominal",
+          "difficulty_bin": "medium",
+          "camera_pose_jitter": 0.0,
+          "focal_jitter": 0.0,
+          "lateral_skew": 0.0,
+          "reocclusion_bias": 0.0,
+          "closure_scale": 1.0,
+          "disturbance_gain_scale": 1.0,
+          "corridor_scale": 1.0,
+          "support_stability_penalty": 0.0,
+          "collateral_bias": 0.0
+        }
+      }
+    ]
+  }
+}

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## scripted
+- controller: scripted
+- checkpoint: none
+- episodes: 6.000
+- mean_success: 1.000
+- visibility_integral: 1.726
+- corridor_availability: 0.708
+- reocclusion_rate: 0.000
+- disturbance_cost: 0.139
+- premature_retrieve_rate: 0.000
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.000
+- foliage_success: 1.000
+- bag_success: 1.000
+- cloth_success: 1.000

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.json ADDED Viewed

The diff for this file is too large to render. See raw diff

code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Reveal Proxy Benchmark
+## full_model
+- controller: model
+- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
+- episodes: 300.000
+- mean_success: 0.397
+- visibility_integral: 42.366
+- corridor_availability: 0.903
+- reocclusion_rate: 0.000
+- disturbance_cost: 0.548
+- premature_retrieve_rate: 0.104
+- reocclusion_after_reveal_rate: 0.000
+- planner_regret: 0.110
+- foliage_success: 0.390
+- bag_success: 0.390
+- cloth_success: 0.410