lsnu committed on
Commit
bfb9665
·
verified ·
1 Parent(s): 5fbeceb

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/command.txt +1 -0
  2. artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/stdout.txt +121 -0
  3. artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/summary.json +35 -0
  4. artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/stderr.txt +35 -0
  5. artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/summary.json +41 -0
  6. artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.json +0 -0
  7. artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.md +17 -0
  8. artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.json +0 -0
  9. artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.md +17 -0
  10. artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.json +0 -0
  11. artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.md +17 -0
  12. artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.json +0 -0
  13. artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.md +17 -0
  14. artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.json +0 -0
  15. artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.md +17 -0
  16. artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.json +0 -0
  17. artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.md +17 -0
  18. artifacts/reports/proxy_rank_only_live/active/reveal_benchmark.md +17 -0
  19. artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.json +0 -0
  20. artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.md +17 -0
  21. artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.json +0 -0
  22. artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.md +17 -0
  23. artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.json +0 -0
  24. artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.md +17 -0
  25. code/VLAarchtests2_code/CHANGE_AND_TEST_LOG.md +221 -0
  26. code/VLAarchtests2_code/MODEL_AND_ARTIFACT_INDEX.md +59 -0
  27. code/VLAarchtests2_code/README.md +301 -0
  28. code/VLAarchtests2_code/RESULTS_RAW.md +178 -0
  29. code/VLAarchtests2_code/VLAarchtests/MODEL_INDEX.md +81 -0
  30. code/VLAarchtests2_code/VLAarchtests/README.md +172 -0
  31. code/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json +2702 -0
  32. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/ablations_v7/ablations.md +92 -0
  33. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json +67 -0
  34. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md +27 -0
  35. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md +189 -0
  36. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/controller_sweep_v7/controller_sweep.md +96 -0
  37. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.json +49 -0
  38. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md +54 -0
  39. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.json +718 -0
  40. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.md +65 -0
  41. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proxy_diagnostics_v7/proxy_diagnostics.json +16 -0
  42. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.md +38 -0
  43. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.json +42 -0
  44. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md +16 -0
  45. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.json +0 -0
  46. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.md +17 -0
  47. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.json +566 -0
  48. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.md +17 -0
  49. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.json +0 -0
  50. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.md +17 -0
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/command.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ /workspace/envs/rlbench/bin/python /workspace/third_party/AnyBimanual/eval.py method=PERACT_BC framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live framework.start_seed=0 framework.eval_type=60000 framework.eval_episodes=25 framework.eval_envs=1 framework.gpu=0 rlbench.task_name=perlf_release_dual_push_buttons_smoke1 rlbench.tasks=[dual_push_buttons] rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root rlbench.headless=True rlbench.gripper_mode=BimanualDiscrete rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning rlbench.action_mode=BimanualMoveArmThenGripper
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/stdout.txt ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-03-31 23:32:28,801][root][INFO] -
2
+ method:
3
+ name: PERACT_BC
4
+ agent_type: leader_follower
5
+ robot_name: bimanual
6
+ image_crop_size: 64
7
+ bounds_offset:
8
+ - 0.15
9
+ voxel_sizes:
10
+ - 100
11
+ include_prev_layer: false
12
+ num_latents: 2048
13
+ latent_dim: 512
14
+ transformer_depth: 6
15
+ transformer_iterations: 1
16
+ cross_heads: 1
17
+ cross_dim_head: 64
18
+ latent_heads: 8
19
+ latent_dim_head: 64
20
+ pos_encoding_with_lang: true
21
+ conv_downsample: true
22
+ lang_fusion_type: seq
23
+ voxel_patch_size: 5
24
+ voxel_patch_stride: 5
25
+ final_dim: 64
26
+ low_dim_size: 4
27
+ input_dropout: 0.1
28
+ attn_dropout: 0.1
29
+ decoder_dropout: 0.0
30
+ lr: 0.0005
31
+ lr_scheduler: false
32
+ num_warmup_steps: 3000
33
+ optimizer: lamb
34
+ lambda_weight_l2: 1.0e-06
35
+ trans_loss_weight: 1.0
36
+ rot_loss_weight: 1.0
37
+ grip_loss_weight: 1.0
38
+ collision_loss_weight: 1.0
39
+ rotation_resolution: 5
40
+ activation: lrelu
41
+ norm: None
42
+ crop_augmentation: true
43
+ transform_augmentation:
44
+ apply_se3: true
45
+ aug_xyz:
46
+ - 0.125
47
+ - 0.125
48
+ - 0.125
49
+ aug_rpy:
50
+ - 0.0
51
+ - 0.0
52
+ - 45.0
53
+ aug_rot_resolution: ${method.rotation_resolution}
54
+ demo_augmentation: true
55
+ demo_augmentation_every_n: 10
56
+ no_skip_connection: false
57
+ no_perceiver: false
58
+ no_language: false
59
+ keypoint_method: heuristic
60
+ rlbench:
61
+ task_name: perlf_release_dual_push_buttons_smoke1
62
+ tasks:
63
+ - dual_push_buttons
64
+ demo_path: /workspace/baselines/AnyBimanual_subset3_demo_root
65
+ episode_length: 25
66
+ cameras:
67
+ - over_shoulder_left
68
+ - over_shoulder_right
69
+ - overhead
70
+ - wrist_right
71
+ - wrist_left
72
+ - front
73
+ camera_resolution:
74
+ - 256
75
+ - 256
76
+ scene_bounds:
77
+ - -0.3
78
+ - -0.5
79
+ - 0.6
80
+ - 0.7
81
+ - 0.5
82
+ - 1.6
83
+ include_lang_goal_in_obs: true
84
+ time_in_state: true
85
+ headless: true
86
+ gripper_mode: BimanualDiscrete
87
+ arm_action_mode: BimanualEndEffectorPoseViaPlanning
88
+ action_mode: BimanualMoveArmThenGripper
89
+ framework:
90
+ tensorboard_logging: true
91
+ csv_logging: true
92
+ gpu: 0
93
+ logdir: /workspace/baselines/AnyBimanual_release_eval_live
94
+ start_seed: 0
95
+ record_every_n: 5
96
+ eval_envs: 1
97
+ eval_from_eps_number: 0
98
+ eval_episodes: 25
99
+ eval_type: 60000
100
+ eval_save_metrics: true
101
+ cinematic_recorder:
102
+ enabled: false
103
+ camera_resolution:
104
+ - 1280
105
+ - 720
106
+ fps: 30
107
+ rotate_speed: 0.005
108
+ save_path: /tmp/videos/
109
+
110
+ [2026-03-31 23:32:28,811][root][INFO] - Using env device cuda:0.
111
+ [2026-03-31 23:32:28,812][root][INFO] - Evaluating seed 0.
112
+ [2026-03-31 23:32:28,812][root][INFO] - Using method PERACT_BC with type leader_follower
113
+ Weight: [60000]
114
+ [03/31/26 23:33:05] INFO INFO:root:eval_env: _independent_env_runner.py:131
115
+ Launching env.
116
+ INFO INFO:root:Agent _independent_env_runner.py:134
117
+ information:
118
+ INFO INFO:root:<yarr.agen _independent_env_runner.py:135
119
+ ts.agent.LeaderFollo
120
+ werAgent object at
121
+ 0x77975ba9b970>
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep25/summary.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adapter_mode": "trunk_only",
3
+ "task_name": "perlf_release_dual_push_buttons_smoke1",
4
+ "tasks": [
5
+ "dual_push_buttons"
6
+ ],
7
+ "task_families": [
8
+ "generic"
9
+ ],
10
+ "passthrough_only": true,
11
+ "passthrough_reason": "generic_task_family",
12
+ "episodes_requested": 25,
13
+ "episode_scores": [],
14
+ "mean_score": 0.0,
15
+ "final_score": null,
16
+ "subprocess_returncode": 0,
17
+ "command": [
18
+ "/workspace/envs/rlbench/bin/python",
19
+ "/workspace/third_party/AnyBimanual/eval.py",
20
+ "method=PERACT_BC",
21
+ "framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live",
22
+ "framework.start_seed=0",
23
+ "framework.eval_type=60000",
24
+ "framework.eval_episodes=25",
25
+ "framework.eval_envs=1",
26
+ "framework.gpu=0",
27
+ "rlbench.task_name=perlf_release_dual_push_buttons_smoke1",
28
+ "rlbench.tasks=[dual_push_buttons]",
29
+ "rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root",
30
+ "rlbench.headless=True",
31
+ "rlbench.gripper_mode=BimanualDiscrete",
32
+ "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
33
+ "rlbench.action_mode=BimanualMoveArmThenGripper"
34
+ ]
35
+ }
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/stderr.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
2
+ The version_base parameter is not specified.
3
+ Please specify a compatability version level, or None.
4
+ Will assume defaults for version 1.1
5
+ @hydra.main(config_name="eval", config_path="conf")
6
+ /workspace/envs/rlbench/lib/python3.10/site-packages/hydra/_internal/defaults_list.py:251: UserWarning: In 'eval': Defaults list is missing `_self_`. See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/default_composition_order for more information
7
+ warnings.warn(msg, UserWarning)
8
+ /workspace/envs/rlbench/lib/python3.10/site-packages/hydra/core/default_element.py:124: UserWarning: In 'method/PERACT_BC': Usage of deprecated keyword in package header '# @package _group_'.
9
+ See https://hydra.cc/docs/1.2/upgrades/1.0_to_1.1/changes_to_package_header for more information
10
+ deprecation_warning(
11
+ /workspace/envs/rlbench/lib/python3.10/site-packages/hydra/_internal/hydra.py:119: UserWarning: Future Hydra versions will no longer change working directory at job runtime by default.
12
+ See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for more information.
13
+ ret = run_job(
14
+ /workspace/envs/rlbench/lib/python3.10/site-packages/wandb/apis/public.py:3109: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
15
+ from pkg_resources import parse_version
16
+ /workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
17
+ The version_base parameter is not specified.
18
+ Please specify a compatability version level, or None.
19
+ Will assume defaults for version 1.1
20
+ @hydra.main(config_name="eval", config_path="conf")
21
+ /workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
22
+ The version_base parameter is not specified.
23
+ Please specify a compatability version level, or None.
24
+ Will assume defaults for version 1.1
25
+ @hydra.main(config_name="eval", config_path="conf")
26
+ /workspace/envs/rlbench/lib/python3.10/site-packages/wandb/apis/public.py:3109: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
27
+ from pkg_resources import parse_version
28
+ /workspace/third_party/AnyBimanual/eval.py:183: UserWarning:
29
+ The version_base parameter is not specified.
30
+ Please specify a compatability version level, or None.
31
+ Will assume defaults for version 1.1
32
+ @hydra.main(config_name="eval", config_path="conf")
33
+ /workspace/third_party/YARR/yarr/utils/rollout_generator.py:73: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1716905979055/work/torch/csrc/utils/tensor_new.cpp:274.)
34
+ prepped_data = {k: torch.tensor([v], device=self._env_device) for k, v in obs_history.items()}
35
+ [W CudaIPCTypes.cpp:16] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]
artifacts/reports/anybimanual_anchor_bridge/trunk_only_ep5/summary.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "adapter_mode": "trunk_only",
3
+ "task_name": "perlf_release_dual_push_buttons_smoke1",
4
+ "tasks": [
5
+ "dual_push_buttons"
6
+ ],
7
+ "task_families": [
8
+ "generic"
9
+ ],
10
+ "passthrough_only": true,
11
+ "passthrough_reason": "generic_task_family",
12
+ "episodes_requested": 5,
13
+ "episode_scores": [
14
+ 0.0,
15
+ 0.0,
16
+ 100.0,
17
+ 0.0,
18
+ 0.0,
19
+ 20.0
20
+ ],
21
+ "mean_score": 20.0,
22
+ "subprocess_returncode": 0,
23
+ "command": [
24
+ "/workspace/envs/rlbench/bin/python",
25
+ "/workspace/third_party/AnyBimanual/eval.py",
26
+ "method=PERACT_BC",
27
+ "framework.logdir=/workspace/baselines/AnyBimanual_release_eval_live",
28
+ "framework.start_seed=0",
29
+ "framework.eval_type=60000",
30
+ "framework.eval_episodes=5",
31
+ "framework.eval_envs=1",
32
+ "framework.gpu=0",
33
+ "rlbench.task_name=perlf_release_dual_push_buttons_smoke1",
34
+ "rlbench.tasks=[dual_push_buttons]",
35
+ "rlbench.demo_path=/workspace/baselines/AnyBimanual_subset3_demo_root",
36
+ "rlbench.headless=True",
37
+ "rlbench.gripper_mode=BimanualDiscrete",
38
+ "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
39
+ "rlbench.action_mode=BimanualMoveArmThenGripper"
40
+ ]
41
+ }
artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_base_fast_eval/active/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.160
9
+ - corridor_availability: 0.002
10
+ - reocclusion_rate: 0.002
11
+ - disturbance_cost: 0.676
12
+ - premature_retrieve_rate: 0.835
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.000
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_base_fast_eval/noop/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.160
9
+ - corridor_availability: 0.002
10
+ - reocclusion_rate: 0.002
11
+ - disturbance_cost: 0.676
12
+ - premature_retrieve_rate: 0.835
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.046
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_fast_eval/active/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.160
9
+ - corridor_availability: 0.002
10
+ - reocclusion_rate: 0.002
11
+ - disturbance_cost: 0.676
12
+ - premature_retrieve_rate: 0.835
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.000
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_fast_eval/candidate0/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: candidate0
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.160
9
+ - corridor_availability: 0.002
10
+ - reocclusion_rate: 0.002
11
+ - disturbance_cost: 0.676
12
+ - premature_retrieve_rate: 0.835
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.046
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_fast_eval/noop/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.160
9
+ - corridor_availability: 0.002
10
+ - reocclusion_rate: 0.002
11
+ - disturbance_cost: 0.676
12
+ - premature_retrieve_rate: 0.835
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.046
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_fast_eval/oracle/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: oracle
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
6
+ - episodes: 24.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 10.234
9
+ - corridor_availability: 0.173
10
+ - reocclusion_rate: 0.008
11
+ - disturbance_cost: 0.655
12
+ - premature_retrieve_rate: 0.811
13
+ - reocclusion_after_reveal_rate: 0.250
14
+ - planner_regret: 0.000
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_only_live/active/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt
6
+ - episodes: 300.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 0.135
9
+ - corridor_availability: 0.000
10
+ - reocclusion_rate: 0.000
11
+ - disturbance_cost: 0.675
12
+ - premature_retrieve_rate: 0.825
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.001
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_only_rebuild128_smoke/active/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt
6
+ - episodes: 72.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 2.161
9
+ - corridor_availability: 0.029
10
+ - reocclusion_rate: 0.020
11
+ - disturbance_cost: 0.746
12
+ - premature_retrieve_rate: 0.363
13
+ - reocclusion_after_reveal_rate: 0.250
14
+ - planner_regret: 0.010
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_rank_only_rebuild128_smoke/noop/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## adapter
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt
6
+ - episodes: 72.000
7
+ - mean_success: 0.000
8
+ - visibility_integral: 2.161
9
+ - corridor_availability: 0.029
10
+ - reocclusion_rate: 0.020
11
+ - disturbance_cost: 0.746
12
+ - premature_retrieve_rate: 0.363
13
+ - reocclusion_after_reveal_rate: 0.250
14
+ - planner_regret: 0.021
15
+ - foliage_success: 0.000
16
+ - bag_success: 0.000
17
+ - cloth_success: 0.000
artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/reports/proxy_semantic_nowm_quick12_final/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## patched
4
+ - controller: model
5
+ - checkpoint: /workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/checkpoint_best.pt
6
+ - episodes: 36.000
7
+ - mean_success: 0.667
8
+ - visibility_integral: 19.950
9
+ - corridor_availability: 0.797
10
+ - reocclusion_rate: 0.003
11
+ - disturbance_cost: 0.284
12
+ - premature_retrieve_rate: 0.372
13
+ - reocclusion_after_reveal_rate: 0.222
14
+ - planner_regret: 0.159
15
+ - foliage_success: 0.667
16
+ - bag_success: 0.750
17
+ - cloth_success: 0.583
code/VLAarchtests2_code/CHANGE_AND_TEST_LOG.md ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Change And Test Log
2
+
3
+ This file records the main code changes and executed test commands copied into this repo. Result statements below are raw command outcomes only.
4
+
5
+ ## Previous Repo Work Included Here
6
+
7
+ Copied from `history/VLAarchtests_previous_README.md`:
8
+
9
+ - core model, memory, planner, and dataset changes under:
10
+ - `VLAarchtests/code/reveal_vla_bimanual/models/`
11
+ - `VLAarchtests/code/reveal_vla_bimanual/train/losses.py`
12
+ - `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/`
13
+ - `VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py`
14
+ - training and eval paths under:
15
+ - `VLAarchtests/code/reveal_vla_bimanual/train/`
16
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/`
17
+ - earlier test suite under:
18
+ - `VLAarchtests/tests/`
19
+
20
+ ## Current Session File Changes
21
+
22
+ ### Core reveal/proxy path
23
+
24
+ - `VLAarchtests/code/reveal_vla_bimanual/models/policy.py`
25
+ - `VLAarchtests/code/reveal_vla_bimanual/models/action_decoder.py`
26
+ - `VLAarchtests/code/reveal_vla_bimanual/models/backbones.py`
27
+ - `VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py`
28
+ - `VLAarchtests/code/reveal_vla_bimanual/train/losses.py`
29
+ - `VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
30
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_reveal_benchmark.py`
31
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/summarize_anybimanual_overlap_eval.py`
32
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
33
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py`
34
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_proposal_alignment_diagnostics.py`
35
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_knn_eval.py`
36
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_knn_task_sweep.py`
37
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_retargeted_demo_eval.py`
38
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
39
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py`
40
+ - `VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py`
41
+ - `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py`
42
+ - `VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py`
43
+ - `VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py`
44
+
45
+ ### Training/eval wrappers and configs
46
+
47
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_task_routed_proxy_eval.sh`
48
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_bag_selector_iter9.sh`
49
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
50
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_eval.sh`
51
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
52
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_retargeted_demo_eval.sh`
53
+ - `VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
54
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter6.yaml`
55
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter7.yaml`
56
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8.yaml`
57
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter9_bag.yaml`
58
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_100demo_fair_step1_full.yaml`
59
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17.yaml`
60
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_unfreeze_top2_seed17.yaml`
61
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_finetune_seed17.yaml`
62
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_finetune_weighted_seed17.yaml`
63
+ - `VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17.yaml`
64
+ - `environment/reconstruct_anybimanual_overlap_replay.sh`
65
+
66
+ ### Test additions or updates
67
+
68
+ - `VLAarchtests/tests/test_eval_toggle_paths_work.py`
69
+ - `VLAarchtests/tests/test_task_routed_model_eval.py`
70
+ - `VLAarchtests/tests/test_anybimanual_resume_logic.py`
71
+ - `VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py`
72
+ - `VLAarchtests/tests/test_candidate_ranking_loss.py`
73
+ - `VLAarchtests/tests/test_compose_task_routed_proxy_summary.py`
74
+ - `VLAarchtests/tests/test_build_task_specialized_episode_specs.py`
75
+ - `VLAarchtests/tests/test_proposal_mode_names_label_base_action.py`
76
+ - `VLAarchtests/tests/test_proxy_scripted_bench.py`
77
+ - `VLAarchtests/tests/test_rvt_backbone_forward.py`
78
+ - `VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py`
79
+ - `VLAarchtests/tests/test_rlbench_init_checkpoint.py`
80
+ - `VLAarchtests/tests/test_rlbench_pickle_bootstrap.py`
81
+ - `VLAarchtests/tests/test_rlbench_task_resolver_aliases.py`
82
+ - `VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
83
+ - `VLAarchtests/tests/test_dual_push_retarget_utils.py`
84
+ - `VLAarchtests/tests/test_dual_push_full_arch_utils.py`
85
+
86
+ ### Third-party baseline path changes
87
+
88
+ - `third_party/AnyBimanual/third_party/YARR/yarr/runners/offline_train_runner.py`
89
+ - `third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py`
90
+ - `third_party/AnyBimanual/agents/peract_bc/launch_utils.py`
91
+ - `third_party/AnyBimanual/agents/peract_bc/qattention_peract_bc_agent.py`
92
+ - `third_party/AnyBimanual/agents/peract_bimanual/qattention_peract_bc_agent.py`
93
+
94
+ ## Current Session Test Commands
95
+
96
+ Executed commands recorded in the workspace:
97
+
98
+ - `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/models/action_decoder.py /workspace/VLAarchtests/tests/test_proposal_mode_names_label_base_action.py`
99
+ - `PYTHONPATH=/workspace/VLAarchtests/code/reveal_vla_bimanual pytest -q /workspace/VLAarchtests/tests/test_proposal_mode_names_label_base_action.py /workspace/VLAarchtests/tests/test_candidate_ranking_loss.py /workspace/VLAarchtests/tests/test_compose_task_routed_proxy_summary.py /workspace/VLAarchtests/tests/test_build_task_specialized_episode_specs.py`
100
+ - result: `11 passed`
101
+ - `pytest -q /workspace/VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py`
102
+ - result: `2 passed`
103
+ - `pytest -q /workspace/VLAarchtests/tests/test_task_routed_model_eval.py /workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py`
104
+ - result: `4 passed`
105
+ - `pytest -q /workspace/VLAarchtests/tests/test_rvt_backbone_forward.py /workspace/VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py /workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py /workspace/VLAarchtests/tests/test_rlbench_init_checkpoint.py /workspace/VLAarchtests/tests/test_rlbench_pickle_bootstrap.py /workspace/VLAarchtests/tests/test_rlbench_task_resolver_aliases.py /workspace/VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
106
+ - result: `passed`
107
+ - `pytest -q /workspace/VLAarchtests/tests/test_build_task_specialized_episode_specs.py /workspace/VLAarchtests/tests/test_candidate_ranking_loss.py /workspace/VLAarchtests/tests/test_compose_task_routed_proxy_summary.py`
108
+ - result: `10 passed`
109
+ - `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_rlbench_knn_eval_scene_kwargs.py`
110
+ - result: `passed`
111
+ - `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py`
112
+ - result: `6 passed`
113
+ - `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
114
+ - result: `9 passed`
115
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_bag_selector_iter9.sh`
116
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_task_routed_proxy_eval.sh`
117
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
118
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
119
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_retargeted_demo_eval.sh`
120
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
121
+ - `PYTHONPATH=/workspace/third_party/AnyBimanual/third_party/YARR pytest -q /workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py`
122
+ - result: `4 passed`
123
+ - `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py /workspace/VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py /workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py /workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
124
+ - result: `passed`
125
+ - `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_retargeted_demo_eval.py`
126
+ - result: `passed`
127
+ - `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
128
+ - result: `passed`
129
+
130
+ ## Current Session Generated Reports
131
+
132
+ Current-session report roots staged in this repo:
133
+
134
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/`
135
+ - `VLAarchtests/artifacts/reports/sprint_v7_followup/`
136
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
137
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
138
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
139
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
140
+ - `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
141
+ - `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
142
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
143
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/`
144
+ - `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
145
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
146
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
147
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
148
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
149
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
150
+
151
+ ## HF Packaging Notes
152
+
153
+ Raw packaging changes applied to the staged HF export:
154
+
155
+ - `baselines/AnyBimanual_overlap_replay/multi/` was reshaped from one flat directory into shard subdirectories:
156
+ - `00000-04999/`
157
+ - `05000-09999/`
158
+ - `10000-14999/`
159
+ - file count after reshape: `14034`
160
+ - reconstruction helper added at:
161
+ - `environment/reconstruct_anybimanual_overlap_replay.sh`
162
+ - exact Hub rejection error returned before the reshape:
163
+ - `Your push was rejected because it contains too many files per directory. Each directory in your git repo can only contain up to 10000 files. Offending directories: /baselines/AnyBimanual_overlap_replay/multi/`
164
+
165
+ ## Current Session Logs
166
+
167
+ Main logs staged in this repo:
168
+
169
+ - `reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log`
170
+ - `reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log`
171
+ - `reports/anybimanual_subset3_overlap_resume1000_eval.log`
172
+ - `reports/anybimanual_subset3_overlap_resume1000_summary.log`
173
+ - `reports/task_routed_proxy_v1_rerun.log`
174
+ - `reports/run_bag_selector_iter9_prebuild.log`
175
+ - `reports/anybimanual_release_subset3_eval_ep5.log`
176
+ - `reports/rvt_overlap_branch_fixedbounds_20260330_chain.sh`
177
+ - `reports/dual_push_full_arch_hybrid_iter6_scene_ep5.log`
178
+ - `reports/dual_push_full_arch_hybrid_iter6_backbone_ep2_r005.log`
179
+
180
+ ## Official Overlap Eval Final Raw Outputs
181
+
182
+ Sources:
183
+
184
+ - `reports/anybimanual_subset3_overlap_resume1000_eval.log`
185
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
186
+
187
+ Raw values:
188
+
189
+ - step `1000`
190
+ - local mean success `0.16`
191
+ - `coordinated_push_box`: success `0.0`, return `0.0`
192
+ - `coordinated_lift_ball`: success `0.0`, return `0.0`
193
+ - `dual_push_buttons`: success `0.48`, return `12.0`
194
+
195
+ ## General-Task Anchor Raw Outputs
196
+
197
+ Sources:
198
+
199
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
200
+
201
+ Raw values:
202
+
203
+ - public AnyBimanual release, step `60000`: success `0.96`, return `24.0`, length `21.56`
204
+ - local official single-task eval, step `60000`, `25` episodes: success `0.96`, return `24.0`, length `21.84`
205
+ - local clip backbone-only result: success `0.0`, return `0.0`
206
+ - local elastic reveal proxy iter6 result: success `0.0`, return `0.0`
207
+ - local RVT frozen fixed-bounds result: success `0.0`, return `0.0`
208
+
209
+ ## Dual-Push Branch Raw Outputs
210
+
211
+ Sources:
212
+
213
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
214
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
215
+
216
+ Raw values:
217
+
218
+ - demo replay through `absolute_action_from_delta`: mean success `0.8`, mean return `0.8`
219
+ - retargeted demo with checkpoint backbone retrieval and vision-only button localization, `5` episodes: mean success `1.0`, mean return `1.0`
220
+ - elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization, `1` episode: mean success `1.0`, mean return `1.0`
221
+ - full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint, `1` episode: mean success `1.0`, mean return `1.0`, steps `116`, path recoveries `0`, noop fallbacks `0`
code/VLAarchtests2_code/MODEL_AND_ARTIFACT_INDEX.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model And Artifact Index
2
+
3
+ Main staged roots:
4
+
5
+ - `VLAarchtests/code/reveal_vla_bimanual/`
6
+ - `VLAarchtests/tests/`
7
+ - `VLAarchtests/artifacts/`
8
+ - `third_party/AnyBimanual/`
9
+ - `baselines/`
10
+ - `outputs/`
11
+ - `reports/`
12
+ - `handoff/instructions4.md`
13
+ - `history/VLAarchtests_previous_README.md`
14
+
15
+ Key current-session report roots:
16
+
17
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/`
18
+ - `VLAarchtests/artifacts/reports/sprint_v7_followup/`
19
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
20
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
21
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
22
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
23
+ - `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
24
+ - `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
25
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
26
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/`
27
+ - `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
28
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
29
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
30
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
31
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
32
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
33
+
34
+ Key current-session run/log roots:
35
+
36
+ - `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/`
37
+ - `baselines/AnyBimanual_release_eval_anchor/perlf_release_dual_push_buttons_ep25/`
38
+ - `baselines/AnyBimanual_overlap_replay/`
39
+ - `outputs/rlbench_true_baselines/`
40
+ - `outputs/rlbench_dual_push/`
41
+ - `outputs/rlbench_rvt_branch/`
42
+ - `reports/anybimanual_subset3_overlap_resume1000_eval.log`
43
+ - `reports/anybimanual_subset3_overlap_resume1000_summary.log`
44
+ - `reports/anybimanual_release_subset3_eval_ep5.log`
45
+ - `reports/dual_push_full_arch_probe_iter6_scene_ep1/`
46
+ - `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/`
47
+ - `reports/dual_push_nonzero_branch_20260330/`
48
+ - `reports/run_bag_selector_iter9_prebuild.log`
49
+ - `reports/task_routed_proxy_v1_rerun.log`
50
+ - `environment/reconstruct_anybimanual_overlap_replay.sh`
51
+
52
+ Key final official overlap summary files:
53
+
54
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md`
55
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
56
+
57
+ HF export packaging note:
58
+
59
+ - `baselines/AnyBimanual_overlap_replay/multi/` is sharded into subdirectories in this repo copy.
code/VLAarchtests2_code/README.md ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VLAarchtests2
2
+
3
+ Bundle staged from `/workspace` on `2026-03-31 UTC`.
4
+
5
+ This repo is the follow-on organization repo to `lsnu/VLAarchtests`. It includes:
6
+
7
+ - current code under `VLAarchtests/`
8
+ - current third-party baseline code under `third_party/`
9
+ - current baseline runs, replay artifacts, demo roots, and released checkpoint material under `baselines/`
10
+ - current training outputs and checkpoints under `outputs/`
11
+ - current logs under `reports/`
12
+ - environment recreation files under `environment/`
13
+ - raw results and change/test logs at the repo root
14
+ - the previous repo README under `history/VLAarchtests_previous_README.md`
15
+ - the active handoff file under `handoff/instructions4.md`
16
+
17
+ ## Top-Level Contents
18
+
19
+ - `VLAarchtests/`
20
+ - code, tests, configs, generated configs, reports, checkpoints, and proxy datasets from the current runpod workspace
21
+ - `third_party/AnyBimanual/`
22
+ - local AnyBimanual checkout used for the official overlap baseline branch, including local compatibility patches
23
+ - `baselines/`
24
+ - released AnyBimanual checkpoint material
25
+ - overlap replay artifacts
26
+ - HF export packaging note: `baselines/AnyBimanual_overlap_replay/multi/` is sharded into subdirectories to satisfy the Hub `10000 files per directory` limit
27
+ - overlap run directories
28
+ - local subset3 demo roots used by the overlap branch
29
+ - `outputs/`
30
+ - RLBench training outputs and checkpoints used by the current anchor, RVT, dual-push, and elastic-controller branches
31
+ - `reports/`
32
+ - training and evaluation logs copied from `/workspace/reports`
33
+ - `environment/`
34
+ - machine snapshot, package lists, and setup helpers
35
+ - `history/`
36
+ - copied previous-repo README
37
+ - `handoff/`
38
+ - active sprint instruction file
39
+ - `RESULTS_RAW.md`
40
+ - raw result tables and final official overlap eval outputs
41
+ - `CHANGE_AND_TEST_LOG.md`
42
+ - file-level change log and executed test commands
43
+ - `MODEL_AND_ARTIFACT_INDEX.md`
44
+ - staged directory map with main artifact roots
45
+
46
+ ## Previous Repo Coverage
47
+
48
+ The earlier `lsnu/VLAarchtests` repo covered the `2026-03-25/26` work. Its README is copied verbatim at:
49
+
50
+ - `history/VLAarchtests_previous_README.md`
51
+
52
+ Previous-repo items explicitly referenced there include:
53
+
54
+ - compact, spatial, compact-phase, and spatial-phase proxy branches
55
+ - earlier RLBench direct-policy and kNN runs
56
+ - environment recreation files
57
+ - prior raw result tables
58
+
59
+ ## Current Session Additions
60
+
61
+ Current-session folders added or expanded in this repo include:
62
+
63
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/`
64
+ - `VLAarchtests/artifacts/reports/sprint_v7_followup/`
65
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iterations/`
66
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/`
67
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/`
68
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/`
69
+ - `VLAarchtests/artifacts/reports/task_routed_proxy_v1/`
70
+ - `VLAarchtests/artifacts/reports/rlbench_general_debug_20260330/`
71
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/`
72
+ - `VLAarchtests/artifacts/reports/bag_mode_specialization_20260330/`
73
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/`
74
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/`
75
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/`
76
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/`
77
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/`
78
+
79
+ ## Raw Results Snapshot
80
+
81
+ ### Proxy sprint v7
82
+
83
+ Source:
84
+
85
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
86
+
87
+ Raw values:
88
+
89
+ - base model mean success: `0.28`
90
+ - base per-task: foliage `0.39`, bag `0.31`, cloth `0.14`
91
+ - random mean success: `0.43333333333333335`
92
+ - candidate0 mean success: `0.2`
93
+ - oracle mean success: `0.4066666666666667`
94
+ - scripted mean success: `1.0`
95
+
96
+ ### Eval-time ablations
97
+
98
+ Source:
99
+
100
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
101
+
102
+ Raw values:
103
+
104
+ - `no_planner`: `0.2`
105
+ - `no_memory`: `0.3233333333333333`
106
+ - `no_task_conditioning`: `0.28`
107
+ - `no_geometry`: `0.27`
108
+ - `no_camera_pose`: `0.29333333333333333`
109
+
110
+ ### Selector checkpoints
111
+
112
+ Sources:
113
+
114
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/default/reveal_benchmark.json`
115
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/full_fixed_default/reveal_benchmark.json`
116
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json`
117
+ - `VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md`
118
+
119
+ Raw values:
120
+
121
+ - `iter6` mean success: `0.4566666666666667`
122
+ - foliage `0.46`, bag `0.4`, cloth `0.51`
123
+ - `iter7` mean success: `0.4666666666666666`
124
+ - foliage `0.4`, bag `0.41`, cloth `0.59`
125
+ - `iter8` bag-only fixed slice: `0.41`
126
+ - routed controller mean success: `0.48666666666666664`
127
+ - routing rule: `foliage -> iter6`, `bag -> iter8`, `cloth -> iter8`
128
+ - per-task: foliage `0.46`, bag `0.41`, cloth `0.59`
129
+
130
+ ### Real baseline compare on proxy suite
131
+
132
+ Source:
133
+
134
+ - `VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.json`
135
+
136
+ Raw values:
137
+
138
+ - `baseline_rgbd_stage3` mean success: `0.31`
139
+ - foliage `0.21`, bag `0.15`, cloth `0.57`
140
+ - `iter5_selector` mean success: `0.45`
141
+ - foliage `0.44`, bag `0.4`, cloth `0.51`
142
+
143
+ ### RLBench recovered push-box comparator
144
+
145
+ Sources:
146
+
147
+ - `reports/rlbench_general_debug/rlbench_push_box_fair_step1_final_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
148
+ - `reports/rlbench_general_debug/rlbench_push_box_historical_step1_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
149
+
150
+ Raw values:
151
+
152
+ - current fair-step1 final mean success: `0.7`
153
+ - current fair-step1 final successes:
154
+ - `[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]`
155
+ - historical push-box control mean success: `0.4`
156
+ - historical push-box control successes:
157
+ - `[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]`
158
+
159
+ ### Official AnyBimanual overlap branch
160
+
161
+ Sources:
162
+
163
+ - `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log`
164
+ - `reports/anybimanual_subset3_overlap_resume1000_eval.log`
165
+
166
+ Raw train milestones:
167
+
168
+ - global step `300`: loss `40.91718`
169
+ - global step `400`: loss `33.26684`
170
+ - global step `500`: loss `36.07054`
171
+ - global step `600`: loss `35.32345`
172
+ - global step `700`: loss `28.50959`
173
+ - global step `800`: loss `23.60169`
174
+ - global step `900`: loss `15.28901`
175
+ - run reached `weights/1000` and the training process exited cleanly
176
+
177
+ Raw eval outputs:
178
+
179
+ - source log: `reports/anybimanual_subset3_overlap_resume1000_eval.log`
180
+ - summary files:
181
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md`
182
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
183
+ - local last complete step: `1000`
184
+ - local mean success: `0.16`
185
+ - local per-task success:
186
+ - `coordinated_push_box`: `0.0`
187
+ - `coordinated_lift_ball`: `0.0`
188
+ - `dual_push_buttons`: `0.48`
189
+ - local per-task return:
190
+ - `coordinated_push_box`: `0.0`
191
+ - `coordinated_lift_ball`: `0.0`
192
+ - `dual_push_buttons`: `12.0`
193
+ - public best overlap step in the local summary: `60000`
194
+ - public best mean success in the local summary: `0.6933333333333334`
195
+
196
+ ### Validated general-task anchor: `dual_push_buttons`
197
+
198
+ Sources:
199
+
200
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
201
+ - `baselines/AnyBimanual_release_eval_anchor/perlf_release_dual_push_buttons_ep25/PERACT_BC/seed0/eval_data.csv`
202
+
203
+ Raw values:
204
+
205
+ - public AnyBimanual release, step `60000`: success `0.96`, return `24.0`, length `21.56`
206
+ - local official single-task eval, step `60000`, `25` episodes: success `0.96`, return `24.0`, length `21.84`
207
+ - local clip backbone-only result on same task: success `0.0`, return `0.0`
208
+ - local elastic reveal proxy iter6 result on same task: success `0.0`, return `0.0`
209
+ - local RVT frozen fixed-bounds result on same task: success `0.0`, return `0.0`
210
+
211
+ ### RVT overlap branch
212
+
213
+ Sources:
214
+
215
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
216
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md`
217
+
218
+ Raw values:
219
+
220
+ - frozen RVT stage1 train summary:
221
+ - `outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/summary.json`
222
+ - final train total `0.043179353826920445`
223
+ - final val total `0.039591669984665984`
224
+ - frozen RVT overlap eval: mean success `0.0`
225
+ - frozen fixed-bounds RVT overlap eval: mean success `0.0`
226
+ - gate values recorded for both branches:
227
+ - local AnyBimanual overlap floor `0.16`
228
+ - stage2 run `false`
229
+
230
+ ### Dual-push non-privileged retarget branch
231
+
232
+ Sources:
233
+
234
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
235
+
236
+ Raw values:
237
+
238
+ - demo replay through `absolute_action_from_delta`:
239
+ - `reports/dual_push_nonzero_branch_20260330/demo_replay/replay_summary.json`
240
+ - mean success `0.8`
241
+ - mean return `0.8`
242
+ - retargeted demo with checkpoint backbone retrieval and vision-only button localization, `1` episode:
243
+ - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep1/summary.json`
244
+ - mean success `1.0`
245
+ - mean return `1.0`
246
+ - retargeted demo with checkpoint backbone retrieval and vision-only button localization, `5` episodes:
247
+ - `reports/dual_push_nonzero_branch_20260330/retargeted_demo_backbone_vision_ep5/summary.json`
248
+ - mean success `1.0`
249
+ - mean return `1.0`
250
+
251
+ ### Dual-push full-architecture hybrid branch
252
+
253
+ Sources:
254
+
255
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
256
+ - `reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
257
+ - `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
258
+
259
+ Raw values:
260
+
261
+ - elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization:
262
+ - `1` episode
263
+ - mean success `1.0`
264
+ - mean return `1.0`
265
+ - steps `94`
266
+ - retrieved episode index `11`
267
+ - retrieval similarity `0.9998629689216614`
268
+ - full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint:
269
+ - `1` episode
270
+ - mean success `1.0`
271
+ - mean return `1.0`
272
+ - steps `116`
273
+ - path recoveries `0`
274
+ - noop fallbacks `0`
275
+ - first selected mode `residual::maintain_opening`
276
+ - last selected mode `residual::base_action`
277
+
278
+ ## Environment Recreation
279
+
280
+ Environment files are under `environment/`, including:
281
+
282
+ - `environment/setup_same_hardware.sh`
283
+ - `environment/runtime_env_vars.sh`
284
+ - `environment/reconstruct_anybimanual_overlap_replay.sh`
285
+ - `environment/hardware_snapshot.txt`
286
+ - `environment/env_list.txt`
287
+ - `environment/base_python.txt`
288
+ - `environment/base_pip_freeze.txt`
289
+ - `environment/rlbench_python.txt`
290
+ - `environment/rlbench_pip_freeze.txt`
291
+
292
+ ## Notes On Result Presentation
293
+
294
+ This repo-level README and the new root docs intentionally keep result text raw:
295
+
296
+ - file paths
297
+ - exact commands
298
+ - exact numeric outputs
299
+ - exact partial status for in-flight runs
300
+
301
+ Interpretive material already present inside older staged artifacts remains preserved as part of the historical workspace contents.
code/VLAarchtests2_code/RESULTS_RAW.md ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Results Raw
2
+
3
+ This file records exact values and exact partial statuses without additional conclusions.
4
+
5
+ ## Proxy Sprint v7 Main Table
6
+
7
+ Source:
8
+
9
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
10
+
11
+ | Item | Raw values |
12
+ | --- | --- |
13
+ | base_model | mean success `0.28`; foliage `0.39`; bag `0.31`; cloth `0.14` |
14
+ | random | mean success `0.43333333333333335`; foliage `0.41`; bag `0.37`; cloth `0.52` |
15
+ | candidate0 | mean success `0.2`; foliage `0.24`; bag `0.22`; cloth `0.14` |
16
+ | oracle | mean success `0.4066666666666667`; foliage `0.5`; bag `0.42`; cloth `0.3` |
17
+ | scripted | mean success `1.0`; foliage `1.0`; bag `1.0`; cloth `1.0` |
18
+
19
+ ## Proxy Sprint v7 Ablation Table
20
+
21
+ Source:
22
+
23
+ - `VLAarchtests/artifacts/reports/sprint_v7_summary/reveal_sprint_summary_compact.json`
24
+
25
+ | Item | Raw values |
26
+ | --- | --- |
27
+ | no_planner | `0.2` |
28
+ | no_memory | `0.3233333333333333` |
29
+ | no_task_conditioning | `0.28` |
30
+ | no_geometry | `0.27` |
31
+ | no_camera_pose | `0.29333333333333333` |
32
+
33
+ ## Selector Table
34
+
35
+ Sources:
36
+
37
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter6/default/reveal_benchmark.json`
38
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter7/full_fixed_default/reveal_benchmark.json`
39
+ - `VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json`
40
+ - `VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md`
41
+
42
+ | Item | Raw values |
43
+ | --- | --- |
44
+ | iter6 | mean success `0.4566666666666667`; foliage `0.46`; bag `0.4`; cloth `0.51` |
45
+ | iter7 | mean success `0.4666666666666666`; foliage `0.4`; bag `0.41`; cloth `0.59` |
46
+ | iter8 bag fixed slice | mean success `0.41`; nominal `0.45`; high_reocclusion `0.4`; camera_perturbation `0.5`; one_sided_slip `0.25` |
47
+ | routed controller | mean success `0.48666666666666664`; route `foliage -> iter6`, `bag -> iter8`, `cloth -> iter8`; foliage `0.46`; bag `0.41`; cloth `0.59` |
48
+
49
+ ## Proxy Baseline Compare Table
50
+
51
+ Source:
52
+
53
+ - `VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.json`
54
+
55
+ | Item | Raw values |
56
+ | --- | --- |
57
+ | baseline_rgbd_stage3 | mean success `0.31`; foliage `0.21`; bag `0.15`; cloth `0.57` |
58
+ | iter5_selector | mean success `0.45`; foliage `0.44`; bag `0.4`; cloth `0.51` |
59
+
60
+ ## RLBench Recovered Push-Box Comparator
61
+
62
+ Sources:
63
+
64
+ - `reports/rlbench_general_debug/rlbench_push_box_fair_step1_final_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
65
+ - `reports/rlbench_general_debug/rlbench_push_box_historical_step1_knn_ep10_x99_res224_len180_train80_fixed/bimanual_push_box/rollout_eval.json`
66
+
67
+ | Item | Raw values |
68
+ | --- | --- |
69
+ | current fair-step1 final | mean success `0.7`; mean return `0.7`; successes `[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]` |
70
+ | historical push-box control | mean success `0.4`; mean return `0.4`; successes `[0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]` |
71
+
72
+ ## Official AnyBimanual Overlap Training Milestones
73
+
74
+ Sources:
75
+
76
+ - `baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log`
77
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md`
78
+
79
+ | Global step | Raw values |
80
+ | --- | --- |
81
+ | 300 | loss `40.91718`; sample time `0.093029`; step time `14.0686` |
82
+ | 400 | loss `33.26684`; sample time `0.073085`; step time `14.3032` |
83
+ | 500 | loss `36.07054`; sample time `0.048558`; step time `11.1376` |
84
+ | 600 | loss `35.32345`; sample time `0.040642`; step time `9.7719` |
85
+ | 700 | loss `28.50959`; sample time `0.057937`; step time `10.9347` |
86
+ | 800 | loss `23.60169`; sample time `0.032697`; step time `11.8652` |
87
+ | 900 | loss `15.28901`; sample time `0.051232`; step time `11.5073` |
88
+ | 1000 checkpoint | train reached `weights/1000` and exited cleanly |
89
+
90
+ ## Official AnyBimanual Overlap Eval Final Output
91
+
92
+ Sources:
93
+
94
+ - `reports/anybimanual_subset3_overlap_resume1000_eval.log`
95
+ - `VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json`
96
+
97
+ | Item | Raw values |
98
+ | --- | --- |
99
+ | local last complete step | `1000` |
100
+ | local mean success | `0.16` |
101
+ | coordinated_push_box | success `0.0`; return `0.0`; final score log line `0.0` |
102
+ | coordinated_lift_ball | success `0.0`; return `0.0`; final score log line `0.0` |
103
+ | dual_push_buttons | success `0.48`; return `12.0`; final score log line `12.0` |
104
+ | public best overlap step in local summary | step `60000`; mean success `0.6933333333333334` |
105
+ | public best overlap per-task success | coordinated_push_box `0.8`; coordinated_lift_ball `0.32`; dual_push_buttons `0.96` |
106
+ | delta vs public best mean success | `-0.5333333333333333` |
107
+ | delta vs public best per-task success | coordinated_push_box `-0.8`; coordinated_lift_ball `-0.32`; dual_push_buttons `-0.48` |
108
+
109
+ ## Validated General-Task Anchor: dual_push_buttons
110
+
111
+ Source:
112
+
113
+ - `VLAarchtests/artifacts/reports/general_task_anchor_20260330_dual_push_buttons/summary.json`
114
+
115
+ | Item | Raw values |
116
+ | --- | --- |
117
+ | public AnyBimanual release | step `60000`; success `0.96`; return `24.0`; length `21.56` |
118
+ | local official single-task eval | step `60000`; episodes `25`; success `0.96`; return `24.0`; length `21.84` |
119
+ | local clip backbone-only | success `0.0`; return `0.0`; path `reports/true_baseline_compare_subset3_v1/rlbench_subset3_backbone_only_clip_100demo_fair_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
120
+ | local elastic reveal proxy iter6 | success `0.0`; return `0.0`; path `reports/true_baseline_compare_subset3_v1/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
121
+ | local RVT frozen fixed-bounds | success `0.0`; return `0.0`; path `reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/bimanual_dual_push_buttons/rollout_eval.json` |
122
+
123
+ ## RVT Overlap Branch
124
+
125
+ Sources:
126
+
127
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md`
128
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
129
+ - `VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md`
130
+
131
+ | Item | Raw values |
132
+ | --- | --- |
133
+ | frozen RVT stage1 train | checkpoint `outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt`; final train total `0.043179353826920445`; final val total `0.039591669984665984`; train seconds `2261.2839448451996` |
134
+ | frozen RVT overlap eval | mean success `0.0`; push_box `0.0`; lift_ball `0.0`; dual_push_buttons `0.0` |
135
+ | frozen fixed-bounds RVT overlap eval | mean success `0.0`; push_box `0.0`; lift_ball `0.0`; dual_push_buttons `0.0` |
136
+ | local overlap floor used for gate | `0.16` |
137
+ | stage2 run flag | `false` |
138
+
139
+ ## Dual-Push Nonzero Branch
140
+
141
+ Source:
142
+
143
+ - `VLAarchtests/artifacts/reports/dual_push_nonzero_branch_20260330/summary.md`
144
+
145
+ | Item | Raw values |
146
+ | --- | --- |
147
+ | direct rollout smoke planning | `5` episodes; `25` steps; mean success `0.0`; path `reports/dual_push_nonzero_branch_20260330/smoke_planning/rollout_eval.json` |
148
+ | controller sweep planning_c4 | `0.0` |
149
+ | controller sweep ik_c1 | `0.0` |
150
+ | controller sweep planning_c1_s05 | `0.0` |
151
+ | kNN top-1 planning | `5` episodes; `25` steps; mean success `0.0` |
152
+ | weighted rollout smoke planning | `5` episodes; `25` steps; mean success `0.0` |
153
+ | demo replay through absolute_action_from_delta | mean success `0.8`; mean return `0.8`; successful demo step counts `89`, `112`, `93`, `112` |
154
+ | weighted kNN top-1 planning length120 | `2` episodes; mean success `0.0` |
155
+ | chunk8 probe IK length120 | `1` episode; success `0.0`; return `0.0`; path recoveries `119`; noop fallbacks `1` |
156
+ | retargeted demo task_state smoke | `2` episodes; mean success `1.0`; mean return `1.0` |
157
+ | retargeted demo checkpoint-backbone ep5 | `5` episodes; mean success `1.0`; mean return `1.0` |
158
+ | retargeted demo checkpoint-backbone vision ep1 | `1` episode; mean success `1.0`; mean return `1.0` |
159
+ | retargeted demo checkpoint-backbone vision ep5 | `5` episodes; mean success `1.0`; mean return `1.0` |
160
+
161
+ ## Dual-Push Full-Architecture Hybrid
162
+
163
+ Sources:
164
+
165
+ - `VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md`
166
+ - `reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
167
+ - `reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
168
+
169
+ | Item | Raw values |
170
+ | --- | --- |
171
+ | elastic checkpoint retargeted-demo probe | `1` episode; mean success `1.0`; mean return `1.0`; steps `94`; retrieved episode index `11`; retrieval similarity `0.9998629689216614` |
172
+ | full-architecture hybrid eval | `1` episode; mean success `1.0`; mean return `1.0`; steps `116`; path recoveries `0`; noop fallbacks `0`; first selected mode `residual::maintain_opening`; last selected mode `residual::base_action` |
173
+
174
+ ## Previous Repo Raw Results
175
+
176
+ Previous raw tables are preserved in:
177
+
178
+ - `history/VLAarchtests_previous_README.md`
code/VLAarchtests2_code/VLAarchtests/MODEL_INDEX.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model Index
2
+
3
+ ## 2026-03-25/26 Additions
4
+
5
+ ### Handoff Proxy Checkpoints
6
+
7
+ | Run | Checkpoint | Summary | Report |
8
+ | --- | --- | --- | --- |
9
+ | spatial handoff | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_seed17/summary.json` | `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` |
10
+ | compact handoff | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_seed17/summary.json` | `artifacts/reports/reveal_handoff_compact_train_probe/reveal_benchmark.json` |
11
+ | compact-phase handoff | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/summary.json` | `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` |
12
+ | spatial-phase handoff | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase_seed17/summary.json` | `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` |
13
+
14
+ ### RLBench Current Checkpoints
15
+
16
+ | Run | Checkpoint | Related files |
17
+ | --- | --- | --- |
18
+ | subset3 valid9 | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_valid9/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_valid9/checkpoint_stable.pt` |
19
+ | subset3 common23 | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_common23/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_subset3_backbone_only_clip_current_common23/checkpoint_stable.pt` |
20
+ | lift-ball wide | `artifacts/outputs/rlbench_current/rlbench_lift_ball_backbone_only_clip_current_wide/checkpoint_best.pt` | `artifacts/outputs/rlbench_current/rlbench_lift_ball_backbone_only_clip_current_wide/checkpoint_stable.pt` |
21
+ | push-box step1 | `artifacts/outputs/rlbench_current/rlbench_push_box_backbone_only_clip_step1/checkpoint_best.pt` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json`, `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` |
22
+
23
+ ### RLBench Result Files
24
+
25
+ | Artifact | File |
26
+ | --- | --- |
27
+ | lift-ball wide, one-step replanning | `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` |
28
+ | push-box step1, one-step replanning | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` |
29
+ | push-box step1, one-step replanning, `delta_scale=0.05` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` |
30
+ | push-box kNN, `episodes=1` | `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` |
31
+ | push-box kNN, `episodes=5`, `top_k=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` |
32
+ | push-box kNN, `episodes=5`, `top_k=1`, dense bank | `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` |
33
+
34
+ ## R3D Proxy Runs
35
+
36
+ | Run | Config | Seed | Checkpoint | Summary | Benchmark | Diagnostics |
37
+ | --- | --- | ---: | --- | --- | --- | --- |
38
+ | stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 13 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json` |
39
+ | stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 14 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json` |
40
+ | stage1 dummy | `proxy_interaction_r3d_stage1_dummy.yaml` | 15 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json` |
41
+ | stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 21 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/diagnostics_full/proxy_diagnostics.json` |
42
+ | stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 22 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed22/diagnostics_full/proxy_diagnostics.json` |
43
+ | stage2 dummy | `proxy_interaction_r3d_stage2_dummy.yaml` | 23 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed23/diagnostics_full/proxy_diagnostics.json` |
44
+ | stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 7 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json` |
45
+ | stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 8 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json` |
46
+ | stage1 clip | `proxy_interaction_r3d_stage1_clip.yaml` | 9 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json` |
47
+ | stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 11 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/diagnostics_full/proxy_diagnostics.json` |
48
+ | stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 12 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed12/diagnostics_full/proxy_diagnostics.json` |
49
+ | stage2 clip | `proxy_interaction_r3d_stage2_clip.yaml` | 13 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed13/diagnostics_full/proxy_diagnostics.json` |
50
+ | stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 17 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/diagnostics_full/proxy_diagnostics.json` |
51
+ | stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 18 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed18/diagnostics_full/proxy_diagnostics.json` |
52
+ | stage3 clip rgbd | `proxy_interaction_r3d_stage3_clip_rgbd.yaml` | 19 | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/checkpoint_best.pt` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/summary.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/benchmark_full/reveal_benchmark.json` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed19/diagnostics_full/proxy_diagnostics.json` |
53
+
54
+ ## Ablation Benchmark Files
55
+
56
+ | Ablation | File |
57
+ | --- | --- |
58
+ | stage1 dummy `no_planner` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json` |
59
+ | stage1 dummy `no_role_symmetry` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json` |
60
+ | stage2 dummy `no_world_model` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark.json` |
61
+ | stage2 dummy `no_world_model` pre-fix backup | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_no_world_model/reveal_benchmark_pre_null_rollout_fix.json` |
62
+ | stage2 dummy `short_history` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage2_dummy_seed21/benchmark_short_history/reveal_benchmark.json` |
63
+ | stage3 clip RGB-D `no_depth` | `artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/benchmark_no_depth/reveal_benchmark.json` |
64
+
65
+ Equivalent ablation files exist under the other seed directories; only one seed per ablation is listed above.
66
+
67
+ ## Integration Artifacts
68
+
69
+ | Artifact | File |
70
+ | --- | --- |
71
+ | RLBench import/config smoke | `artifacts/outputs/r3d/rlbench_smokes/smoke_test_output.txt` |
72
+ | RLBench `open_drawer` launch smoke | `artifacts/outputs/r3d/rlbench_smokes/launch_smoke_open_drawer.txt` |
73
+ | RLBench `open_drawer` rollout | `artifacts/outputs/r3d/rlbench_open_drawer_r3d_rollout/rollout_eval.json` |
74
+ | PerAct2 13-task launch smoke summary | `artifacts/outputs/r3d/peract2_13_launch_smoke/launch_smoke_summary.json` |
75
+
76
+ ## Historical References
77
+
78
+ | File | Purpose |
79
+ | --- | --- |
80
+ | `regression/baselines.md` | historical baseline metrics from the downloaded snapshot |
81
+ | `results/phase_tracking.md` | phase-by-phase acceptance tracking |
code/VLAarchtests2_code/VLAarchtests/README.md ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - robotics
4
+ - vision-language-action
5
+ - bimanual-manipulation
6
+ - rlbench
7
+ - rgbd
8
+ ---
9
+
10
+ # VLAarchtests
11
+
12
+ Bundle uploaded from `/workspace` runpod sessions dated `2026-03-25 UTC` and `2026-03-26 UTC`.
13
+
14
+ ## Top-Level Contents
15
+
16
+ - `code/reveal_vla_bimanual/`
17
+ - project code used for the proxy and RLBench runs in this bundle
18
+ - `artifacts/data/reveal_proxy/`
19
+ - proxy dataset bundles used by the handoff runs
20
+ - `artifacts/outputs/r3d/`
21
+ - previously uploaded R3D proxy outputs already present in the bundle
22
+ - `artifacts/outputs/r3d_handoff/`
23
+ - handoff proxy checkpoints
24
+ - `artifacts/outputs/r3d_handoff_phase/`
25
+ - phase-supervised handoff proxy checkpoints
26
+ - `artifacts/outputs/rlbench_current/`
27
+ - RLBench checkpoints from the current session
28
+ - `artifacts/reports/`
29
+ - proxy and RLBench result files copied from `/workspace/reports`
30
+ - `environment/`
31
+ - same-machine setup files and validation helpers
32
+ - `tests/`
33
+ - local test suite
34
+ - `handoff/instructions.md`
35
+ - instruction file used for the handoff work
36
+ - `MODEL_INDEX.md`
37
+ - checkpoint and result index
38
+ - `results/session_results_20260326.md`
39
+ - raw result tables for the `2026-03-25/26` work
40
+
41
+ ## Code Added Or Updated
42
+
43
+ ### Core model, memory, planner, and dataset paths
44
+
45
+ - `code/reveal_vla_bimanual/models/backbones.py`
46
+ - `code/reveal_vla_bimanual/models/multiview_fusion.py`
47
+ - `code/reveal_vla_bimanual/models/observation_memory.py`
48
+ - `code/reveal_vla_bimanual/models/reveal_head.py`
49
+ - `code/reveal_vla_bimanual/models/world_model.py`
50
+ - `code/reveal_vla_bimanual/models/action_decoder.py`
51
+ - `code/reveal_vla_bimanual/models/planner.py`
52
+ - `code/reveal_vla_bimanual/models/policy.py`
53
+ - `code/reveal_vla_bimanual/train/losses.py`
54
+ - `code/reveal_vla_bimanual/sim_reveal/dataset.py`
55
+ - `code/reveal_vla_bimanual/sim_reveal/procedural_envs.py`
56
+ - `code/reveal_vla_bimanual/sim_rlbench/dataset.py`
57
+
58
+ ### Training and evaluation paths
59
+
60
+ - `code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
61
+ - `code/reveal_vla_bimanual/eval/run_reveal_benchmark.py`
62
+ - `code/reveal_vla_bimanual/eval/run_ablations.py`
63
+ - `code/reveal_vla_bimanual/eval/run_teacher_audit.py`
64
+ - `code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py`
65
+ - `code/reveal_vla_bimanual/eval/run_rlbench_knn_eval.py`
66
+
67
+ ### Added or updated training configs
68
+
69
+ - `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact.yaml`
70
+ - `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial.yaml`
71
+ - `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase.yaml`
72
+ - `code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_spatial_phase.yaml`
73
+ - `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_valid9.yaml`
74
+ - `code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip_current_common23.yaml`
75
+ - `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_current_wide.yaml`
76
+ - `code/reveal_vla_bimanual/train/configs/rlbench_lift_ball_backbone_only_clip_step1.yaml`
77
+ - `code/reveal_vla_bimanual/train/configs/rlbench_push_box_backbone_only_clip_step1.yaml`
78
+
79
+ ### Test files
80
+
81
+ The staged `tests/` directory contains `32` test modules plus `conftest.py`, including:
82
+
83
+ - geometry and camera rotation coverage
84
+ - phase-label and candidate-ranking coverage
85
+ - planner gradient-flow and reocclusion gating coverage
86
+ - world-model null-rollout, field-consistency, and task-adapter coverage
87
+ - proxy scripted benchmark and teacher-audit coverage
88
+
89
+ ## Verification
90
+
91
+ - local test command:
92
+ - `PYTHONPATH=/workspace/VLAarchtests_work/code/reveal_vla_bimanual python -m pytest -q /workspace/VLAarchtests_work/tests`
93
+ - result:
94
+ - `33 passed`
95
+
96
+ ## Raw Result Files
97
+
98
+ ### Proxy and handoff results
99
+
100
+ - `artifacts/reports/reveal_smoke_mod/reveal_benchmark.json`
101
+ - `artifacts/reports/reveal_smoke_nogeom/reveal_benchmark.json`
102
+ - `artifacts/reports/reveal_smoke_noplanner/reveal_benchmark.json`
103
+ - `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json`
104
+ - `artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json`
105
+ - `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json`
106
+ - `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json`
107
+ - `artifacts/reports/reveal_phase_ablations_compact/ablations.json`
108
+ - `artifacts/reports/reveal_teacher_audit_serious/teacher_audit.json`
109
+
110
+ ### RLBench result files
111
+
112
+ - `artifacts/reports/rlbench_dual_buttons_baseline_len100_ep1_ik_rescale/rollout_eval.json`
113
+ - `artifacts/reports/rlbench_dual_buttons_common23_len100_ep1_ik_rescale/rollout_eval.json`
114
+ - `artifacts/reports/rlbench_push_box_common23_len100_ep1_ik_rescale/rollout_eval.json`
115
+ - `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json`
116
+ - `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json`
117
+ - `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json`
118
+ - `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json`
119
+ - `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json`
120
+ - `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json`
121
+
122
+ ## Raw Result Tables
123
+
124
+ ### Proxy serious runs
125
+
126
+ | Artifact | File | Raw values |
127
+ | --- | --- | --- |
128
+ | spatial handoff vs released baseline | `artifacts/reports/reveal_handoff_compare_serious/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.2167` |
129
+ | spatial-trained checkpoint with compact world model vs released baseline | `artifacts/reports/reveal_handoff_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, handoff mean success `0.5200` |
130
+ | compact-phase vs released baseline | `artifacts/reports/reveal_phase_compare_serious_compact/reveal_benchmark.json` | baseline mean success `0.5833`, compact-phase mean success `0.5133` |
131
+ | spatial-phase with compact world model vs released baseline | `artifacts/reports/reveal_phase_compare_serious_spatial_compactwm/reveal_benchmark.json` | baseline mean success `0.5833`, spatial-phase compact-world-model mean success `0.4933` |
132
+
133
+ ### Proxy ablations
134
+
135
+ | Artifact | File | Raw values |
136
+ | --- | --- | --- |
137
+ | compact-phase ablations | `artifacts/reports/reveal_phase_ablations_compact/ablations.json` | full `0.5133`, `no_geometry` `0.5133`, `no_spatial_memory` `0.4967`, `compact_world_model` `0.5133`, `no_planner` `0.4333`, `gaussian_candidates_only` `0.4667`, `no_task_head` `0.5133`, `no_support_mode_conditioning` `0.5133` |
138
+
139
+ ### RLBench direct-policy runs
140
+
141
+ | Artifact | File | Raw values |
142
+ | --- | --- | --- |
143
+ | lift-ball wide checkpoint, one-step replanning | `artifacts/reports/rlbench_lift_ball_wide_len160_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[148]`, noop fallbacks `[11]` |
144
+ | push-box step-1 checkpoint, one-step replanning | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[177]`, noop fallbacks `[0]` |
145
+ | push-box step-1 checkpoint, one-step replanning, `delta_scale=0.05` | `artifacts/reports/rlbench_push_box_step1_ep1_ik_c1_s005/rollout_eval.json` | mean success `0.0`, mean return `0.0`, path recoveries `[180]`, noop fallbacks `[0]` |
146
+
147
+ ### RLBench retrieval runs
148
+
149
+ | Artifact | File | Raw values |
150
+ | --- | --- | --- |
151
+ | push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=1` | `artifacts/reports/rlbench_push_box_knn_step1_ep1/rollout_eval.json` | mean success `1.0`, mean return `1.0`, bank size `2815` |
152
+ | push-box kNN, `bank_stride=4`, `top_k=5`, `time_window=8`, `episodes=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5/rollout_eval.json` | successes `[0.0, 1.0, 0.0, 0.0, 0.0]`, mean success `0.2`, bank size `2815` |
153
+ | push-box kNN, `bank_stride=1`, `top_k=1`, `time_window=4`, `episodes=5` | `artifacts/reports/rlbench_push_box_knn_step1_ep5_top1_dense/rollout_eval.json` | successes `[0.0, 0.0, 1.0, 1.0, 0.0]`, mean success `0.4`, bank size `11259` |
154
+
155
+ ## Environment Recreation Files
156
+
157
+ - `environment/setup_same_machine.sh`
158
+ - `environment/validate_same_machine.sh`
159
+ - `environment/run_peract2_13_rollouts.sh`
160
+ - `environment/runtime_env_vars.sh`
161
+ - `environment/hardware_snapshot.txt`
162
+ - `environment/glxinfo_B.txt`
163
+ - `environment/upstream_revisions.txt`
164
+ - `environment/system_packages_same_machine.txt`
165
+ - `environment/rlbench_env_export.yaml`
166
+ - `environment/rlbench_env_explicit.txt`
167
+ - `environment/rlbench_pip_freeze.txt`
168
+ - `environment/reveal_env_export.yaml`
169
+ - `environment/reveal_env_explicit.txt`
170
+ - `environment/reveal_pip_freeze.txt`
171
+
172
+ Detailed raw tables for the `2026-03-25/26` work are in `results/session_results_20260326.md`.
code/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json ADDED
@@ -0,0 +1,2702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "proxy_name": "foliage_proxy",
4
+ "task_name": "foliage",
5
+ "task_id": 0,
6
+ "stress_slice": "nominal",
7
+ "difficulty_bin": "medium",
8
+ "episode_index": 0,
9
+ "seed": 0
10
+ },
11
+ {
12
+ "proxy_name": "foliage_proxy",
13
+ "task_name": "foliage",
14
+ "task_id": 0,
15
+ "stress_slice": "high_reocclusion",
16
+ "difficulty_bin": "medium",
17
+ "episode_index": 1,
18
+ "seed": 1000
19
+ },
20
+ {
21
+ "proxy_name": "foliage_proxy",
22
+ "task_name": "foliage",
23
+ "task_id": 0,
24
+ "stress_slice": "nominal",
25
+ "difficulty_bin": "hard",
26
+ "episode_index": 2,
27
+ "seed": 2000
28
+ },
29
+ {
30
+ "proxy_name": "foliage_proxy",
31
+ "task_name": "foliage",
32
+ "task_id": 0,
33
+ "stress_slice": "high_reocclusion",
34
+ "difficulty_bin": "hard",
35
+ "episode_index": 3,
36
+ "seed": 3000
37
+ },
38
+ {
39
+ "proxy_name": "foliage_proxy",
40
+ "task_name": "foliage",
41
+ "task_id": 0,
42
+ "stress_slice": "camera_perturbation",
43
+ "difficulty_bin": "medium",
44
+ "episode_index": 4,
45
+ "seed": 4000
46
+ },
47
+ {
48
+ "proxy_name": "foliage_proxy",
49
+ "task_name": "foliage",
50
+ "task_id": 0,
51
+ "stress_slice": "tight_corridor_high_collateral",
52
+ "difficulty_bin": "medium",
53
+ "episode_index": 5,
54
+ "seed": 5000
55
+ },
56
+ {
57
+ "proxy_name": "foliage_proxy",
58
+ "task_name": "foliage",
59
+ "task_id": 0,
60
+ "stress_slice": "camera_perturbation",
61
+ "difficulty_bin": "hard",
62
+ "episode_index": 6,
63
+ "seed": 6000
64
+ },
65
+ {
66
+ "proxy_name": "foliage_proxy",
67
+ "task_name": "foliage",
68
+ "task_id": 0,
69
+ "stress_slice": "tight_corridor_high_collateral",
70
+ "difficulty_bin": "hard",
71
+ "episode_index": 7,
72
+ "seed": 7000
73
+ },
74
+ {
75
+ "proxy_name": "foliage_proxy",
76
+ "task_name": "foliage",
77
+ "task_id": 0,
78
+ "stress_slice": "nominal",
79
+ "difficulty_bin": "medium",
80
+ "episode_index": 8,
81
+ "seed": 1
82
+ },
83
+ {
84
+ "proxy_name": "foliage_proxy",
85
+ "task_name": "foliage",
86
+ "task_id": 0,
87
+ "stress_slice": "high_reocclusion",
88
+ "difficulty_bin": "medium",
89
+ "episode_index": 9,
90
+ "seed": 1001
91
+ },
92
+ {
93
+ "proxy_name": "foliage_proxy",
94
+ "task_name": "foliage",
95
+ "task_id": 0,
96
+ "stress_slice": "nominal",
97
+ "difficulty_bin": "hard",
98
+ "episode_index": 10,
99
+ "seed": 2001
100
+ },
101
+ {
102
+ "proxy_name": "foliage_proxy",
103
+ "task_name": "foliage",
104
+ "task_id": 0,
105
+ "stress_slice": "high_reocclusion",
106
+ "difficulty_bin": "hard",
107
+ "episode_index": 11,
108
+ "seed": 3001
109
+ },
110
+ {
111
+ "proxy_name": "foliage_proxy",
112
+ "task_name": "foliage",
113
+ "task_id": 0,
114
+ "stress_slice": "camera_perturbation",
115
+ "difficulty_bin": "medium",
116
+ "episode_index": 12,
117
+ "seed": 4001
118
+ },
119
+ {
120
+ "proxy_name": "foliage_proxy",
121
+ "task_name": "foliage",
122
+ "task_id": 0,
123
+ "stress_slice": "tight_corridor_high_collateral",
124
+ "difficulty_bin": "medium",
125
+ "episode_index": 13,
126
+ "seed": 5001
127
+ },
128
+ {
129
+ "proxy_name": "foliage_proxy",
130
+ "task_name": "foliage",
131
+ "task_id": 0,
132
+ "stress_slice": "camera_perturbation",
133
+ "difficulty_bin": "hard",
134
+ "episode_index": 14,
135
+ "seed": 6001
136
+ },
137
+ {
138
+ "proxy_name": "foliage_proxy",
139
+ "task_name": "foliage",
140
+ "task_id": 0,
141
+ "stress_slice": "tight_corridor_high_collateral",
142
+ "difficulty_bin": "hard",
143
+ "episode_index": 15,
144
+ "seed": 7001
145
+ },
146
+ {
147
+ "proxy_name": "foliage_proxy",
148
+ "task_name": "foliage",
149
+ "task_id": 0,
150
+ "stress_slice": "nominal",
151
+ "difficulty_bin": "medium",
152
+ "episode_index": 16,
153
+ "seed": 2
154
+ },
155
+ {
156
+ "proxy_name": "foliage_proxy",
157
+ "task_name": "foliage",
158
+ "task_id": 0,
159
+ "stress_slice": "high_reocclusion",
160
+ "difficulty_bin": "medium",
161
+ "episode_index": 17,
162
+ "seed": 1002
163
+ },
164
+ {
165
+ "proxy_name": "foliage_proxy",
166
+ "task_name": "foliage",
167
+ "task_id": 0,
168
+ "stress_slice": "nominal",
169
+ "difficulty_bin": "hard",
170
+ "episode_index": 18,
171
+ "seed": 2002
172
+ },
173
+ {
174
+ "proxy_name": "foliage_proxy",
175
+ "task_name": "foliage",
176
+ "task_id": 0,
177
+ "stress_slice": "high_reocclusion",
178
+ "difficulty_bin": "hard",
179
+ "episode_index": 19,
180
+ "seed": 3002
181
+ },
182
+ {
183
+ "proxy_name": "foliage_proxy",
184
+ "task_name": "foliage",
185
+ "task_id": 0,
186
+ "stress_slice": "camera_perturbation",
187
+ "difficulty_bin": "medium",
188
+ "episode_index": 20,
189
+ "seed": 4002
190
+ },
191
+ {
192
+ "proxy_name": "foliage_proxy",
193
+ "task_name": "foliage",
194
+ "task_id": 0,
195
+ "stress_slice": "tight_corridor_high_collateral",
196
+ "difficulty_bin": "medium",
197
+ "episode_index": 21,
198
+ "seed": 5002
199
+ },
200
+ {
201
+ "proxy_name": "foliage_proxy",
202
+ "task_name": "foliage",
203
+ "task_id": 0,
204
+ "stress_slice": "camera_perturbation",
205
+ "difficulty_bin": "hard",
206
+ "episode_index": 22,
207
+ "seed": 6002
208
+ },
209
+ {
210
+ "proxy_name": "foliage_proxy",
211
+ "task_name": "foliage",
212
+ "task_id": 0,
213
+ "stress_slice": "tight_corridor_high_collateral",
214
+ "difficulty_bin": "hard",
215
+ "episode_index": 23,
216
+ "seed": 7002
217
+ },
218
+ {
219
+ "proxy_name": "foliage_proxy",
220
+ "task_name": "foliage",
221
+ "task_id": 0,
222
+ "stress_slice": "nominal",
223
+ "difficulty_bin": "medium",
224
+ "episode_index": 24,
225
+ "seed": 3
226
+ },
227
+ {
228
+ "proxy_name": "foliage_proxy",
229
+ "task_name": "foliage",
230
+ "task_id": 0,
231
+ "stress_slice": "high_reocclusion",
232
+ "difficulty_bin": "medium",
233
+ "episode_index": 25,
234
+ "seed": 1003
235
+ },
236
+ {
237
+ "proxy_name": "foliage_proxy",
238
+ "task_name": "foliage",
239
+ "task_id": 0,
240
+ "stress_slice": "nominal",
241
+ "difficulty_bin": "hard",
242
+ "episode_index": 26,
243
+ "seed": 2003
244
+ },
245
+ {
246
+ "proxy_name": "foliage_proxy",
247
+ "task_name": "foliage",
248
+ "task_id": 0,
249
+ "stress_slice": "high_reocclusion",
250
+ "difficulty_bin": "hard",
251
+ "episode_index": 27,
252
+ "seed": 3003
253
+ },
254
+ {
255
+ "proxy_name": "foliage_proxy",
256
+ "task_name": "foliage",
257
+ "task_id": 0,
258
+ "stress_slice": "camera_perturbation",
259
+ "difficulty_bin": "medium",
260
+ "episode_index": 28,
261
+ "seed": 4003
262
+ },
263
+ {
264
+ "proxy_name": "foliage_proxy",
265
+ "task_name": "foliage",
266
+ "task_id": 0,
267
+ "stress_slice": "tight_corridor_high_collateral",
268
+ "difficulty_bin": "medium",
269
+ "episode_index": 29,
270
+ "seed": 5003
271
+ },
272
+ {
273
+ "proxy_name": "foliage_proxy",
274
+ "task_name": "foliage",
275
+ "task_id": 0,
276
+ "stress_slice": "camera_perturbation",
277
+ "difficulty_bin": "hard",
278
+ "episode_index": 30,
279
+ "seed": 6003
280
+ },
281
+ {
282
+ "proxy_name": "foliage_proxy",
283
+ "task_name": "foliage",
284
+ "task_id": 0,
285
+ "stress_slice": "tight_corridor_high_collateral",
286
+ "difficulty_bin": "hard",
287
+ "episode_index": 31,
288
+ "seed": 7003
289
+ },
290
+ {
291
+ "proxy_name": "foliage_proxy",
292
+ "task_name": "foliage",
293
+ "task_id": 0,
294
+ "stress_slice": "nominal",
295
+ "difficulty_bin": "medium",
296
+ "episode_index": 32,
297
+ "seed": 4
298
+ },
299
+ {
300
+ "proxy_name": "foliage_proxy",
301
+ "task_name": "foliage",
302
+ "task_id": 0,
303
+ "stress_slice": "high_reocclusion",
304
+ "difficulty_bin": "medium",
305
+ "episode_index": 33,
306
+ "seed": 1004
307
+ },
308
+ {
309
+ "proxy_name": "foliage_proxy",
310
+ "task_name": "foliage",
311
+ "task_id": 0,
312
+ "stress_slice": "nominal",
313
+ "difficulty_bin": "hard",
314
+ "episode_index": 34,
315
+ "seed": 2004
316
+ },
317
+ {
318
+ "proxy_name": "foliage_proxy",
319
+ "task_name": "foliage",
320
+ "task_id": 0,
321
+ "stress_slice": "high_reocclusion",
322
+ "difficulty_bin": "hard",
323
+ "episode_index": 35,
324
+ "seed": 3004
325
+ },
326
+ {
327
+ "proxy_name": "foliage_proxy",
328
+ "task_name": "foliage",
329
+ "task_id": 0,
330
+ "stress_slice": "camera_perturbation",
331
+ "difficulty_bin": "medium",
332
+ "episode_index": 36,
333
+ "seed": 4004
334
+ },
335
+ {
336
+ "proxy_name": "foliage_proxy",
337
+ "task_name": "foliage",
338
+ "task_id": 0,
339
+ "stress_slice": "tight_corridor_high_collateral",
340
+ "difficulty_bin": "medium",
341
+ "episode_index": 37,
342
+ "seed": 5004
343
+ },
344
+ {
345
+ "proxy_name": "foliage_proxy",
346
+ "task_name": "foliage",
347
+ "task_id": 0,
348
+ "stress_slice": "camera_perturbation",
349
+ "difficulty_bin": "hard",
350
+ "episode_index": 38,
351
+ "seed": 6004
352
+ },
353
+ {
354
+ "proxy_name": "foliage_proxy",
355
+ "task_name": "foliage",
356
+ "task_id": 0,
357
+ "stress_slice": "tight_corridor_high_collateral",
358
+ "difficulty_bin": "hard",
359
+ "episode_index": 39,
360
+ "seed": 7004
361
+ },
362
+ {
363
+ "proxy_name": "foliage_proxy",
364
+ "task_name": "foliage",
365
+ "task_id": 0,
366
+ "stress_slice": "nominal",
367
+ "difficulty_bin": "medium",
368
+ "episode_index": 40,
369
+ "seed": 5
370
+ },
371
+ {
372
+ "proxy_name": "foliage_proxy",
373
+ "task_name": "foliage",
374
+ "task_id": 0,
375
+ "stress_slice": "high_reocclusion",
376
+ "difficulty_bin": "medium",
377
+ "episode_index": 41,
378
+ "seed": 1005
379
+ },
380
+ {
381
+ "proxy_name": "foliage_proxy",
382
+ "task_name": "foliage",
383
+ "task_id": 0,
384
+ "stress_slice": "nominal",
385
+ "difficulty_bin": "hard",
386
+ "episode_index": 42,
387
+ "seed": 2005
388
+ },
389
+ {
390
+ "proxy_name": "foliage_proxy",
391
+ "task_name": "foliage",
392
+ "task_id": 0,
393
+ "stress_slice": "high_reocclusion",
394
+ "difficulty_bin": "hard",
395
+ "episode_index": 43,
396
+ "seed": 3005
397
+ },
398
+ {
399
+ "proxy_name": "foliage_proxy",
400
+ "task_name": "foliage",
401
+ "task_id": 0,
402
+ "stress_slice": "camera_perturbation",
403
+ "difficulty_bin": "medium",
404
+ "episode_index": 44,
405
+ "seed": 4005
406
+ },
407
+ {
408
+ "proxy_name": "foliage_proxy",
409
+ "task_name": "foliage",
410
+ "task_id": 0,
411
+ "stress_slice": "tight_corridor_high_collateral",
412
+ "difficulty_bin": "medium",
413
+ "episode_index": 45,
414
+ "seed": 5005
415
+ },
416
+ {
417
+ "proxy_name": "foliage_proxy",
418
+ "task_name": "foliage",
419
+ "task_id": 0,
420
+ "stress_slice": "camera_perturbation",
421
+ "difficulty_bin": "hard",
422
+ "episode_index": 46,
423
+ "seed": 6005
424
+ },
425
+ {
426
+ "proxy_name": "foliage_proxy",
427
+ "task_name": "foliage",
428
+ "task_id": 0,
429
+ "stress_slice": "tight_corridor_high_collateral",
430
+ "difficulty_bin": "hard",
431
+ "episode_index": 47,
432
+ "seed": 7005
433
+ },
434
+ {
435
+ "proxy_name": "foliage_proxy",
436
+ "task_name": "foliage",
437
+ "task_id": 0,
438
+ "stress_slice": "nominal",
439
+ "difficulty_bin": "medium",
440
+ "episode_index": 48,
441
+ "seed": 6
442
+ },
443
+ {
444
+ "proxy_name": "foliage_proxy",
445
+ "task_name": "foliage",
446
+ "task_id": 0,
447
+ "stress_slice": "high_reocclusion",
448
+ "difficulty_bin": "medium",
449
+ "episode_index": 49,
450
+ "seed": 1006
451
+ },
452
+ {
453
+ "proxy_name": "foliage_proxy",
454
+ "task_name": "foliage",
455
+ "task_id": 0,
456
+ "stress_slice": "nominal",
457
+ "difficulty_bin": "hard",
458
+ "episode_index": 50,
459
+ "seed": 2006
460
+ },
461
+ {
462
+ "proxy_name": "foliage_proxy",
463
+ "task_name": "foliage",
464
+ "task_id": 0,
465
+ "stress_slice": "high_reocclusion",
466
+ "difficulty_bin": "hard",
467
+ "episode_index": 51,
468
+ "seed": 3006
469
+ },
470
+ {
471
+ "proxy_name": "foliage_proxy",
472
+ "task_name": "foliage",
473
+ "task_id": 0,
474
+ "stress_slice": "camera_perturbation",
475
+ "difficulty_bin": "medium",
476
+ "episode_index": 52,
477
+ "seed": 4006
478
+ },
479
+ {
480
+ "proxy_name": "foliage_proxy",
481
+ "task_name": "foliage",
482
+ "task_id": 0,
483
+ "stress_slice": "tight_corridor_high_collateral",
484
+ "difficulty_bin": "medium",
485
+ "episode_index": 53,
486
+ "seed": 5006
487
+ },
488
+ {
489
+ "proxy_name": "foliage_proxy",
490
+ "task_name": "foliage",
491
+ "task_id": 0,
492
+ "stress_slice": "camera_perturbation",
493
+ "difficulty_bin": "hard",
494
+ "episode_index": 54,
495
+ "seed": 6006
496
+ },
497
+ {
498
+ "proxy_name": "foliage_proxy",
499
+ "task_name": "foliage",
500
+ "task_id": 0,
501
+ "stress_slice": "tight_corridor_high_collateral",
502
+ "difficulty_bin": "hard",
503
+ "episode_index": 55,
504
+ "seed": 7006
505
+ },
506
+ {
507
+ "proxy_name": "foliage_proxy",
508
+ "task_name": "foliage",
509
+ "task_id": 0,
510
+ "stress_slice": "nominal",
511
+ "difficulty_bin": "medium",
512
+ "episode_index": 56,
513
+ "seed": 7
514
+ },
515
+ {
516
+ "proxy_name": "foliage_proxy",
517
+ "task_name": "foliage",
518
+ "task_id": 0,
519
+ "stress_slice": "high_reocclusion",
520
+ "difficulty_bin": "medium",
521
+ "episode_index": 57,
522
+ "seed": 1007
523
+ },
524
+ {
525
+ "proxy_name": "foliage_proxy",
526
+ "task_name": "foliage",
527
+ "task_id": 0,
528
+ "stress_slice": "nominal",
529
+ "difficulty_bin": "hard",
530
+ "episode_index": 58,
531
+ "seed": 2007
532
+ },
533
+ {
534
+ "proxy_name": "foliage_proxy",
535
+ "task_name": "foliage",
536
+ "task_id": 0,
537
+ "stress_slice": "high_reocclusion",
538
+ "difficulty_bin": "hard",
539
+ "episode_index": 59,
540
+ "seed": 3007
541
+ },
542
+ {
543
+ "proxy_name": "foliage_proxy",
544
+ "task_name": "foliage",
545
+ "task_id": 0,
546
+ "stress_slice": "camera_perturbation",
547
+ "difficulty_bin": "medium",
548
+ "episode_index": 60,
549
+ "seed": 4007
550
+ },
551
+ {
552
+ "proxy_name": "foliage_proxy",
553
+ "task_name": "foliage",
554
+ "task_id": 0,
555
+ "stress_slice": "tight_corridor_high_collateral",
556
+ "difficulty_bin": "medium",
557
+ "episode_index": 61,
558
+ "seed": 5007
559
+ },
560
+ {
561
+ "proxy_name": "foliage_proxy",
562
+ "task_name": "foliage",
563
+ "task_id": 0,
564
+ "stress_slice": "camera_perturbation",
565
+ "difficulty_bin": "hard",
566
+ "episode_index": 62,
567
+ "seed": 6007
568
+ },
569
+ {
570
+ "proxy_name": "foliage_proxy",
571
+ "task_name": "foliage",
572
+ "task_id": 0,
573
+ "stress_slice": "tight_corridor_high_collateral",
574
+ "difficulty_bin": "hard",
575
+ "episode_index": 63,
576
+ "seed": 7007
577
+ },
578
+ {
579
+ "proxy_name": "foliage_proxy",
580
+ "task_name": "foliage",
581
+ "task_id": 0,
582
+ "stress_slice": "nominal",
583
+ "difficulty_bin": "medium",
584
+ "episode_index": 64,
585
+ "seed": 8
586
+ },
587
+ {
588
+ "proxy_name": "foliage_proxy",
589
+ "task_name": "foliage",
590
+ "task_id": 0,
591
+ "stress_slice": "high_reocclusion",
592
+ "difficulty_bin": "medium",
593
+ "episode_index": 65,
594
+ "seed": 1008
595
+ },
596
+ {
597
+ "proxy_name": "foliage_proxy",
598
+ "task_name": "foliage",
599
+ "task_id": 0,
600
+ "stress_slice": "nominal",
601
+ "difficulty_bin": "hard",
602
+ "episode_index": 66,
603
+ "seed": 2008
604
+ },
605
+ {
606
+ "proxy_name": "foliage_proxy",
607
+ "task_name": "foliage",
608
+ "task_id": 0,
609
+ "stress_slice": "high_reocclusion",
610
+ "difficulty_bin": "hard",
611
+ "episode_index": 67,
612
+ "seed": 3008
613
+ },
614
+ {
615
+ "proxy_name": "foliage_proxy",
616
+ "task_name": "foliage",
617
+ "task_id": 0,
618
+ "stress_slice": "camera_perturbation",
619
+ "difficulty_bin": "medium",
620
+ "episode_index": 68,
621
+ "seed": 4008
622
+ },
623
+ {
624
+ "proxy_name": "foliage_proxy",
625
+ "task_name": "foliage",
626
+ "task_id": 0,
627
+ "stress_slice": "tight_corridor_high_collateral",
628
+ "difficulty_bin": "medium",
629
+ "episode_index": 69,
630
+ "seed": 5008
631
+ },
632
+ {
633
+ "proxy_name": "foliage_proxy",
634
+ "task_name": "foliage",
635
+ "task_id": 0,
636
+ "stress_slice": "camera_perturbation",
637
+ "difficulty_bin": "hard",
638
+ "episode_index": 70,
639
+ "seed": 6008
640
+ },
641
+ {
642
+ "proxy_name": "foliage_proxy",
643
+ "task_name": "foliage",
644
+ "task_id": 0,
645
+ "stress_slice": "tight_corridor_high_collateral",
646
+ "difficulty_bin": "hard",
647
+ "episode_index": 71,
648
+ "seed": 7008
649
+ },
650
+ {
651
+ "proxy_name": "foliage_proxy",
652
+ "task_name": "foliage",
653
+ "task_id": 0,
654
+ "stress_slice": "nominal",
655
+ "difficulty_bin": "medium",
656
+ "episode_index": 72,
657
+ "seed": 9
658
+ },
659
+ {
660
+ "proxy_name": "foliage_proxy",
661
+ "task_name": "foliage",
662
+ "task_id": 0,
663
+ "stress_slice": "high_reocclusion",
664
+ "difficulty_bin": "medium",
665
+ "episode_index": 73,
666
+ "seed": 1009
667
+ },
668
+ {
669
+ "proxy_name": "foliage_proxy",
670
+ "task_name": "foliage",
671
+ "task_id": 0,
672
+ "stress_slice": "nominal",
673
+ "difficulty_bin": "hard",
674
+ "episode_index": 74,
675
+ "seed": 2009
676
+ },
677
+ {
678
+ "proxy_name": "foliage_proxy",
679
+ "task_name": "foliage",
680
+ "task_id": 0,
681
+ "stress_slice": "high_reocclusion",
682
+ "difficulty_bin": "hard",
683
+ "episode_index": 75,
684
+ "seed": 3009
685
+ },
686
+ {
687
+ "proxy_name": "foliage_proxy",
688
+ "task_name": "foliage",
689
+ "task_id": 0,
690
+ "stress_slice": "camera_perturbation",
691
+ "difficulty_bin": "medium",
692
+ "episode_index": 76,
693
+ "seed": 4009
694
+ },
695
+ {
696
+ "proxy_name": "foliage_proxy",
697
+ "task_name": "foliage",
698
+ "task_id": 0,
699
+ "stress_slice": "tight_corridor_high_collateral",
700
+ "difficulty_bin": "medium",
701
+ "episode_index": 77,
702
+ "seed": 5009
703
+ },
704
+ {
705
+ "proxy_name": "foliage_proxy",
706
+ "task_name": "foliage",
707
+ "task_id": 0,
708
+ "stress_slice": "camera_perturbation",
709
+ "difficulty_bin": "hard",
710
+ "episode_index": 78,
711
+ "seed": 6009
712
+ },
713
+ {
714
+ "proxy_name": "foliage_proxy",
715
+ "task_name": "foliage",
716
+ "task_id": 0,
717
+ "stress_slice": "tight_corridor_high_collateral",
718
+ "difficulty_bin": "hard",
719
+ "episode_index": 79,
720
+ "seed": 7009
721
+ },
722
+ {
723
+ "proxy_name": "foliage_proxy",
724
+ "task_name": "foliage",
725
+ "task_id": 0,
726
+ "stress_slice": "nominal",
727
+ "difficulty_bin": "medium",
728
+ "episode_index": 80,
729
+ "seed": 10
730
+ },
731
+ {
732
+ "proxy_name": "foliage_proxy",
733
+ "task_name": "foliage",
734
+ "task_id": 0,
735
+ "stress_slice": "nominal",
736
+ "difficulty_bin": "hard",
737
+ "episode_index": 81,
738
+ "seed": 2010
739
+ },
740
+ {
741
+ "proxy_name": "foliage_proxy",
742
+ "task_name": "foliage",
743
+ "task_id": 0,
744
+ "stress_slice": "nominal",
745
+ "difficulty_bin": "medium",
746
+ "episode_index": 82,
747
+ "seed": 11
748
+ },
749
+ {
750
+ "proxy_name": "foliage_proxy",
751
+ "task_name": "foliage",
752
+ "task_id": 0,
753
+ "stress_slice": "nominal",
754
+ "difficulty_bin": "hard",
755
+ "episode_index": 83,
756
+ "seed": 2011
757
+ },
758
+ {
759
+ "proxy_name": "foliage_proxy",
760
+ "task_name": "foliage",
761
+ "task_id": 0,
762
+ "stress_slice": "nominal",
763
+ "difficulty_bin": "medium",
764
+ "episode_index": 84,
765
+ "seed": 12
766
+ },
767
+ {
768
+ "proxy_name": "foliage_proxy",
769
+ "task_name": "foliage",
770
+ "task_id": 0,
771
+ "stress_slice": "nominal",
772
+ "difficulty_bin": "hard",
773
+ "episode_index": 85,
774
+ "seed": 2012
775
+ },
776
+ {
777
+ "proxy_name": "foliage_proxy",
778
+ "task_name": "foliage",
779
+ "task_id": 0,
780
+ "stress_slice": "nominal",
781
+ "difficulty_bin": "medium",
782
+ "episode_index": 86,
783
+ "seed": 13
784
+ },
785
+ {
786
+ "proxy_name": "foliage_proxy",
787
+ "task_name": "foliage",
788
+ "task_id": 0,
789
+ "stress_slice": "nominal",
790
+ "difficulty_bin": "hard",
791
+ "episode_index": 87,
792
+ "seed": 2013
793
+ },
794
+ {
795
+ "proxy_name": "foliage_proxy",
796
+ "task_name": "foliage",
797
+ "task_id": 0,
798
+ "stress_slice": "nominal",
799
+ "difficulty_bin": "medium",
800
+ "episode_index": 88,
801
+ "seed": 14
802
+ },
803
+ {
804
+ "proxy_name": "foliage_proxy",
805
+ "task_name": "foliage",
806
+ "task_id": 0,
807
+ "stress_slice": "nominal",
808
+ "difficulty_bin": "hard",
809
+ "episode_index": 89,
810
+ "seed": 2014
811
+ },
812
+ {
813
+ "proxy_name": "foliage_proxy",
814
+ "task_name": "foliage",
815
+ "task_id": 0,
816
+ "stress_slice": "nominal",
817
+ "difficulty_bin": "medium",
818
+ "episode_index": 90,
819
+ "seed": 15
820
+ },
821
+ {
822
+ "proxy_name": "foliage_proxy",
823
+ "task_name": "foliage",
824
+ "task_id": 0,
825
+ "stress_slice": "nominal",
826
+ "difficulty_bin": "hard",
827
+ "episode_index": 91,
828
+ "seed": 2015
829
+ },
830
+ {
831
+ "proxy_name": "foliage_proxy",
832
+ "task_name": "foliage",
833
+ "task_id": 0,
834
+ "stress_slice": "nominal",
835
+ "difficulty_bin": "medium",
836
+ "episode_index": 92,
837
+ "seed": 16
838
+ },
839
+ {
840
+ "proxy_name": "foliage_proxy",
841
+ "task_name": "foliage",
842
+ "task_id": 0,
843
+ "stress_slice": "nominal",
844
+ "difficulty_bin": "hard",
845
+ "episode_index": 93,
846
+ "seed": 2016
847
+ },
848
+ {
849
+ "proxy_name": "foliage_proxy",
850
+ "task_name": "foliage",
851
+ "task_id": 0,
852
+ "stress_slice": "nominal",
853
+ "difficulty_bin": "medium",
854
+ "episode_index": 94,
855
+ "seed": 17
856
+ },
857
+ {
858
+ "proxy_name": "foliage_proxy",
859
+ "task_name": "foliage",
860
+ "task_id": 0,
861
+ "stress_slice": "nominal",
862
+ "difficulty_bin": "hard",
863
+ "episode_index": 95,
864
+ "seed": 2017
865
+ },
866
+ {
867
+ "proxy_name": "foliage_proxy",
868
+ "task_name": "foliage",
869
+ "task_id": 0,
870
+ "stress_slice": "nominal",
871
+ "difficulty_bin": "medium",
872
+ "episode_index": 96,
873
+ "seed": 18
874
+ },
875
+ {
876
+ "proxy_name": "foliage_proxy",
877
+ "task_name": "foliage",
878
+ "task_id": 0,
879
+ "stress_slice": "nominal",
880
+ "difficulty_bin": "hard",
881
+ "episode_index": 97,
882
+ "seed": 2018
883
+ },
884
+ {
885
+ "proxy_name": "foliage_proxy",
886
+ "task_name": "foliage",
887
+ "task_id": 0,
888
+ "stress_slice": "nominal",
889
+ "difficulty_bin": "medium",
890
+ "episode_index": 98,
891
+ "seed": 19
892
+ },
893
+ {
894
+ "proxy_name": "foliage_proxy",
895
+ "task_name": "foliage",
896
+ "task_id": 0,
897
+ "stress_slice": "nominal",
898
+ "difficulty_bin": "hard",
899
+ "episode_index": 99,
900
+ "seed": 2019
901
+ },
902
+ {
903
+ "proxy_name": "bag_proxy",
904
+ "task_name": "bag",
905
+ "task_id": 1,
906
+ "stress_slice": "nominal",
907
+ "difficulty_bin": "medium",
908
+ "episode_index": 0,
909
+ "seed": 100000
910
+ },
911
+ {
912
+ "proxy_name": "bag_proxy",
913
+ "task_name": "bag",
914
+ "task_id": 1,
915
+ "stress_slice": "high_reocclusion",
916
+ "difficulty_bin": "medium",
917
+ "episode_index": 1,
918
+ "seed": 101000
919
+ },
920
+ {
921
+ "proxy_name": "bag_proxy",
922
+ "task_name": "bag",
923
+ "task_id": 1,
924
+ "stress_slice": "nominal",
925
+ "difficulty_bin": "hard",
926
+ "episode_index": 2,
927
+ "seed": 102000
928
+ },
929
+ {
930
+ "proxy_name": "bag_proxy",
931
+ "task_name": "bag",
932
+ "task_id": 1,
933
+ "stress_slice": "high_reocclusion",
934
+ "difficulty_bin": "hard",
935
+ "episode_index": 3,
936
+ "seed": 103000
937
+ },
938
+ {
939
+ "proxy_name": "bag_proxy",
940
+ "task_name": "bag",
941
+ "task_id": 1,
942
+ "stress_slice": "camera_perturbation",
943
+ "difficulty_bin": "medium",
944
+ "episode_index": 4,
945
+ "seed": 104000
946
+ },
947
+ {
948
+ "proxy_name": "bag_proxy",
949
+ "task_name": "bag",
950
+ "task_id": 1,
951
+ "stress_slice": "one_sided_slip",
952
+ "difficulty_bin": "medium",
953
+ "episode_index": 5,
954
+ "seed": 105000
955
+ },
956
+ {
957
+ "proxy_name": "bag_proxy",
958
+ "task_name": "bag",
959
+ "task_id": 1,
960
+ "stress_slice": "camera_perturbation",
961
+ "difficulty_bin": "hard",
962
+ "episode_index": 6,
963
+ "seed": 106000
964
+ },
965
+ {
966
+ "proxy_name": "bag_proxy",
967
+ "task_name": "bag",
968
+ "task_id": 1,
969
+ "stress_slice": "one_sided_slip",
970
+ "difficulty_bin": "hard",
971
+ "episode_index": 7,
972
+ "seed": 107000
973
+ },
974
+ {
975
+ "proxy_name": "bag_proxy",
976
+ "task_name": "bag",
977
+ "task_id": 1,
978
+ "stress_slice": "nominal",
979
+ "difficulty_bin": "medium",
980
+ "episode_index": 8,
981
+ "seed": 100001
982
+ },
983
+ {
984
+ "proxy_name": "bag_proxy",
985
+ "task_name": "bag",
986
+ "task_id": 1,
987
+ "stress_slice": "high_reocclusion",
988
+ "difficulty_bin": "medium",
989
+ "episode_index": 9,
990
+ "seed": 101001
991
+ },
992
+ {
993
+ "proxy_name": "bag_proxy",
994
+ "task_name": "bag",
995
+ "task_id": 1,
996
+ "stress_slice": "nominal",
997
+ "difficulty_bin": "hard",
998
+ "episode_index": 10,
999
+ "seed": 102001
1000
+ },
1001
+ {
1002
+ "proxy_name": "bag_proxy",
1003
+ "task_name": "bag",
1004
+ "task_id": 1,
1005
+ "stress_slice": "high_reocclusion",
1006
+ "difficulty_bin": "hard",
1007
+ "episode_index": 11,
1008
+ "seed": 103001
1009
+ },
1010
+ {
1011
+ "proxy_name": "bag_proxy",
1012
+ "task_name": "bag",
1013
+ "task_id": 1,
1014
+ "stress_slice": "camera_perturbation",
1015
+ "difficulty_bin": "medium",
1016
+ "episode_index": 12,
1017
+ "seed": 104001
1018
+ },
1019
+ {
1020
+ "proxy_name": "bag_proxy",
1021
+ "task_name": "bag",
1022
+ "task_id": 1,
1023
+ "stress_slice": "one_sided_slip",
1024
+ "difficulty_bin": "medium",
1025
+ "episode_index": 13,
1026
+ "seed": 105001
1027
+ },
1028
+ {
1029
+ "proxy_name": "bag_proxy",
1030
+ "task_name": "bag",
1031
+ "task_id": 1,
1032
+ "stress_slice": "camera_perturbation",
1033
+ "difficulty_bin": "hard",
1034
+ "episode_index": 14,
1035
+ "seed": 106001
1036
+ },
1037
+ {
1038
+ "proxy_name": "bag_proxy",
1039
+ "task_name": "bag",
1040
+ "task_id": 1,
1041
+ "stress_slice": "one_sided_slip",
1042
+ "difficulty_bin": "hard",
1043
+ "episode_index": 15,
1044
+ "seed": 107001
1045
+ },
1046
+ {
1047
+ "proxy_name": "bag_proxy",
1048
+ "task_name": "bag",
1049
+ "task_id": 1,
1050
+ "stress_slice": "nominal",
1051
+ "difficulty_bin": "medium",
1052
+ "episode_index": 16,
1053
+ "seed": 100002
1054
+ },
1055
+ {
1056
+ "proxy_name": "bag_proxy",
1057
+ "task_name": "bag",
1058
+ "task_id": 1,
1059
+ "stress_slice": "high_reocclusion",
1060
+ "difficulty_bin": "medium",
1061
+ "episode_index": 17,
1062
+ "seed": 101002
1063
+ },
1064
+ {
1065
+ "proxy_name": "bag_proxy",
1066
+ "task_name": "bag",
1067
+ "task_id": 1,
1068
+ "stress_slice": "nominal",
1069
+ "difficulty_bin": "hard",
1070
+ "episode_index": 18,
1071
+ "seed": 102002
1072
+ },
1073
+ {
1074
+ "proxy_name": "bag_proxy",
1075
+ "task_name": "bag",
1076
+ "task_id": 1,
1077
+ "stress_slice": "high_reocclusion",
1078
+ "difficulty_bin": "hard",
1079
+ "episode_index": 19,
1080
+ "seed": 103002
1081
+ },
1082
+ {
1083
+ "proxy_name": "bag_proxy",
1084
+ "task_name": "bag",
1085
+ "task_id": 1,
1086
+ "stress_slice": "camera_perturbation",
1087
+ "difficulty_bin": "medium",
1088
+ "episode_index": 20,
1089
+ "seed": 104002
1090
+ },
1091
+ {
1092
+ "proxy_name": "bag_proxy",
1093
+ "task_name": "bag",
1094
+ "task_id": 1,
1095
+ "stress_slice": "one_sided_slip",
1096
+ "difficulty_bin": "medium",
1097
+ "episode_index": 21,
1098
+ "seed": 105002
1099
+ },
1100
+ {
1101
+ "proxy_name": "bag_proxy",
1102
+ "task_name": "bag",
1103
+ "task_id": 1,
1104
+ "stress_slice": "camera_perturbation",
1105
+ "difficulty_bin": "hard",
1106
+ "episode_index": 22,
1107
+ "seed": 106002
1108
+ },
1109
+ {
1110
+ "proxy_name": "bag_proxy",
1111
+ "task_name": "bag",
1112
+ "task_id": 1,
1113
+ "stress_slice": "one_sided_slip",
1114
+ "difficulty_bin": "hard",
1115
+ "episode_index": 23,
1116
+ "seed": 107002
1117
+ },
1118
+ {
1119
+ "proxy_name": "bag_proxy",
1120
+ "task_name": "bag",
1121
+ "task_id": 1,
1122
+ "stress_slice": "nominal",
1123
+ "difficulty_bin": "medium",
1124
+ "episode_index": 24,
1125
+ "seed": 100003
1126
+ },
1127
+ {
1128
+ "proxy_name": "bag_proxy",
1129
+ "task_name": "bag",
1130
+ "task_id": 1,
1131
+ "stress_slice": "high_reocclusion",
1132
+ "difficulty_bin": "medium",
1133
+ "episode_index": 25,
1134
+ "seed": 101003
1135
+ },
1136
+ {
1137
+ "proxy_name": "bag_proxy",
1138
+ "task_name": "bag",
1139
+ "task_id": 1,
1140
+ "stress_slice": "nominal",
1141
+ "difficulty_bin": "hard",
1142
+ "episode_index": 26,
1143
+ "seed": 102003
1144
+ },
1145
+ {
1146
+ "proxy_name": "bag_proxy",
1147
+ "task_name": "bag",
1148
+ "task_id": 1,
1149
+ "stress_slice": "high_reocclusion",
1150
+ "difficulty_bin": "hard",
1151
+ "episode_index": 27,
1152
+ "seed": 103003
1153
+ },
1154
+ {
1155
+ "proxy_name": "bag_proxy",
1156
+ "task_name": "bag",
1157
+ "task_id": 1,
1158
+ "stress_slice": "camera_perturbation",
1159
+ "difficulty_bin": "medium",
1160
+ "episode_index": 28,
1161
+ "seed": 104003
1162
+ },
1163
+ {
1164
+ "proxy_name": "bag_proxy",
1165
+ "task_name": "bag",
1166
+ "task_id": 1,
1167
+ "stress_slice": "one_sided_slip",
1168
+ "difficulty_bin": "medium",
1169
+ "episode_index": 29,
1170
+ "seed": 105003
1171
+ },
1172
+ {
1173
+ "proxy_name": "bag_proxy",
1174
+ "task_name": "bag",
1175
+ "task_id": 1,
1176
+ "stress_slice": "camera_perturbation",
1177
+ "difficulty_bin": "hard",
1178
+ "episode_index": 30,
1179
+ "seed": 106003
1180
+ },
1181
+ {
1182
+ "proxy_name": "bag_proxy",
1183
+ "task_name": "bag",
1184
+ "task_id": 1,
1185
+ "stress_slice": "one_sided_slip",
1186
+ "difficulty_bin": "hard",
1187
+ "episode_index": 31,
1188
+ "seed": 107003
1189
+ },
1190
+ {
1191
+ "proxy_name": "bag_proxy",
1192
+ "task_name": "bag",
1193
+ "task_id": 1,
1194
+ "stress_slice": "nominal",
1195
+ "difficulty_bin": "medium",
1196
+ "episode_index": 32,
1197
+ "seed": 100004
1198
+ },
1199
+ {
1200
+ "proxy_name": "bag_proxy",
1201
+ "task_name": "bag",
1202
+ "task_id": 1,
1203
+ "stress_slice": "high_reocclusion",
1204
+ "difficulty_bin": "medium",
1205
+ "episode_index": 33,
1206
+ "seed": 101004
1207
+ },
1208
+ {
1209
+ "proxy_name": "bag_proxy",
1210
+ "task_name": "bag",
1211
+ "task_id": 1,
1212
+ "stress_slice": "nominal",
1213
+ "difficulty_bin": "hard",
1214
+ "episode_index": 34,
1215
+ "seed": 102004
1216
+ },
1217
+ {
1218
+ "proxy_name": "bag_proxy",
1219
+ "task_name": "bag",
1220
+ "task_id": 1,
1221
+ "stress_slice": "high_reocclusion",
1222
+ "difficulty_bin": "hard",
1223
+ "episode_index": 35,
1224
+ "seed": 103004
1225
+ },
1226
+ {
1227
+ "proxy_name": "bag_proxy",
1228
+ "task_name": "bag",
1229
+ "task_id": 1,
1230
+ "stress_slice": "camera_perturbation",
1231
+ "difficulty_bin": "medium",
1232
+ "episode_index": 36,
1233
+ "seed": 104004
1234
+ },
1235
+ {
1236
+ "proxy_name": "bag_proxy",
1237
+ "task_name": "bag",
1238
+ "task_id": 1,
1239
+ "stress_slice": "one_sided_slip",
1240
+ "difficulty_bin": "medium",
1241
+ "episode_index": 37,
1242
+ "seed": 105004
1243
+ },
1244
+ {
1245
+ "proxy_name": "bag_proxy",
1246
+ "task_name": "bag",
1247
+ "task_id": 1,
1248
+ "stress_slice": "camera_perturbation",
1249
+ "difficulty_bin": "hard",
1250
+ "episode_index": 38,
1251
+ "seed": 106004
1252
+ },
1253
+ {
1254
+ "proxy_name": "bag_proxy",
1255
+ "task_name": "bag",
1256
+ "task_id": 1,
1257
+ "stress_slice": "one_sided_slip",
1258
+ "difficulty_bin": "hard",
1259
+ "episode_index": 39,
1260
+ "seed": 107004
1261
+ },
1262
+ {
1263
+ "proxy_name": "bag_proxy",
1264
+ "task_name": "bag",
1265
+ "task_id": 1,
1266
+ "stress_slice": "nominal",
1267
+ "difficulty_bin": "medium",
1268
+ "episode_index": 40,
1269
+ "seed": 100005
1270
+ },
1271
+ {
1272
+ "proxy_name": "bag_proxy",
1273
+ "task_name": "bag",
1274
+ "task_id": 1,
1275
+ "stress_slice": "high_reocclusion",
1276
+ "difficulty_bin": "medium",
1277
+ "episode_index": 41,
1278
+ "seed": 101005
1279
+ },
1280
+ {
1281
+ "proxy_name": "bag_proxy",
1282
+ "task_name": "bag",
1283
+ "task_id": 1,
1284
+ "stress_slice": "nominal",
1285
+ "difficulty_bin": "hard",
1286
+ "episode_index": 42,
1287
+ "seed": 102005
1288
+ },
1289
+ {
1290
+ "proxy_name": "bag_proxy",
1291
+ "task_name": "bag",
1292
+ "task_id": 1,
1293
+ "stress_slice": "high_reocclusion",
1294
+ "difficulty_bin": "hard",
1295
+ "episode_index": 43,
1296
+ "seed": 103005
1297
+ },
1298
+ {
1299
+ "proxy_name": "bag_proxy",
1300
+ "task_name": "bag",
1301
+ "task_id": 1,
1302
+ "stress_slice": "camera_perturbation",
1303
+ "difficulty_bin": "medium",
1304
+ "episode_index": 44,
1305
+ "seed": 104005
1306
+ },
1307
+ {
1308
+ "proxy_name": "bag_proxy",
1309
+ "task_name": "bag",
1310
+ "task_id": 1,
1311
+ "stress_slice": "one_sided_slip",
1312
+ "difficulty_bin": "medium",
1313
+ "episode_index": 45,
1314
+ "seed": 105005
1315
+ },
1316
+ {
1317
+ "proxy_name": "bag_proxy",
1318
+ "task_name": "bag",
1319
+ "task_id": 1,
1320
+ "stress_slice": "camera_perturbation",
1321
+ "difficulty_bin": "hard",
1322
+ "episode_index": 46,
1323
+ "seed": 106005
1324
+ },
1325
+ {
1326
+ "proxy_name": "bag_proxy",
1327
+ "task_name": "bag",
1328
+ "task_id": 1,
1329
+ "stress_slice": "one_sided_slip",
1330
+ "difficulty_bin": "hard",
1331
+ "episode_index": 47,
1332
+ "seed": 107005
1333
+ },
1334
+ {
1335
+ "proxy_name": "bag_proxy",
1336
+ "task_name": "bag",
1337
+ "task_id": 1,
1338
+ "stress_slice": "nominal",
1339
+ "difficulty_bin": "medium",
1340
+ "episode_index": 48,
1341
+ "seed": 100006
1342
+ },
1343
+ {
1344
+ "proxy_name": "bag_proxy",
1345
+ "task_name": "bag",
1346
+ "task_id": 1,
1347
+ "stress_slice": "high_reocclusion",
1348
+ "difficulty_bin": "medium",
1349
+ "episode_index": 49,
1350
+ "seed": 101006
1351
+ },
1352
+ {
1353
+ "proxy_name": "bag_proxy",
1354
+ "task_name": "bag",
1355
+ "task_id": 1,
1356
+ "stress_slice": "nominal",
1357
+ "difficulty_bin": "hard",
1358
+ "episode_index": 50,
1359
+ "seed": 102006
1360
+ },
1361
+ {
1362
+ "proxy_name": "bag_proxy",
1363
+ "task_name": "bag",
1364
+ "task_id": 1,
1365
+ "stress_slice": "high_reocclusion",
1366
+ "difficulty_bin": "hard",
1367
+ "episode_index": 51,
1368
+ "seed": 103006
1369
+ },
1370
+ {
1371
+ "proxy_name": "bag_proxy",
1372
+ "task_name": "bag",
1373
+ "task_id": 1,
1374
+ "stress_slice": "camera_perturbation",
1375
+ "difficulty_bin": "medium",
1376
+ "episode_index": 52,
1377
+ "seed": 104006
1378
+ },
1379
+ {
1380
+ "proxy_name": "bag_proxy",
1381
+ "task_name": "bag",
1382
+ "task_id": 1,
1383
+ "stress_slice": "one_sided_slip",
1384
+ "difficulty_bin": "medium",
1385
+ "episode_index": 53,
1386
+ "seed": 105006
1387
+ },
1388
+ {
1389
+ "proxy_name": "bag_proxy",
1390
+ "task_name": "bag",
1391
+ "task_id": 1,
1392
+ "stress_slice": "camera_perturbation",
1393
+ "difficulty_bin": "hard",
1394
+ "episode_index": 54,
1395
+ "seed": 106006
1396
+ },
1397
+ {
1398
+ "proxy_name": "bag_proxy",
1399
+ "task_name": "bag",
1400
+ "task_id": 1,
1401
+ "stress_slice": "one_sided_slip",
1402
+ "difficulty_bin": "hard",
1403
+ "episode_index": 55,
1404
+ "seed": 107006
1405
+ },
1406
+ {
1407
+ "proxy_name": "bag_proxy",
1408
+ "task_name": "bag",
1409
+ "task_id": 1,
1410
+ "stress_slice": "nominal",
1411
+ "difficulty_bin": "medium",
1412
+ "episode_index": 56,
1413
+ "seed": 100007
1414
+ },
1415
+ {
1416
+ "proxy_name": "bag_proxy",
1417
+ "task_name": "bag",
1418
+ "task_id": 1,
1419
+ "stress_slice": "high_reocclusion",
1420
+ "difficulty_bin": "medium",
1421
+ "episode_index": 57,
1422
+ "seed": 101007
1423
+ },
1424
+ {
1425
+ "proxy_name": "bag_proxy",
1426
+ "task_name": "bag",
1427
+ "task_id": 1,
1428
+ "stress_slice": "nominal",
1429
+ "difficulty_bin": "hard",
1430
+ "episode_index": 58,
1431
+ "seed": 102007
1432
+ },
1433
+ {
1434
+ "proxy_name": "bag_proxy",
1435
+ "task_name": "bag",
1436
+ "task_id": 1,
1437
+ "stress_slice": "high_reocclusion",
1438
+ "difficulty_bin": "hard",
1439
+ "episode_index": 59,
1440
+ "seed": 103007
1441
+ },
1442
+ {
1443
+ "proxy_name": "bag_proxy",
1444
+ "task_name": "bag",
1445
+ "task_id": 1,
1446
+ "stress_slice": "camera_perturbation",
1447
+ "difficulty_bin": "medium",
1448
+ "episode_index": 60,
1449
+ "seed": 104007
1450
+ },
1451
+ {
1452
+ "proxy_name": "bag_proxy",
1453
+ "task_name": "bag",
1454
+ "task_id": 1,
1455
+ "stress_slice": "one_sided_slip",
1456
+ "difficulty_bin": "medium",
1457
+ "episode_index": 61,
1458
+ "seed": 105007
1459
+ },
1460
+ {
1461
+ "proxy_name": "bag_proxy",
1462
+ "task_name": "bag",
1463
+ "task_id": 1,
1464
+ "stress_slice": "camera_perturbation",
1465
+ "difficulty_bin": "hard",
1466
+ "episode_index": 62,
1467
+ "seed": 106007
1468
+ },
1469
+ {
1470
+ "proxy_name": "bag_proxy",
1471
+ "task_name": "bag",
1472
+ "task_id": 1,
1473
+ "stress_slice": "one_sided_slip",
1474
+ "difficulty_bin": "hard",
1475
+ "episode_index": 63,
1476
+ "seed": 107007
1477
+ },
1478
+ {
1479
+ "proxy_name": "bag_proxy",
1480
+ "task_name": "bag",
1481
+ "task_id": 1,
1482
+ "stress_slice": "nominal",
1483
+ "difficulty_bin": "medium",
1484
+ "episode_index": 64,
1485
+ "seed": 100008
1486
+ },
1487
+ {
1488
+ "proxy_name": "bag_proxy",
1489
+ "task_name": "bag",
1490
+ "task_id": 1,
1491
+ "stress_slice": "high_reocclusion",
1492
+ "difficulty_bin": "medium",
1493
+ "episode_index": 65,
1494
+ "seed": 101008
1495
+ },
1496
+ {
1497
+ "proxy_name": "bag_proxy",
1498
+ "task_name": "bag",
1499
+ "task_id": 1,
1500
+ "stress_slice": "nominal",
1501
+ "difficulty_bin": "hard",
1502
+ "episode_index": 66,
1503
+ "seed": 102008
1504
+ },
1505
+ {
1506
+ "proxy_name": "bag_proxy",
1507
+ "task_name": "bag",
1508
+ "task_id": 1,
1509
+ "stress_slice": "high_reocclusion",
1510
+ "difficulty_bin": "hard",
1511
+ "episode_index": 67,
1512
+ "seed": 103008
1513
+ },
1514
+ {
1515
+ "proxy_name": "bag_proxy",
1516
+ "task_name": "bag",
1517
+ "task_id": 1,
1518
+ "stress_slice": "camera_perturbation",
1519
+ "difficulty_bin": "medium",
1520
+ "episode_index": 68,
1521
+ "seed": 104008
1522
+ },
1523
+ {
1524
+ "proxy_name": "bag_proxy",
1525
+ "task_name": "bag",
1526
+ "task_id": 1,
1527
+ "stress_slice": "one_sided_slip",
1528
+ "difficulty_bin": "medium",
1529
+ "episode_index": 69,
1530
+ "seed": 105008
1531
+ },
1532
+ {
1533
+ "proxy_name": "bag_proxy",
1534
+ "task_name": "bag",
1535
+ "task_id": 1,
1536
+ "stress_slice": "camera_perturbation",
1537
+ "difficulty_bin": "hard",
1538
+ "episode_index": 70,
1539
+ "seed": 106008
1540
+ },
1541
+ {
1542
+ "proxy_name": "bag_proxy",
1543
+ "task_name": "bag",
1544
+ "task_id": 1,
1545
+ "stress_slice": "one_sided_slip",
1546
+ "difficulty_bin": "hard",
1547
+ "episode_index": 71,
1548
+ "seed": 107008
1549
+ },
1550
+ {
1551
+ "proxy_name": "bag_proxy",
1552
+ "task_name": "bag",
1553
+ "task_id": 1,
1554
+ "stress_slice": "nominal",
1555
+ "difficulty_bin": "medium",
1556
+ "episode_index": 72,
1557
+ "seed": 100009
1558
+ },
1559
+ {
1560
+ "proxy_name": "bag_proxy",
1561
+ "task_name": "bag",
1562
+ "task_id": 1,
1563
+ "stress_slice": "high_reocclusion",
1564
+ "difficulty_bin": "medium",
1565
+ "episode_index": 73,
1566
+ "seed": 101009
1567
+ },
1568
+ {
1569
+ "proxy_name": "bag_proxy",
1570
+ "task_name": "bag",
1571
+ "task_id": 1,
1572
+ "stress_slice": "nominal",
1573
+ "difficulty_bin": "hard",
1574
+ "episode_index": 74,
1575
+ "seed": 102009
1576
+ },
1577
+ {
1578
+ "proxy_name": "bag_proxy",
1579
+ "task_name": "bag",
1580
+ "task_id": 1,
1581
+ "stress_slice": "high_reocclusion",
1582
+ "difficulty_bin": "hard",
1583
+ "episode_index": 75,
1584
+ "seed": 103009
1585
+ },
1586
+ {
1587
+ "proxy_name": "bag_proxy",
1588
+ "task_name": "bag",
1589
+ "task_id": 1,
1590
+ "stress_slice": "camera_perturbation",
1591
+ "difficulty_bin": "medium",
1592
+ "episode_index": 76,
1593
+ "seed": 104009
1594
+ },
1595
+ {
1596
+ "proxy_name": "bag_proxy",
1597
+ "task_name": "bag",
1598
+ "task_id": 1,
1599
+ "stress_slice": "one_sided_slip",
1600
+ "difficulty_bin": "medium",
1601
+ "episode_index": 77,
1602
+ "seed": 105009
1603
+ },
1604
+ {
1605
+ "proxy_name": "bag_proxy",
1606
+ "task_name": "bag",
1607
+ "task_id": 1,
1608
+ "stress_slice": "camera_perturbation",
1609
+ "difficulty_bin": "hard",
1610
+ "episode_index": 78,
1611
+ "seed": 106009
1612
+ },
1613
+ {
1614
+ "proxy_name": "bag_proxy",
1615
+ "task_name": "bag",
1616
+ "task_id": 1,
1617
+ "stress_slice": "one_sided_slip",
1618
+ "difficulty_bin": "hard",
1619
+ "episode_index": 79,
1620
+ "seed": 107009
1621
+ },
1622
+ {
1623
+ "proxy_name": "bag_proxy",
1624
+ "task_name": "bag",
1625
+ "task_id": 1,
1626
+ "stress_slice": "nominal",
1627
+ "difficulty_bin": "medium",
1628
+ "episode_index": 80,
1629
+ "seed": 100010
1630
+ },
1631
+ {
1632
+ "proxy_name": "bag_proxy",
1633
+ "task_name": "bag",
1634
+ "task_id": 1,
1635
+ "stress_slice": "nominal",
1636
+ "difficulty_bin": "hard",
1637
+ "episode_index": 81,
1638
+ "seed": 102010
1639
+ },
1640
+ {
1641
+ "proxy_name": "bag_proxy",
1642
+ "task_name": "bag",
1643
+ "task_id": 1,
1644
+ "stress_slice": "nominal",
1645
+ "difficulty_bin": "medium",
1646
+ "episode_index": 82,
1647
+ "seed": 100011
1648
+ },
1649
+ {
1650
+ "proxy_name": "bag_proxy",
1651
+ "task_name": "bag",
1652
+ "task_id": 1,
1653
+ "stress_slice": "nominal",
1654
+ "difficulty_bin": "hard",
1655
+ "episode_index": 83,
1656
+ "seed": 102011
1657
+ },
1658
+ {
1659
+ "proxy_name": "bag_proxy",
1660
+ "task_name": "bag",
1661
+ "task_id": 1,
1662
+ "stress_slice": "nominal",
1663
+ "difficulty_bin": "medium",
1664
+ "episode_index": 84,
1665
+ "seed": 100012
1666
+ },
1667
+ {
1668
+ "proxy_name": "bag_proxy",
1669
+ "task_name": "bag",
1670
+ "task_id": 1,
1671
+ "stress_slice": "nominal",
1672
+ "difficulty_bin": "hard",
1673
+ "episode_index": 85,
1674
+ "seed": 102012
1675
+ },
1676
+ {
1677
+ "proxy_name": "bag_proxy",
1678
+ "task_name": "bag",
1679
+ "task_id": 1,
1680
+ "stress_slice": "nominal",
1681
+ "difficulty_bin": "medium",
1682
+ "episode_index": 86,
1683
+ "seed": 100013
1684
+ },
1685
+ {
1686
+ "proxy_name": "bag_proxy",
1687
+ "task_name": "bag",
1688
+ "task_id": 1,
1689
+ "stress_slice": "nominal",
1690
+ "difficulty_bin": "hard",
1691
+ "episode_index": 87,
1692
+ "seed": 102013
1693
+ },
1694
+ {
1695
+ "proxy_name": "bag_proxy",
1696
+ "task_name": "bag",
1697
+ "task_id": 1,
1698
+ "stress_slice": "nominal",
1699
+ "difficulty_bin": "medium",
1700
+ "episode_index": 88,
1701
+ "seed": 100014
1702
+ },
1703
+ {
1704
+ "proxy_name": "bag_proxy",
1705
+ "task_name": "bag",
1706
+ "task_id": 1,
1707
+ "stress_slice": "nominal",
1708
+ "difficulty_bin": "hard",
1709
+ "episode_index": 89,
1710
+ "seed": 102014
1711
+ },
1712
+ {
1713
+ "proxy_name": "bag_proxy",
1714
+ "task_name": "bag",
1715
+ "task_id": 1,
1716
+ "stress_slice": "nominal",
1717
+ "difficulty_bin": "medium",
1718
+ "episode_index": 90,
1719
+ "seed": 100015
1720
+ },
1721
+ {
1722
+ "proxy_name": "bag_proxy",
1723
+ "task_name": "bag",
1724
+ "task_id": 1,
1725
+ "stress_slice": "nominal",
1726
+ "difficulty_bin": "hard",
1727
+ "episode_index": 91,
1728
+ "seed": 102015
1729
+ },
1730
+ {
1731
+ "proxy_name": "bag_proxy",
1732
+ "task_name": "bag",
1733
+ "task_id": 1,
1734
+ "stress_slice": "nominal",
1735
+ "difficulty_bin": "medium",
1736
+ "episode_index": 92,
1737
+ "seed": 100016
1738
+ },
1739
+ {
1740
+ "proxy_name": "bag_proxy",
1741
+ "task_name": "bag",
1742
+ "task_id": 1,
1743
+ "stress_slice": "nominal",
1744
+ "difficulty_bin": "hard",
1745
+ "episode_index": 93,
1746
+ "seed": 102016
1747
+ },
1748
+ {
1749
+ "proxy_name": "bag_proxy",
1750
+ "task_name": "bag",
1751
+ "task_id": 1,
1752
+ "stress_slice": "nominal",
1753
+ "difficulty_bin": "medium",
1754
+ "episode_index": 94,
1755
+ "seed": 100017
1756
+ },
1757
+ {
1758
+ "proxy_name": "bag_proxy",
1759
+ "task_name": "bag",
1760
+ "task_id": 1,
1761
+ "stress_slice": "nominal",
1762
+ "difficulty_bin": "hard",
1763
+ "episode_index": 95,
1764
+ "seed": 102017
1765
+ },
1766
+ {
1767
+ "proxy_name": "bag_proxy",
1768
+ "task_name": "bag",
1769
+ "task_id": 1,
1770
+ "stress_slice": "nominal",
1771
+ "difficulty_bin": "medium",
1772
+ "episode_index": 96,
1773
+ "seed": 100018
1774
+ },
1775
+ {
1776
+ "proxy_name": "bag_proxy",
1777
+ "task_name": "bag",
1778
+ "task_id": 1,
1779
+ "stress_slice": "nominal",
1780
+ "difficulty_bin": "hard",
1781
+ "episode_index": 97,
1782
+ "seed": 102018
1783
+ },
1784
+ {
1785
+ "proxy_name": "bag_proxy",
1786
+ "task_name": "bag",
1787
+ "task_id": 1,
1788
+ "stress_slice": "nominal",
1789
+ "difficulty_bin": "medium",
1790
+ "episode_index": 98,
1791
+ "seed": 100019
1792
+ },
1793
+ {
1794
+ "proxy_name": "bag_proxy",
1795
+ "task_name": "bag",
1796
+ "task_id": 1,
1797
+ "stress_slice": "nominal",
1798
+ "difficulty_bin": "hard",
1799
+ "episode_index": 99,
1800
+ "seed": 102019
1801
+ },
1802
+ {
1803
+ "proxy_name": "cloth_proxy",
1804
+ "task_name": "cloth",
1805
+ "task_id": 2,
1806
+ "stress_slice": "nominal",
1807
+ "difficulty_bin": "medium",
1808
+ "episode_index": 0,
1809
+ "seed": 200000
1810
+ },
1811
+ {
1812
+ "proxy_name": "cloth_proxy",
1813
+ "task_name": "cloth",
1814
+ "task_id": 2,
1815
+ "stress_slice": "high_reocclusion",
1816
+ "difficulty_bin": "medium",
1817
+ "episode_index": 1,
1818
+ "seed": 201000
1819
+ },
1820
+ {
1821
+ "proxy_name": "cloth_proxy",
1822
+ "task_name": "cloth",
1823
+ "task_id": 2,
1824
+ "stress_slice": "nominal",
1825
+ "difficulty_bin": "hard",
1826
+ "episode_index": 2,
1827
+ "seed": 202000
1828
+ },
1829
+ {
1830
+ "proxy_name": "cloth_proxy",
1831
+ "task_name": "cloth",
1832
+ "task_id": 2,
1833
+ "stress_slice": "high_reocclusion",
1834
+ "difficulty_bin": "hard",
1835
+ "episode_index": 3,
1836
+ "seed": 203000
1837
+ },
1838
+ {
1839
+ "proxy_name": "cloth_proxy",
1840
+ "task_name": "cloth",
1841
+ "task_id": 2,
1842
+ "stress_slice": "camera_perturbation",
1843
+ "difficulty_bin": "medium",
1844
+ "episode_index": 4,
1845
+ "seed": 204000
1846
+ },
1847
+ {
1848
+ "proxy_name": "cloth_proxy",
1849
+ "task_name": "cloth",
1850
+ "task_id": 2,
1851
+ "stress_slice": "fold_sensitive_long_persistence",
1852
+ "difficulty_bin": "medium",
1853
+ "episode_index": 5,
1854
+ "seed": 205000
1855
+ },
1856
+ {
1857
+ "proxy_name": "cloth_proxy",
1858
+ "task_name": "cloth",
1859
+ "task_id": 2,
1860
+ "stress_slice": "camera_perturbation",
1861
+ "difficulty_bin": "hard",
1862
+ "episode_index": 6,
1863
+ "seed": 206000
1864
+ },
1865
+ {
1866
+ "proxy_name": "cloth_proxy",
1867
+ "task_name": "cloth",
1868
+ "task_id": 2,
1869
+ "stress_slice": "fold_sensitive_long_persistence",
1870
+ "difficulty_bin": "hard",
1871
+ "episode_index": 7,
1872
+ "seed": 207000
1873
+ },
1874
+ {
1875
+ "proxy_name": "cloth_proxy",
1876
+ "task_name": "cloth",
1877
+ "task_id": 2,
1878
+ "stress_slice": "nominal",
1879
+ "difficulty_bin": "medium",
1880
+ "episode_index": 8,
1881
+ "seed": 200001
1882
+ },
1883
+ {
1884
+ "proxy_name": "cloth_proxy",
1885
+ "task_name": "cloth",
1886
+ "task_id": 2,
1887
+ "stress_slice": "high_reocclusion",
1888
+ "difficulty_bin": "medium",
1889
+ "episode_index": 9,
1890
+ "seed": 201001
1891
+ },
1892
+ {
1893
+ "proxy_name": "cloth_proxy",
1894
+ "task_name": "cloth",
1895
+ "task_id": 2,
1896
+ "stress_slice": "nominal",
1897
+ "difficulty_bin": "hard",
1898
+ "episode_index": 10,
1899
+ "seed": 202001
1900
+ },
1901
+ {
1902
+ "proxy_name": "cloth_proxy",
1903
+ "task_name": "cloth",
1904
+ "task_id": 2,
1905
+ "stress_slice": "high_reocclusion",
1906
+ "difficulty_bin": "hard",
1907
+ "episode_index": 11,
1908
+ "seed": 203001
1909
+ },
1910
+ {
1911
+ "proxy_name": "cloth_proxy",
1912
+ "task_name": "cloth",
1913
+ "task_id": 2,
1914
+ "stress_slice": "camera_perturbation",
1915
+ "difficulty_bin": "medium",
1916
+ "episode_index": 12,
1917
+ "seed": 204001
1918
+ },
1919
+ {
1920
+ "proxy_name": "cloth_proxy",
1921
+ "task_name": "cloth",
1922
+ "task_id": 2,
1923
+ "stress_slice": "fold_sensitive_long_persistence",
1924
+ "difficulty_bin": "medium",
1925
+ "episode_index": 13,
1926
+ "seed": 205001
1927
+ },
1928
+ {
1929
+ "proxy_name": "cloth_proxy",
1930
+ "task_name": "cloth",
1931
+ "task_id": 2,
1932
+ "stress_slice": "camera_perturbation",
1933
+ "difficulty_bin": "hard",
1934
+ "episode_index": 14,
1935
+ "seed": 206001
1936
+ },
1937
+ {
1938
+ "proxy_name": "cloth_proxy",
1939
+ "task_name": "cloth",
1940
+ "task_id": 2,
1941
+ "stress_slice": "fold_sensitive_long_persistence",
1942
+ "difficulty_bin": "hard",
1943
+ "episode_index": 15,
1944
+ "seed": 207001
1945
+ },
1946
+ {
1947
+ "proxy_name": "cloth_proxy",
1948
+ "task_name": "cloth",
1949
+ "task_id": 2,
1950
+ "stress_slice": "nominal",
1951
+ "difficulty_bin": "medium",
1952
+ "episode_index": 16,
1953
+ "seed": 200002
1954
+ },
1955
+ {
1956
+ "proxy_name": "cloth_proxy",
1957
+ "task_name": "cloth",
1958
+ "task_id": 2,
1959
+ "stress_slice": "high_reocclusion",
1960
+ "difficulty_bin": "medium",
1961
+ "episode_index": 17,
1962
+ "seed": 201002
1963
+ },
1964
+ {
1965
+ "proxy_name": "cloth_proxy",
1966
+ "task_name": "cloth",
1967
+ "task_id": 2,
1968
+ "stress_slice": "nominal",
1969
+ "difficulty_bin": "hard",
1970
+ "episode_index": 18,
1971
+ "seed": 202002
1972
+ },
1973
+ {
1974
+ "proxy_name": "cloth_proxy",
1975
+ "task_name": "cloth",
1976
+ "task_id": 2,
1977
+ "stress_slice": "high_reocclusion",
1978
+ "difficulty_bin": "hard",
1979
+ "episode_index": 19,
1980
+ "seed": 203002
1981
+ },
1982
+ {
1983
+ "proxy_name": "cloth_proxy",
1984
+ "task_name": "cloth",
1985
+ "task_id": 2,
1986
+ "stress_slice": "camera_perturbation",
1987
+ "difficulty_bin": "medium",
1988
+ "episode_index": 20,
1989
+ "seed": 204002
1990
+ },
1991
+ {
1992
+ "proxy_name": "cloth_proxy",
1993
+ "task_name": "cloth",
1994
+ "task_id": 2,
1995
+ "stress_slice": "fold_sensitive_long_persistence",
1996
+ "difficulty_bin": "medium",
1997
+ "episode_index": 21,
1998
+ "seed": 205002
1999
+ },
2000
+ {
2001
+ "proxy_name": "cloth_proxy",
2002
+ "task_name": "cloth",
2003
+ "task_id": 2,
2004
+ "stress_slice": "camera_perturbation",
2005
+ "difficulty_bin": "hard",
2006
+ "episode_index": 22,
2007
+ "seed": 206002
2008
+ },
2009
+ {
2010
+ "proxy_name": "cloth_proxy",
2011
+ "task_name": "cloth",
2012
+ "task_id": 2,
2013
+ "stress_slice": "fold_sensitive_long_persistence",
2014
+ "difficulty_bin": "hard",
2015
+ "episode_index": 23,
2016
+ "seed": 207002
2017
+ },
2018
+ {
2019
+ "proxy_name": "cloth_proxy",
2020
+ "task_name": "cloth",
2021
+ "task_id": 2,
2022
+ "stress_slice": "nominal",
2023
+ "difficulty_bin": "medium",
2024
+ "episode_index": 24,
2025
+ "seed": 200003
2026
+ },
2027
+ {
2028
+ "proxy_name": "cloth_proxy",
2029
+ "task_name": "cloth",
2030
+ "task_id": 2,
2031
+ "stress_slice": "high_reocclusion",
2032
+ "difficulty_bin": "medium",
2033
+ "episode_index": 25,
2034
+ "seed": 201003
2035
+ },
2036
+ {
2037
+ "proxy_name": "cloth_proxy",
2038
+ "task_name": "cloth",
2039
+ "task_id": 2,
2040
+ "stress_slice": "nominal",
2041
+ "difficulty_bin": "hard",
2042
+ "episode_index": 26,
2043
+ "seed": 202003
2044
+ },
2045
+ {
2046
+ "proxy_name": "cloth_proxy",
2047
+ "task_name": "cloth",
2048
+ "task_id": 2,
2049
+ "stress_slice": "high_reocclusion",
2050
+ "difficulty_bin": "hard",
2051
+ "episode_index": 27,
2052
+ "seed": 203003
2053
+ },
2054
+ {
2055
+ "proxy_name": "cloth_proxy",
2056
+ "task_name": "cloth",
2057
+ "task_id": 2,
2058
+ "stress_slice": "camera_perturbation",
2059
+ "difficulty_bin": "medium",
2060
+ "episode_index": 28,
2061
+ "seed": 204003
2062
+ },
2063
+ {
2064
+ "proxy_name": "cloth_proxy",
2065
+ "task_name": "cloth",
2066
+ "task_id": 2,
2067
+ "stress_slice": "fold_sensitive_long_persistence",
2068
+ "difficulty_bin": "medium",
2069
+ "episode_index": 29,
2070
+ "seed": 205003
2071
+ },
2072
+ {
2073
+ "proxy_name": "cloth_proxy",
2074
+ "task_name": "cloth",
2075
+ "task_id": 2,
2076
+ "stress_slice": "camera_perturbation",
2077
+ "difficulty_bin": "hard",
2078
+ "episode_index": 30,
2079
+ "seed": 206003
2080
+ },
2081
+ {
2082
+ "proxy_name": "cloth_proxy",
2083
+ "task_name": "cloth",
2084
+ "task_id": 2,
2085
+ "stress_slice": "fold_sensitive_long_persistence",
2086
+ "difficulty_bin": "hard",
2087
+ "episode_index": 31,
2088
+ "seed": 207003
2089
+ },
2090
+ {
2091
+ "proxy_name": "cloth_proxy",
2092
+ "task_name": "cloth",
2093
+ "task_id": 2,
2094
+ "stress_slice": "nominal",
2095
+ "difficulty_bin": "medium",
2096
+ "episode_index": 32,
2097
+ "seed": 200004
2098
+ },
2099
+ {
2100
+ "proxy_name": "cloth_proxy",
2101
+ "task_name": "cloth",
2102
+ "task_id": 2,
2103
+ "stress_slice": "high_reocclusion",
2104
+ "difficulty_bin": "medium",
2105
+ "episode_index": 33,
2106
+ "seed": 201004
2107
+ },
2108
+ {
2109
+ "proxy_name": "cloth_proxy",
2110
+ "task_name": "cloth",
2111
+ "task_id": 2,
2112
+ "stress_slice": "nominal",
2113
+ "difficulty_bin": "hard",
2114
+ "episode_index": 34,
2115
+ "seed": 202004
2116
+ },
2117
+ {
2118
+ "proxy_name": "cloth_proxy",
2119
+ "task_name": "cloth",
2120
+ "task_id": 2,
2121
+ "stress_slice": "high_reocclusion",
2122
+ "difficulty_bin": "hard",
2123
+ "episode_index": 35,
2124
+ "seed": 203004
2125
+ },
2126
+ {
2127
+ "proxy_name": "cloth_proxy",
2128
+ "task_name": "cloth",
2129
+ "task_id": 2,
2130
+ "stress_slice": "camera_perturbation",
2131
+ "difficulty_bin": "medium",
2132
+ "episode_index": 36,
2133
+ "seed": 204004
2134
+ },
2135
+ {
2136
+ "proxy_name": "cloth_proxy",
2137
+ "task_name": "cloth",
2138
+ "task_id": 2,
2139
+ "stress_slice": "fold_sensitive_long_persistence",
2140
+ "difficulty_bin": "medium",
2141
+ "episode_index": 37,
2142
+ "seed": 205004
2143
+ },
2144
+ {
2145
+ "proxy_name": "cloth_proxy",
2146
+ "task_name": "cloth",
2147
+ "task_id": 2,
2148
+ "stress_slice": "camera_perturbation",
2149
+ "difficulty_bin": "hard",
2150
+ "episode_index": 38,
2151
+ "seed": 206004
2152
+ },
2153
+ {
2154
+ "proxy_name": "cloth_proxy",
2155
+ "task_name": "cloth",
2156
+ "task_id": 2,
2157
+ "stress_slice": "fold_sensitive_long_persistence",
2158
+ "difficulty_bin": "hard",
2159
+ "episode_index": 39,
2160
+ "seed": 207004
2161
+ },
2162
+ {
2163
+ "proxy_name": "cloth_proxy",
2164
+ "task_name": "cloth",
2165
+ "task_id": 2,
2166
+ "stress_slice": "nominal",
2167
+ "difficulty_bin": "medium",
2168
+ "episode_index": 40,
2169
+ "seed": 200005
2170
+ },
2171
+ {
2172
+ "proxy_name": "cloth_proxy",
2173
+ "task_name": "cloth",
2174
+ "task_id": 2,
2175
+ "stress_slice": "high_reocclusion",
2176
+ "difficulty_bin": "medium",
2177
+ "episode_index": 41,
2178
+ "seed": 201005
2179
+ },
2180
+ {
2181
+ "proxy_name": "cloth_proxy",
2182
+ "task_name": "cloth",
2183
+ "task_id": 2,
2184
+ "stress_slice": "nominal",
2185
+ "difficulty_bin": "hard",
2186
+ "episode_index": 42,
2187
+ "seed": 202005
2188
+ },
2189
+ {
2190
+ "proxy_name": "cloth_proxy",
2191
+ "task_name": "cloth",
2192
+ "task_id": 2,
2193
+ "stress_slice": "high_reocclusion",
2194
+ "difficulty_bin": "hard",
2195
+ "episode_index": 43,
2196
+ "seed": 203005
2197
+ },
2198
+ {
2199
+ "proxy_name": "cloth_proxy",
2200
+ "task_name": "cloth",
2201
+ "task_id": 2,
2202
+ "stress_slice": "camera_perturbation",
2203
+ "difficulty_bin": "medium",
2204
+ "episode_index": 44,
2205
+ "seed": 204005
2206
+ },
2207
+ {
2208
+ "proxy_name": "cloth_proxy",
2209
+ "task_name": "cloth",
2210
+ "task_id": 2,
2211
+ "stress_slice": "fold_sensitive_long_persistence",
2212
+ "difficulty_bin": "medium",
2213
+ "episode_index": 45,
2214
+ "seed": 205005
2215
+ },
2216
+ {
2217
+ "proxy_name": "cloth_proxy",
2218
+ "task_name": "cloth",
2219
+ "task_id": 2,
2220
+ "stress_slice": "camera_perturbation",
2221
+ "difficulty_bin": "hard",
2222
+ "episode_index": 46,
2223
+ "seed": 206005
2224
+ },
2225
+ {
2226
+ "proxy_name": "cloth_proxy",
2227
+ "task_name": "cloth",
2228
+ "task_id": 2,
2229
+ "stress_slice": "fold_sensitive_long_persistence",
2230
+ "difficulty_bin": "hard",
2231
+ "episode_index": 47,
2232
+ "seed": 207005
2233
+ },
2234
+ {
2235
+ "proxy_name": "cloth_proxy",
2236
+ "task_name": "cloth",
2237
+ "task_id": 2,
2238
+ "stress_slice": "nominal",
2239
+ "difficulty_bin": "medium",
2240
+ "episode_index": 48,
2241
+ "seed": 200006
2242
+ },
2243
+ {
2244
+ "proxy_name": "cloth_proxy",
2245
+ "task_name": "cloth",
2246
+ "task_id": 2,
2247
+ "stress_slice": "high_reocclusion",
2248
+ "difficulty_bin": "medium",
2249
+ "episode_index": 49,
2250
+ "seed": 201006
2251
+ },
2252
+ {
2253
+ "proxy_name": "cloth_proxy",
2254
+ "task_name": "cloth",
2255
+ "task_id": 2,
2256
+ "stress_slice": "nominal",
2257
+ "difficulty_bin": "hard",
2258
+ "episode_index": 50,
2259
+ "seed": 202006
2260
+ },
2261
+ {
2262
+ "proxy_name": "cloth_proxy",
2263
+ "task_name": "cloth",
2264
+ "task_id": 2,
2265
+ "stress_slice": "high_reocclusion",
2266
+ "difficulty_bin": "hard",
2267
+ "episode_index": 51,
2268
+ "seed": 203006
2269
+ },
2270
+ {
2271
+ "proxy_name": "cloth_proxy",
2272
+ "task_name": "cloth",
2273
+ "task_id": 2,
2274
+ "stress_slice": "camera_perturbation",
2275
+ "difficulty_bin": "medium",
2276
+ "episode_index": 52,
2277
+ "seed": 204006
2278
+ },
2279
+ {
2280
+ "proxy_name": "cloth_proxy",
2281
+ "task_name": "cloth",
2282
+ "task_id": 2,
2283
+ "stress_slice": "fold_sensitive_long_persistence",
2284
+ "difficulty_bin": "medium",
2285
+ "episode_index": 53,
2286
+ "seed": 205006
2287
+ },
2288
+ {
2289
+ "proxy_name": "cloth_proxy",
2290
+ "task_name": "cloth",
2291
+ "task_id": 2,
2292
+ "stress_slice": "camera_perturbation",
2293
+ "difficulty_bin": "hard",
2294
+ "episode_index": 54,
2295
+ "seed": 206006
2296
+ },
2297
+ {
2298
+ "proxy_name": "cloth_proxy",
2299
+ "task_name": "cloth",
2300
+ "task_id": 2,
2301
+ "stress_slice": "fold_sensitive_long_persistence",
2302
+ "difficulty_bin": "hard",
2303
+ "episode_index": 55,
2304
+ "seed": 207006
2305
+ },
2306
+ {
2307
+ "proxy_name": "cloth_proxy",
2308
+ "task_name": "cloth",
2309
+ "task_id": 2,
2310
+ "stress_slice": "nominal",
2311
+ "difficulty_bin": "medium",
2312
+ "episode_index": 56,
2313
+ "seed": 200007
2314
+ },
2315
+ {
2316
+ "proxy_name": "cloth_proxy",
2317
+ "task_name": "cloth",
2318
+ "task_id": 2,
2319
+ "stress_slice": "high_reocclusion",
2320
+ "difficulty_bin": "medium",
2321
+ "episode_index": 57,
2322
+ "seed": 201007
2323
+ },
2324
+ {
2325
+ "proxy_name": "cloth_proxy",
2326
+ "task_name": "cloth",
2327
+ "task_id": 2,
2328
+ "stress_slice": "nominal",
2329
+ "difficulty_bin": "hard",
2330
+ "episode_index": 58,
2331
+ "seed": 202007
2332
+ },
2333
+ {
2334
+ "proxy_name": "cloth_proxy",
2335
+ "task_name": "cloth",
2336
+ "task_id": 2,
2337
+ "stress_slice": "high_reocclusion",
2338
+ "difficulty_bin": "hard",
2339
+ "episode_index": 59,
2340
+ "seed": 203007
2341
+ },
2342
+ {
2343
+ "proxy_name": "cloth_proxy",
2344
+ "task_name": "cloth",
2345
+ "task_id": 2,
2346
+ "stress_slice": "camera_perturbation",
2347
+ "difficulty_bin": "medium",
2348
+ "episode_index": 60,
2349
+ "seed": 204007
2350
+ },
2351
+ {
2352
+ "proxy_name": "cloth_proxy",
2353
+ "task_name": "cloth",
2354
+ "task_id": 2,
2355
+ "stress_slice": "fold_sensitive_long_persistence",
2356
+ "difficulty_bin": "medium",
2357
+ "episode_index": 61,
2358
+ "seed": 205007
2359
+ },
2360
+ {
2361
+ "proxy_name": "cloth_proxy",
2362
+ "task_name": "cloth",
2363
+ "task_id": 2,
2364
+ "stress_slice": "camera_perturbation",
2365
+ "difficulty_bin": "hard",
2366
+ "episode_index": 62,
2367
+ "seed": 206007
2368
+ },
2369
+ {
2370
+ "proxy_name": "cloth_proxy",
2371
+ "task_name": "cloth",
2372
+ "task_id": 2,
2373
+ "stress_slice": "fold_sensitive_long_persistence",
2374
+ "difficulty_bin": "hard",
2375
+ "episode_index": 63,
2376
+ "seed": 207007
2377
+ },
2378
+ {
2379
+ "proxy_name": "cloth_proxy",
2380
+ "task_name": "cloth",
2381
+ "task_id": 2,
2382
+ "stress_slice": "nominal",
2383
+ "difficulty_bin": "medium",
2384
+ "episode_index": 64,
2385
+ "seed": 200008
2386
+ },
2387
+ {
2388
+ "proxy_name": "cloth_proxy",
2389
+ "task_name": "cloth",
2390
+ "task_id": 2,
2391
+ "stress_slice": "high_reocclusion",
2392
+ "difficulty_bin": "medium",
2393
+ "episode_index": 65,
2394
+ "seed": 201008
2395
+ },
2396
+ {
2397
+ "proxy_name": "cloth_proxy",
2398
+ "task_name": "cloth",
2399
+ "task_id": 2,
2400
+ "stress_slice": "nominal",
2401
+ "difficulty_bin": "hard",
2402
+ "episode_index": 66,
2403
+ "seed": 202008
2404
+ },
2405
+ {
2406
+ "proxy_name": "cloth_proxy",
2407
+ "task_name": "cloth",
2408
+ "task_id": 2,
2409
+ "stress_slice": "high_reocclusion",
2410
+ "difficulty_bin": "hard",
2411
+ "episode_index": 67,
2412
+ "seed": 203008
2413
+ },
2414
+ {
2415
+ "proxy_name": "cloth_proxy",
2416
+ "task_name": "cloth",
2417
+ "task_id": 2,
2418
+ "stress_slice": "camera_perturbation",
2419
+ "difficulty_bin": "medium",
2420
+ "episode_index": 68,
2421
+ "seed": 204008
2422
+ },
2423
+ {
2424
+ "proxy_name": "cloth_proxy",
2425
+ "task_name": "cloth",
2426
+ "task_id": 2,
2427
+ "stress_slice": "fold_sensitive_long_persistence",
2428
+ "difficulty_bin": "medium",
2429
+ "episode_index": 69,
2430
+ "seed": 205008
2431
+ },
2432
+ {
2433
+ "proxy_name": "cloth_proxy",
2434
+ "task_name": "cloth",
2435
+ "task_id": 2,
2436
+ "stress_slice": "camera_perturbation",
2437
+ "difficulty_bin": "hard",
2438
+ "episode_index": 70,
2439
+ "seed": 206008
2440
+ },
2441
+ {
2442
+ "proxy_name": "cloth_proxy",
2443
+ "task_name": "cloth",
2444
+ "task_id": 2,
2445
+ "stress_slice": "fold_sensitive_long_persistence",
2446
+ "difficulty_bin": "hard",
2447
+ "episode_index": 71,
2448
+ "seed": 207008
2449
+ },
2450
+ {
2451
+ "proxy_name": "cloth_proxy",
2452
+ "task_name": "cloth",
2453
+ "task_id": 2,
2454
+ "stress_slice": "nominal",
2455
+ "difficulty_bin": "medium",
2456
+ "episode_index": 72,
2457
+ "seed": 200009
2458
+ },
2459
+ {
2460
+ "proxy_name": "cloth_proxy",
2461
+ "task_name": "cloth",
2462
+ "task_id": 2,
2463
+ "stress_slice": "high_reocclusion",
2464
+ "difficulty_bin": "medium",
2465
+ "episode_index": 73,
2466
+ "seed": 201009
2467
+ },
2468
+ {
2469
+ "proxy_name": "cloth_proxy",
2470
+ "task_name": "cloth",
2471
+ "task_id": 2,
2472
+ "stress_slice": "nominal",
2473
+ "difficulty_bin": "hard",
2474
+ "episode_index": 74,
2475
+ "seed": 202009
2476
+ },
2477
+ {
2478
+ "proxy_name": "cloth_proxy",
2479
+ "task_name": "cloth",
2480
+ "task_id": 2,
2481
+ "stress_slice": "high_reocclusion",
2482
+ "difficulty_bin": "hard",
2483
+ "episode_index": 75,
2484
+ "seed": 203009
2485
+ },
2486
+ {
2487
+ "proxy_name": "cloth_proxy",
2488
+ "task_name": "cloth",
2489
+ "task_id": 2,
2490
+ "stress_slice": "camera_perturbation",
2491
+ "difficulty_bin": "medium",
2492
+ "episode_index": 76,
2493
+ "seed": 204009
2494
+ },
2495
+ {
2496
+ "proxy_name": "cloth_proxy",
2497
+ "task_name": "cloth",
2498
+ "task_id": 2,
2499
+ "stress_slice": "fold_sensitive_long_persistence",
2500
+ "difficulty_bin": "medium",
2501
+ "episode_index": 77,
2502
+ "seed": 205009
2503
+ },
2504
+ {
2505
+ "proxy_name": "cloth_proxy",
2506
+ "task_name": "cloth",
2507
+ "task_id": 2,
2508
+ "stress_slice": "camera_perturbation",
2509
+ "difficulty_bin": "hard",
2510
+ "episode_index": 78,
2511
+ "seed": 206009
2512
+ },
2513
+ {
2514
+ "proxy_name": "cloth_proxy",
2515
+ "task_name": "cloth",
2516
+ "task_id": 2,
2517
+ "stress_slice": "fold_sensitive_long_persistence",
2518
+ "difficulty_bin": "hard",
2519
+ "episode_index": 79,
2520
+ "seed": 207009
2521
+ },
2522
+ {
2523
+ "proxy_name": "cloth_proxy",
2524
+ "task_name": "cloth",
2525
+ "task_id": 2,
2526
+ "stress_slice": "nominal",
2527
+ "difficulty_bin": "medium",
2528
+ "episode_index": 80,
2529
+ "seed": 200010
2530
+ },
2531
+ {
2532
+ "proxy_name": "cloth_proxy",
2533
+ "task_name": "cloth",
2534
+ "task_id": 2,
2535
+ "stress_slice": "nominal",
2536
+ "difficulty_bin": "hard",
2537
+ "episode_index": 81,
2538
+ "seed": 202010
2539
+ },
2540
+ {
2541
+ "proxy_name": "cloth_proxy",
2542
+ "task_name": "cloth",
2543
+ "task_id": 2,
2544
+ "stress_slice": "nominal",
2545
+ "difficulty_bin": "medium",
2546
+ "episode_index": 82,
2547
+ "seed": 200011
2548
+ },
2549
+ {
2550
+ "proxy_name": "cloth_proxy",
2551
+ "task_name": "cloth",
2552
+ "task_id": 2,
2553
+ "stress_slice": "nominal",
2554
+ "difficulty_bin": "hard",
2555
+ "episode_index": 83,
2556
+ "seed": 202011
2557
+ },
2558
+ {
2559
+ "proxy_name": "cloth_proxy",
2560
+ "task_name": "cloth",
2561
+ "task_id": 2,
2562
+ "stress_slice": "nominal",
2563
+ "difficulty_bin": "medium",
2564
+ "episode_index": 84,
2565
+ "seed": 200012
2566
+ },
2567
+ {
2568
+ "proxy_name": "cloth_proxy",
2569
+ "task_name": "cloth",
2570
+ "task_id": 2,
2571
+ "stress_slice": "nominal",
2572
+ "difficulty_bin": "hard",
2573
+ "episode_index": 85,
2574
+ "seed": 202012
2575
+ },
2576
+ {
2577
+ "proxy_name": "cloth_proxy",
2578
+ "task_name": "cloth",
2579
+ "task_id": 2,
2580
+ "stress_slice": "nominal",
2581
+ "difficulty_bin": "medium",
2582
+ "episode_index": 86,
2583
+ "seed": 200013
2584
+ },
2585
+ {
2586
+ "proxy_name": "cloth_proxy",
2587
+ "task_name": "cloth",
2588
+ "task_id": 2,
2589
+ "stress_slice": "nominal",
2590
+ "difficulty_bin": "hard",
2591
+ "episode_index": 87,
2592
+ "seed": 202013
2593
+ },
2594
+ {
2595
+ "proxy_name": "cloth_proxy",
2596
+ "task_name": "cloth",
2597
+ "task_id": 2,
2598
+ "stress_slice": "nominal",
2599
+ "difficulty_bin": "medium",
2600
+ "episode_index": 88,
2601
+ "seed": 200014
2602
+ },
2603
+ {
2604
+ "proxy_name": "cloth_proxy",
2605
+ "task_name": "cloth",
2606
+ "task_id": 2,
2607
+ "stress_slice": "nominal",
2608
+ "difficulty_bin": "hard",
2609
+ "episode_index": 89,
2610
+ "seed": 202014
2611
+ },
2612
+ {
2613
+ "proxy_name": "cloth_proxy",
2614
+ "task_name": "cloth",
2615
+ "task_id": 2,
2616
+ "stress_slice": "nominal",
2617
+ "difficulty_bin": "medium",
2618
+ "episode_index": 90,
2619
+ "seed": 200015
2620
+ },
2621
+ {
2622
+ "proxy_name": "cloth_proxy",
2623
+ "task_name": "cloth",
2624
+ "task_id": 2,
2625
+ "stress_slice": "nominal",
2626
+ "difficulty_bin": "hard",
2627
+ "episode_index": 91,
2628
+ "seed": 202015
2629
+ },
2630
+ {
2631
+ "proxy_name": "cloth_proxy",
2632
+ "task_name": "cloth",
2633
+ "task_id": 2,
2634
+ "stress_slice": "nominal",
2635
+ "difficulty_bin": "medium",
2636
+ "episode_index": 92,
2637
+ "seed": 200016
2638
+ },
2639
+ {
2640
+ "proxy_name": "cloth_proxy",
2641
+ "task_name": "cloth",
2642
+ "task_id": 2,
2643
+ "stress_slice": "nominal",
2644
+ "difficulty_bin": "hard",
2645
+ "episode_index": 93,
2646
+ "seed": 202016
2647
+ },
2648
+ {
2649
+ "proxy_name": "cloth_proxy",
2650
+ "task_name": "cloth",
2651
+ "task_id": 2,
2652
+ "stress_slice": "nominal",
2653
+ "difficulty_bin": "medium",
2654
+ "episode_index": 94,
2655
+ "seed": 200017
2656
+ },
2657
+ {
2658
+ "proxy_name": "cloth_proxy",
2659
+ "task_name": "cloth",
2660
+ "task_id": 2,
2661
+ "stress_slice": "nominal",
2662
+ "difficulty_bin": "hard",
2663
+ "episode_index": 95,
2664
+ "seed": 202017
2665
+ },
2666
+ {
2667
+ "proxy_name": "cloth_proxy",
2668
+ "task_name": "cloth",
2669
+ "task_id": 2,
2670
+ "stress_slice": "nominal",
2671
+ "difficulty_bin": "medium",
2672
+ "episode_index": 96,
2673
+ "seed": 200018
2674
+ },
2675
+ {
2676
+ "proxy_name": "cloth_proxy",
2677
+ "task_name": "cloth",
2678
+ "task_id": 2,
2679
+ "stress_slice": "nominal",
2680
+ "difficulty_bin": "hard",
2681
+ "episode_index": 97,
2682
+ "seed": 202018
2683
+ },
2684
+ {
2685
+ "proxy_name": "cloth_proxy",
2686
+ "task_name": "cloth",
2687
+ "task_id": 2,
2688
+ "stress_slice": "nominal",
2689
+ "difficulty_bin": "medium",
2690
+ "episode_index": 98,
2691
+ "seed": 200019
2692
+ },
2693
+ {
2694
+ "proxy_name": "cloth_proxy",
2695
+ "task_name": "cloth",
2696
+ "task_id": 2,
2697
+ "stress_slice": "nominal",
2698
+ "difficulty_bin": "hard",
2699
+ "episode_index": 99,
2700
+ "seed": 202019
2701
+ }
2702
+ ]
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/ablations_v7/ablations.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.280
5
+ - visibility_integral: 31.968
6
+ - corridor_availability: 0.675
7
+ - reocclusion_rate: 0.014
8
+ - disturbance_cost: 0.393
9
+ - premature_retrieve_rate: 0.125
10
+ - reocclusion_after_reveal_rate: 0.547
11
+ - planner_regret: 0.091
12
+ - chunk_commit_steps: 0.000
13
+
14
+ ## no_planner
15
+ - mean_success: 0.200
16
+ - visibility_integral: 6.978
17
+ - corridor_availability: 0.230
18
+ - reocclusion_rate: 0.020
19
+ - disturbance_cost: 0.110
20
+ - premature_retrieve_rate: 0.151
21
+ - reocclusion_after_reveal_rate: 0.800
22
+ - planner_regret: 0.041
23
+ - chunk_commit_steps: 0.000
24
+ - paired_paired_episodes_vs_full_model: 300.000
25
+ - paired_success_delta_vs_full_model: -0.080
26
+ - paired_visibility_delta_vs_full_model: -24.990
27
+ - paired_reocclusion_delta_vs_full_model: 0.006
28
+ - paired_disturbance_delta_vs_full_model: -0.283
29
+
30
+ ## no_spatial_memory
31
+ - mean_success: 0.323
32
+ - visibility_integral: 37.043
33
+ - corridor_availability: 0.825
34
+ - reocclusion_rate: 0.015
35
+ - disturbance_cost: 0.441
36
+ - premature_retrieve_rate: 0.112
37
+ - reocclusion_after_reveal_rate: 0.260
38
+ - planner_regret: 0.163
39
+ - chunk_commit_steps: 0.000
40
+ - paired_paired_episodes_vs_full_model: 300.000
41
+ - paired_success_delta_vs_full_model: 0.043
42
+ - paired_visibility_delta_vs_full_model: 5.075
43
+ - paired_reocclusion_delta_vs_full_model: 0.001
44
+ - paired_disturbance_delta_vs_full_model: 0.048
45
+
46
+ ## no_task_head
47
+ - mean_success: 0.280
48
+ - visibility_integral: 31.965
49
+ - corridor_availability: 0.675
50
+ - reocclusion_rate: 0.014
51
+ - disturbance_cost: 0.393
52
+ - premature_retrieve_rate: 0.125
53
+ - reocclusion_after_reveal_rate: 0.547
54
+ - planner_regret: 0.091
55
+ - chunk_commit_steps: 0.000
56
+ - paired_paired_episodes_vs_full_model: 300.000
57
+ - paired_success_delta_vs_full_model: 0.000
58
+ - paired_visibility_delta_vs_full_model: -0.003
59
+ - paired_reocclusion_delta_vs_full_model: 0.000
60
+ - paired_disturbance_delta_vs_full_model: 0.000
61
+
62
+ ## no_geometry
63
+ - mean_success: 0.270
64
+ - visibility_integral: 32.415
65
+ - corridor_availability: 0.675
66
+ - reocclusion_rate: 0.013
67
+ - disturbance_cost: 0.398
68
+ - premature_retrieve_rate: 0.127
69
+ - reocclusion_after_reveal_rate: 0.550
70
+ - planner_regret: 0.091
71
+ - chunk_commit_steps: 0.000
72
+ - paired_paired_episodes_vs_full_model: 300.000
73
+ - paired_success_delta_vs_full_model: -0.010
74
+ - paired_visibility_delta_vs_full_model: 0.446
75
+ - paired_reocclusion_delta_vs_full_model: -0.001
76
+ - paired_disturbance_delta_vs_full_model: 0.004
77
+
78
+ ## no_camera_pose
79
+ - mean_success: 0.293
80
+ - visibility_integral: 31.640
81
+ - corridor_availability: 0.681
82
+ - reocclusion_rate: 0.017
83
+ - disturbance_cost: 0.389
84
+ - premature_retrieve_rate: 0.126
85
+ - reocclusion_after_reveal_rate: 0.543
86
+ - planner_regret: 0.092
87
+ - chunk_commit_steps: 0.000
88
+ - paired_paired_episodes_vs_full_model: 300.000
89
+ - paired_success_delta_vs_full_model: 0.013
90
+ - paired_visibility_delta_vs_full_model: -0.329
91
+ - paired_reocclusion_delta_vs_full_model: 0.003
92
+ - paired_disturbance_delta_vs_full_model: -0.004
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "local_eval_csv": "/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/eval_data.csv",
3
+ "public_eval_csv": "/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv",
4
+ "overlap_tasks": [
5
+ "coordinated_push_box",
6
+ "coordinated_lift_ball",
7
+ "dual_push_buttons"
8
+ ],
9
+ "local_last_complete_step": {
10
+ "step": 1000,
11
+ "eval_episodes": 25,
12
+ "per_task_return": {
13
+ "coordinated_push_box": 0.0,
14
+ "coordinated_lift_ball": 0.0,
15
+ "dual_push_buttons": 12.0
16
+ },
17
+ "per_task_success": {
18
+ "coordinated_push_box": 0.0,
19
+ "coordinated_lift_ball": 0.0,
20
+ "dual_push_buttons": 0.48
21
+ },
22
+ "per_task_length": {
23
+ "coordinated_push_box": 25.0,
24
+ "coordinated_lift_ball": 25.0,
25
+ "dual_push_buttons": 23.12
26
+ },
27
+ "per_task_total_transitions": {
28
+ "coordinated_push_box": 625.0,
29
+ "coordinated_lift_ball": 1250.0,
30
+ "dual_push_buttons": 1828.0
31
+ },
32
+ "mean_success": 0.16
33
+ },
34
+ "public_best_overlap_step": {
35
+ "step": 60000,
36
+ "eval_episodes": 25,
37
+ "per_task_return": {
38
+ "coordinated_push_box": 20.0,
39
+ "coordinated_lift_ball": 8.0,
40
+ "dual_push_buttons": 24.0
41
+ },
42
+ "per_task_success": {
43
+ "coordinated_push_box": 0.8,
44
+ "coordinated_lift_ball": 0.32,
45
+ "dual_push_buttons": 0.96
46
+ },
47
+ "per_task_length": {
48
+ "coordinated_push_box": 25.0,
49
+ "coordinated_lift_ball": 23.24,
50
+ "dual_push_buttons": 21.56
51
+ },
52
+ "per_task_total_transitions": {
53
+ "coordinated_push_box": 3693.0,
54
+ "coordinated_lift_ball": 2443.0,
55
+ "dual_push_buttons": 4857.0
56
+ },
57
+ "mean_success": 0.6933333333333334
58
+ },
59
+ "delta_vs_public_best": {
60
+ "mean_success_delta": -0.5333333333333333,
61
+ "per_task_success_delta": {
62
+ "coordinated_push_box": -0.8,
63
+ "coordinated_lift_ball": -0.32,
64
+ "dual_push_buttons": -0.48
65
+ }
66
+ }
67
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AnyBimanual Overlap Eval Summary
2
+
3
+ - Local eval CSV: `/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/eval_data.csv`
4
+ - Public eval CSV: `/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv`
5
+
6
+ ## Local Last Complete Step
7
+
8
+ - step: `1000`
9
+ - mean_success: `0.160`
10
+ - coordinated_push_box: success=`0.000`, return=`0.0`
11
+ - coordinated_lift_ball: success=`0.000`, return=`0.0`
12
+ - dual_push_buttons: success=`0.480`, return=`12.0`
13
+
14
+ ## Public Best Overlap Step
15
+
16
+ - step: `60000`
17
+ - mean_success: `0.693`
18
+ - coordinated_push_box: success=`0.800`, return=`20.0`
19
+ - coordinated_lift_ball: success=`0.320`, return=`8.0`
20
+ - dual_push_buttons: success=`0.960`, return=`24.0`
21
+
22
+ ## Delta Vs Public Best
23
+
24
+ - mean_success_delta: `-0.533`
25
+ - coordinated_push_box: `-0.800`
26
+ - coordinated_lift_ball: `-0.320`
27
+ - dual_push_buttons: `-0.480`
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/status.md ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AnyBimanual Overlap Baseline Status 2026-03-30
2
+
3
+ ## Goal
4
+
5
+ Establish a fair official-baseline comparison path on the exact 3-task subset3 overlap used for local RLBench general-task checks:
6
+
7
+ - `coordinated_push_box`
8
+ - `coordinated_lift_ball`
9
+ - `dual_push_buttons`
10
+
11
+ The purpose is not to replace the reveal proxy benchmark as the main selector for the custom foliage / bag / cloth architecture. The purpose is to get a credible general-task baseline anchor on an official external method.
12
+
13
+ ## Code Changes
14
+
15
+ - Added overlap-train wrapper:
16
+ - [run_anybimanual_subset3_overlap_train.sh](/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh)
17
+ - Added overlap-eval wrapper:
18
+ - [run_anybimanual_subset3_overlap_eval.sh](/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_eval.sh)
19
+ - Patched AnyBimanual offline runner to:
20
+ - accept `ANYBIMANUAL_PRETRAINED_WEIGHTS_DIR` instead of a hardcoded placeholder path
21
+ - guard unconditional `wandb` logging in the q-attention agents
22
+ - lazily import `pandas` in the eval-resume helper
23
+ - prefer existing local checkpoints over release weights when `framework.load_existing_weights=True`
24
+ - always save a final checkpoint at `framework.training_iterations` when the loop exits without already writing that step
25
+ - Added a lightweight checkpoint-init helper:
26
+ - [weight_init_utils.py](/workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py)
27
+ - Added targeted resume-logic coverage:
28
+ - [test_anybimanual_resume_logic.py](/workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py)
29
+ - Added overlap-eval summary parser:
30
+ - [summarize_anybimanual_overlap_eval.py](/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_anybimanual_overlap_eval.py)
31
+ - Added targeted summary-parser coverage:
32
+ - [test_anybimanual_overlap_eval_summary.py](/workspace/VLAarchtests/tests/test_anybimanual_overlap_eval_summary.py)
33
+
34
+ Wrapper responsibilities:
35
+
36
+ - activate `/workspace/envs/rlbench`
37
+ - keep all outputs under `/workspace`
38
+ - point AnyBimanual at the local subset3 overlap demo root
39
+ - use the exact overlap task list
40
+ - avoid the upstream tmux launcher path
41
+
42
+ ## Public Reference
43
+
44
+ Local official release artifact:
45
+
46
+ - [eval_data.csv](/workspace/baselines/AnyBimanual/Peract-LF_AnyBimanual/eval_data.csv)
47
+
48
+ Best overlap-relevant released step in the local CSV:
49
+
50
+ - step `60000`
51
+ - `coordinated_push_box`: `20/25 = 0.80`
52
+ - `coordinated_lift_ball`: `8/25 = 0.32`
53
+ - `dual_push_buttons`: `24/25 = 0.96`
54
+ - overlap-task mean: `0.693`
55
+
56
+ These are the external target-line numbers for this branch.
57
+
58
+ ## Completed Tests
59
+
60
+ - Released AnyBimanual checkpoint live sanity on local overlap subset, `1` episode per task:
61
+ - [anybimanual_live_subset3_ep1.log](/workspace/reports/true_baseline_compare_subset3_v1/anybimanual_live_subset3_ep1.log)
62
+ - Result: `0.0` success on all three tasks in that local-subset sanity check.
63
+ - Interpretation: evaluating the released checkpoint on the local overlap subset is not, by itself, an apples-to-apples comparison.
64
+
65
+ - Official overlap smoke train launch, `200` iterations target:
66
+ - log: [anybimanual_subset3_overlap_smoke200_train.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_train.log)
67
+ - run root: [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200)
68
+ - replay root: [/workspace/baselines/AnyBimanual_overlap_replay](/workspace/baselines/AnyBimanual_overlap_replay)
69
+
70
+ - Resume-path verification after the runner patch:
71
+ - `python -m py_compile /workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/offline_train_runner.py /workspace/third_party/AnyBimanual/third_party/YARR/yarr/runners/weight_init_utils.py`
72
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_anybimanual_subset3_overlap_train.sh`
73
+ - `PYTHONPATH=/workspace/third_party/AnyBimanual/third_party/YARR pytest -q /workspace/VLAarchtests/tests/test_anybimanual_resume_logic.py`
74
+ - Result: `4 passed`
75
+
76
+ - Off-by-one checkpoint diagnosis:
77
+ - The first smoke run did complete `200` updates but only wrote `weights/0`: the upstream save condition tests the loop index `i`, and a `range(start_iter, training_iterations)` loop ends at `training_iterations - 1`, so the final step was never saved.
78
+ - Preserved failed smoke log:
79
+ - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train_presavefix.log)
80
+ - This was the reason the eval watcher never fired on the first pass.
81
+
82
+ - Fixed smoke retry, `200` iterations with `save_freq=50`:
83
+ - live train log:
84
+ - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_train.log)
85
+ - run root:
86
+ - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0)
87
+ - result:
88
+ - the run finished cleanly and wrote `weights/50`, `weights/100`, `weights/150`, and `weights/200`
89
+ - `training.log` shows `# Starting training from weights: 0 to 200`
90
+
91
+ - Partial `200`-step overlap eval on the official path:
92
+ - log:
93
+ - [anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_eval.log](/workspace/reports/anybimanual_subset3_overlap_smoke200_fixpretrain_nowandb3_eval.log)
94
+ - observed result before termination:
95
+ - `coordinated_push_box`: `0/25 = 0.0`
96
+ - `coordinated_lift_ball`: `0/18 = 0.0`
97
+ - at least one explicit infeasible-waypoint episode on `coordinated_lift_ball`
98
+ - interpretation:
99
+ - the `200`-step checkpoint is decisively below the useful comparison floor
100
+ - the eval was intentionally stopped early to reclaim wall-clock time for the first potentially meaningful resumed checkpoint
101
+
102
+ - Resume-to-`1000` launch validation:
103
+ - live train session now runs with:
104
+ - `framework.load_existing_weights=True`
105
+ - `framework.training_iterations=1000`
106
+ - `training.log` now shows:
107
+ - `# Starting training from weights: 200 to 1000`
108
+ - this confirms the patched resume path is working in the real official-baseline run, not just in unit tests
109
+
110
+ - First resumed progress block from the live train session:
111
+ - the stdout progress stream reached resumed step `100/800`, which corresponds to global train step `300`
112
+ - logged metrics at that point:
113
+ - loss `40.91718`
114
+ - sample time `0.093029`
115
+ - step time `14.0686`
116
+ - interpretation:
117
+ - the resumed official baseline is now past pure startup and is performing real SGD on the reused replay
118
+ - there is still no post-resume checkpoint on disk yet because the next save boundary is `weights/400`
119
+
120
+ - First post-resume checkpoint milestone:
121
+ - the run reached resumed step `200/800`, which corresponds to global train step `400`
122
+ - logged metrics at that point:
123
+ - loss `33.26684`
124
+ - sample time `0.073085`
125
+ - step time `14.3032`
126
+ - checkpoint now exists on disk at:
127
+ - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/weights/400](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/weights/400)
128
+ - interpretation:
129
+ - the official overlap baseline is not flatlining during resume
130
+ - loss has dropped meaningfully from the step-`300` block, so keeping the `400 -> 1000` leg alive is justified
131
+
132
+ - Post-train eval and summary chain:
133
+ - the waiting watcher now does:
134
+ - wait for the live `200 -> 1000` train PID to exit
135
+ - run the official overlap eval at `EVAL_TYPE=last`
136
+ - summarize the resulting `seed0/eval_data.csv` against the public release `eval_data.csv`
137
+ - then rerun the direct task-routed proxy benchmark wrapper on the fixed sprint benchmark
138
+ - then run the bag-only selector specialization wrapper and compose a candidate routed summary
139
+ - eval log target:
140
+ - [/workspace/reports/anybimanual_subset3_overlap_resume1000_eval.log](/workspace/reports/anybimanual_subset3_overlap_resume1000_eval.log)
141
+ - summary log target:
142
+ - [/workspace/reports/anybimanual_subset3_overlap_resume1000_summary.log](/workspace/reports/anybimanual_subset3_overlap_resume1000_summary.log)
143
+ - summary output target:
144
+ - [/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary](/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary)
145
+ - routed proxy rerun log target:
146
+ - [/workspace/reports/task_routed_proxy_v1_rerun.log](/workspace/reports/task_routed_proxy_v1_rerun.log)
147
+ - routed proxy rerun output target:
148
+ - [/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1_rerun](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1_rerun)
149
+ - bag specialization log target:
150
+ - [/workspace/reports/run_bag_selector_iter9.log](/workspace/reports/run_bag_selector_iter9.log)
151
+ - bag specialization report target:
152
+ - [/workspace/VLAarchtests/artifacts/reports/selector_finetune_v7_iter9_bag](/workspace/VLAarchtests/artifacts/reports/selector_finetune_v7_iter9_bag)
153
+ - bag-routed candidate summary target:
154
+ - [/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v2_candidate](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v2_candidate)
155
+
156
+ ## Live Status At Record Time
157
+
158
+ - The first replay build is complete and reused locally from:
159
+ - `/workspace/baselines/AnyBimanual_overlap_replay/multi`
160
+ - The current live official-baseline leg is the resumed train:
161
+ - run root:
162
+ - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0)
163
+ - training log:
164
+ - [/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log](/workspace/baselines/AnyBimanual_overlap_runs/peract_bc_subset3_overlap_smoke200_fixpretrain_nowandb3/PERACT_BC/seed0/training.log)
165
+ - state at record time:
166
+ - resumed from `weights/200`
167
+ - training toward `weights/1000`
168
+ - latest live progress sample reached roughly `203/800` resumed steps
169
+ - `weights/400` now exists on disk
170
+ - running on the reused local replay instead of rebuilding data
171
+ - paired watcher is waiting to run eval and then write the overlap comparison summary automatically
172
+ - The strongest current custom-task proxy controller remains:
173
+ - [task_routed_proxy_v1/summary.md](/workspace/VLAarchtests/artifacts/reports/task_routed_proxy_v1/summary.md)
174
+ - mean success `0.4867`
175
+ - foliage `0.46`, bag `0.41`, cloth `0.59`
176
+
177
+ ## Current Interpretation
178
+
179
+ - The most useful near-term internal general-task signal still comes from the recovered push-box comparator, where the current fair-step1 line beat the historical internal control.
180
+ - The official external-baseline path is now real, reproducible, and past the replay-build stage.
181
+ - The new resume fix now matters empirically as well as operationally:
182
+ - the real run is continuing from `200` to `1000` inside the same experiment directory
183
+ - no replay rebuild and no reset to the public release weights were needed
184
+ - The `200`-step smoke point is useful only as a failure-floor check. It is not a competitive baseline point.
185
+ - The next meaningful milestone for this branch is:
186
+ - completed `1000`-step resumed checkpoint,
187
+ - then immediate overlap eval on that checkpoint,
188
+ - then automatic summary against the public AnyBimanual overlap line,
189
+ - then decide whether the official baseline branch needs more budget or whether the local architecture already has enough evidence to move into direct head-to-head reporting.
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/controller_sweep_v7/controller_sweep.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Controller Sweep
2
+
3
+ ## model
4
+ - controller: model
5
+ - episodes: 300.000
6
+ - mean_success: 0.280
7
+ - visibility_integral: 31.968
8
+ - corridor_availability: 0.675
9
+ - reocclusion_rate: 0.014
10
+ - disturbance_cost: 0.393
11
+ - premature_retrieve_rate: 0.125
12
+ - reocclusion_after_reveal_rate: 0.547
13
+ - planner_regret: 0.091
14
+ - foliage_success: 0.390
15
+ - bag_success: 0.310
16
+ - cloth_success: 0.140
17
+
18
+ ## candidate0
19
+ - controller: candidate0
20
+ - episodes: 300.000
21
+ - mean_success: 0.200
22
+ - visibility_integral: 6.978
23
+ - corridor_availability: 0.230
24
+ - reocclusion_rate: 0.020
25
+ - disturbance_cost: 0.110
26
+ - premature_retrieve_rate: 0.151
27
+ - reocclusion_after_reveal_rate: 0.800
28
+ - planner_regret: 0.041
29
+ - foliage_success: 0.240
30
+ - bag_success: 0.220
31
+ - cloth_success: 0.140
32
+ - paired_paired_episodes_vs_model: 300.000
33
+ - paired_success_delta_vs_model: -0.080
34
+ - paired_visibility_delta_vs_model: -24.990
35
+ - paired_reocclusion_delta_vs_model: 0.006
36
+ - paired_disturbance_delta_vs_model: -0.283
37
+
38
+ ## random
39
+ - controller: random
40
+ - episodes: 300.000
41
+ - mean_success: 0.433
42
+ - visibility_integral: 19.548
43
+ - corridor_availability: 0.536
44
+ - reocclusion_rate: 0.024
45
+ - disturbance_cost: 0.334
46
+ - premature_retrieve_rate: 0.182
47
+ - reocclusion_after_reveal_rate: 0.673
48
+ - planner_regret: 0.135
49
+ - foliage_success: 0.410
50
+ - bag_success: 0.370
51
+ - cloth_success: 0.520
52
+ - paired_paired_episodes_vs_model: 300.000
53
+ - paired_success_delta_vs_model: 0.153
54
+ - paired_visibility_delta_vs_model: -12.421
55
+ - paired_reocclusion_delta_vs_model: 0.010
56
+ - paired_disturbance_delta_vs_model: -0.059
57
+
58
+ ## oracle
59
+ - controller: oracle
60
+ - episodes: 300.000
61
+ - mean_success: 0.407
62
+ - visibility_integral: 24.971
63
+ - corridor_availability: 0.698
64
+ - reocclusion_rate: 0.004
65
+ - disturbance_cost: 0.121
66
+ - premature_retrieve_rate: 0.143
67
+ - reocclusion_after_reveal_rate: 0.523
68
+ - planner_regret: 0.000
69
+ - foliage_success: 0.500
70
+ - bag_success: 0.420
71
+ - cloth_success: 0.300
72
+ - paired_paired_episodes_vs_model: 300.000
73
+ - paired_success_delta_vs_model: 0.127
74
+ - paired_visibility_delta_vs_model: -6.998
75
+ - paired_reocclusion_delta_vs_model: -0.011
76
+ - paired_disturbance_delta_vs_model: -0.272
77
+
78
+ ## scripted
79
+ - controller: scripted
80
+ - episodes: 300.000
81
+ - mean_success: 1.000
82
+ - visibility_integral: 1.691
83
+ - corridor_availability: 0.665
84
+ - reocclusion_rate: 0.000
85
+ - disturbance_cost: 0.161
86
+ - premature_retrieve_rate: 0.000
87
+ - reocclusion_after_reveal_rate: 0.000
88
+ - planner_regret: 0.000
89
+ - foliage_success: 1.000
90
+ - bag_success: 1.000
91
+ - cloth_success: 1.000
92
+ - paired_paired_episodes_vs_model: 300.000
93
+ - paired_success_delta_vs_model: 0.720
94
+ - paired_visibility_delta_vs_model: -30.277
95
+ - paired_reocclusion_delta_vs_model: -0.014
96
+ - paired_disturbance_delta_vs_model: -0.232
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "bimanual_dual_push_buttons",
3
+ "date": "2026-03-31",
4
+ "code_changes": [
5
+ "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py",
6
+ "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py",
7
+ "/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py",
8
+ "/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh",
9
+ "/workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py",
10
+ "/workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py"
11
+ ],
12
+ "verification": [
13
+ "pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py",
14
+ "python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py",
15
+ "bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh"
16
+ ],
17
+ "results": {
18
+ "elastic_scene_retarget_probe_1x120": {
19
+ "path": "/workspace/reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json",
20
+ "checkpoint": "/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt",
21
+ "mean_success": 1.0,
22
+ "mean_return": 1.0,
23
+ "retrieved_episode_index": 11,
24
+ "retrieval_similarity": 0.9998629689216614
25
+ },
26
+ "full_arch_hybrid_iter6_backbone_1x120": {
27
+ "path": "/workspace/reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json",
28
+ "controller_checkpoint": "/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt",
29
+ "retrieval_checkpoint": "/workspace/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt",
30
+ "mean_success": 1.0,
31
+ "mean_return": 1.0,
32
+ "steps": 116,
33
+ "path_recoveries": 0,
34
+ "noop_fallbacks": 0,
35
+ "active_modules": [
36
+ "multiview fusion",
37
+ "observation memory",
38
+ "learned action decoder",
39
+ "learned selector shortlist",
40
+ "world model",
41
+ "planner",
42
+ "geometry tokens",
43
+ "camera-pose tokens"
44
+ ],
45
+ "first_selected_mode": "residual::maintain_opening",
46
+ "last_selected_mode": "residual::base_action"
47
+ }
48
+ }
49
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/dual_push_full_arch_hybrid_20260331/summary.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dual Push Full-Architecture Hybrid
2
+
3
+ - Task: `bimanual_dual_push_buttons`
4
+ - Date: `2026-03-31`
5
+
6
+ ## Code Changes
7
+
8
+ - Added delta conversion from absolute retargeted poses:
9
+ - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py`
10
+ - Added hybrid candidate composition utilities:
11
+ - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py`
12
+ - Added full-architecture dual-push hybrid evaluator:
13
+ - `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
14
+ - Added reproducible wrapper:
15
+ - `/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
16
+ - Added regression coverage:
17
+ - `/workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py`
18
+ - `/workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
19
+
20
+ ## Verification
21
+
22
+ - `pytest -q /workspace/VLAarchtests/tests/test_dual_push_retarget_utils.py /workspace/VLAarchtests/tests/test_dual_push_full_arch_utils.py`
23
+ - `python -m py_compile /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_retarget_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/dual_push_full_arch_utils.py /workspace/VLAarchtests/code/reveal_vla_bimanual/eval/run_rlbench_dual_push_full_arch_hybrid_eval.py`
24
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_dual_push_full_arch_hybrid_eval.sh`
25
+
26
+ ## Raw Results
27
+
28
+ - Elastic checkpoint retargeted-demo probe with scene retrieval and vision-only button localization, `1` episode, `120` steps:
29
+ - `/workspace/reports/dual_push_full_arch_probe_iter6_scene_ep1/summary.json`
30
+ - checkpoint: `/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt`
31
+ - mean success: `1.0`
32
+ - mean return: `1.0`
33
+ - retrieved episode index: `11`
34
+ - retrieval similarity: `0.9998629689216614`
35
+ - Full-architecture hybrid eval with elastic controller checkpoint plus dual-push retrieval checkpoint, vision-only button localization, `1` episode, `120` steps:
36
+ - `/workspace/reports/dual_push_full_arch_hybrid_iter6_backbone_ep1/summary.json`
37
+ - controller checkpoint: `/workspace/outputs/rlbench_true_baselines/rlbench_subset3_elastic_reveal_proxy_iter6_100demo_fair_seed17/checkpoint_best.pt`
38
+ - retrieval checkpoint: `/workspace/outputs/rlbench_dual_push/rlbench_dual_push_backbone_only_clip_chunk8_weighted_seed17/checkpoint_best.pt`
39
+ - active modules:
40
+ - multiview fusion
41
+ - observation memory
42
+ - learned action decoder
43
+ - learned selector shortlist
44
+ - world model
45
+ - planner
46
+ - geometry tokens
47
+ - camera-pose tokens
48
+ - mean success: `1.0`
49
+ - mean return: `1.0`
50
+ - steps: `116`
51
+ - path recoveries: `0`
52
+ - noop fallbacks: `0`
53
+ - first selected mode: `residual::maintain_opening`
54
+ - last selected mode: `residual::base_action`
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.json ADDED
@@ -0,0 +1,718 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
3
+ "dataset_path": "/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt",
4
+ "teacher_macro_names_by_task": {
5
+ "foliage": [
6
+ "teacher",
7
+ "pin_canopy",
8
+ "maintain_gap",
9
+ "premature_retrieve",
10
+ "reveal_with_release",
11
+ "wrong_side_reveal",
12
+ "foliage_immediate_reocclusion",
13
+ "over_disturbance"
14
+ ],
15
+ "bag": [
16
+ "teacher",
17
+ "widen_mouth",
18
+ "maintain_mouth",
19
+ "premature_retrieve",
20
+ "reveal_with_release",
21
+ "wrong_edge_reveal",
22
+ "pin_left_rim",
23
+ "bag_fabric_probe"
24
+ ],
25
+ "cloth": [
26
+ "teacher",
27
+ "lift_edge",
28
+ "stabilize_fold",
29
+ "premature_retrieve",
30
+ "reveal_with_release",
31
+ "cloth_lift_high",
32
+ "wrong_layer_reveal",
33
+ "delayed_actor_entry"
34
+ ]
35
+ },
36
+ "overall": {
37
+ "samples": 131,
38
+ "teacher_oracle_top1_accuracy": 0.9694656488549618,
39
+ "proposal_teacher_utility_spearman": 0.9828442390658302,
40
+ "slotwise_reconstruction_mse": [
41
+ 0.02260977029800415,
42
+ 0.13588657975196838,
43
+ 0.18361078202724457,
44
+ 0.21640430390834808,
45
+ 0.12281869351863861,
46
+ 0.19468815624713898,
47
+ 0.14250919222831726,
48
+ 0.2174699604511261
49
+ ],
50
+ "slotwise_best_match_mse": [
51
+ 0.02260977029800415,
52
+ 0.07509409636259079,
53
+ 0.06449330598115921,
54
+ 0.082102470099926,
55
+ 0.07563762366771698,
56
+ 0.07175964117050171,
57
+ 0.0797775611281395,
58
+ 0.07993023842573166
59
+ ],
60
+ "diagonal_reconstruction_mse": 0.15449969470500946,
61
+ "best_match_reconstruction_mse": 0.06892558932304382,
62
+ "teacher_slot_coverage_rate": [
63
+ 1.0,
64
+ 0.07633587718009949,
65
+ 0.0,
66
+ 0.0,
67
+ 0.1679389327764511,
68
+ 0.0,
69
+ 0.007633587811142206,
70
+ 0.0
71
+ ],
72
+ "proposal_slot_teacher_confusion": [
73
+ [
74
+ 131,
75
+ 0,
76
+ 0,
77
+ 0,
78
+ 0,
79
+ 0,
80
+ 0,
81
+ 0
82
+ ],
83
+ [
84
+ 122,
85
+ 9,
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 0,
90
+ 0,
91
+ 0
92
+ ],
93
+ [
94
+ 130,
95
+ 1,
96
+ 0,
97
+ 0,
98
+ 0,
99
+ 0,
100
+ 0,
101
+ 0
102
+ ],
103
+ [
104
+ 131,
105
+ 0,
106
+ 0,
107
+ 0,
108
+ 0,
109
+ 0,
110
+ 0,
111
+ 0
112
+ ],
113
+ [
114
+ 109,
115
+ 0,
116
+ 0,
117
+ 0,
118
+ 22,
119
+ 0,
120
+ 0,
121
+ 0
122
+ ],
123
+ [
124
+ 128,
125
+ 3,
126
+ 0,
127
+ 0,
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 0
132
+ ],
133
+ [
134
+ 118,
135
+ 4,
136
+ 0,
137
+ 0,
138
+ 8,
139
+ 0,
140
+ 1,
141
+ 0
142
+ ],
143
+ [
144
+ 131,
145
+ 0,
146
+ 0,
147
+ 0,
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 0
152
+ ]
153
+ ],
154
+ "proposal_slot_best_teacher_slot": [
155
+ {
156
+ "proposal_slot": 0,
157
+ "best_teacher_slot": 0,
158
+ "best_teacher_slot_rate": 1.0,
159
+ "support": 131
160
+ },
161
+ {
162
+ "proposal_slot": 1,
163
+ "best_teacher_slot": 0,
164
+ "best_teacher_slot_rate": 0.9312977099236641,
165
+ "support": 131
166
+ },
167
+ {
168
+ "proposal_slot": 2,
169
+ "best_teacher_slot": 0,
170
+ "best_teacher_slot_rate": 0.9923664122137404,
171
+ "support": 131
172
+ },
173
+ {
174
+ "proposal_slot": 3,
175
+ "best_teacher_slot": 0,
176
+ "best_teacher_slot_rate": 1.0,
177
+ "support": 131
178
+ },
179
+ {
180
+ "proposal_slot": 4,
181
+ "best_teacher_slot": 0,
182
+ "best_teacher_slot_rate": 0.8320610687022901,
183
+ "support": 131
184
+ },
185
+ {
186
+ "proposal_slot": 5,
187
+ "best_teacher_slot": 0,
188
+ "best_teacher_slot_rate": 0.9770992366412213,
189
+ "support": 131
190
+ },
191
+ {
192
+ "proposal_slot": 6,
193
+ "best_teacher_slot": 0,
194
+ "best_teacher_slot_rate": 0.9007633587786259,
195
+ "support": 131
196
+ },
197
+ {
198
+ "proposal_slot": 7,
199
+ "best_teacher_slot": 0,
200
+ "best_teacher_slot_rate": 1.0,
201
+ "support": 131
202
+ }
203
+ ],
204
+ "proposal_candidate_pairwise_l2": 2.6658203583637268
205
+ },
206
+ "by_task": {
207
+ "foliage": {
208
+ "samples": 43,
209
+ "teacher_oracle_top1_accuracy": 1.0,
210
+ "proposal_teacher_utility_spearman": 0.9880235200593535,
211
+ "slotwise_reconstruction_mse": [
212
+ 0.019004767760634422,
213
+ 0.1270177662372589,
214
+ 0.13285410404205322,
215
+ 0.22256922721862793,
216
+ 0.11697744578123093,
217
+ 0.18430863320827484,
218
+ 0.1230543926358223,
219
+ 0.23497486114501953
220
+ ],
221
+ "slotwise_best_match_mse": [
222
+ 0.019004767760634422,
223
+ 0.07327625900506973,
224
+ 0.05797513201832771,
225
+ 0.07962016016244888,
226
+ 0.07355079799890518,
227
+ 0.0724499523639679,
228
+ 0.07063580304384232,
229
+ 0.08340194821357727
230
+ ],
231
+ "diagonal_reconstruction_mse": 0.14509515464305878,
232
+ "best_match_reconstruction_mse": 0.06623934954404831,
233
+ "teacher_slot_coverage_rate": [
234
+ 1.0,
235
+ 0.11627907305955887,
236
+ 0.0,
237
+ 0.0,
238
+ 0.1860465109348297,
239
+ 0.0,
240
+ 0.0,
241
+ 0.0
242
+ ],
243
+ "proposal_slot_teacher_confusion": [
244
+ [
245
+ 43,
246
+ 0,
247
+ 0,
248
+ 0,
249
+ 0,
250
+ 0,
251
+ 0,
252
+ 0
253
+ ],
254
+ [
255
+ 38,
256
+ 5,
257
+ 0,
258
+ 0,
259
+ 0,
260
+ 0,
261
+ 0,
262
+ 0
263
+ ],
264
+ [
265
+ 42,
266
+ 1,
267
+ 0,
268
+ 0,
269
+ 0,
270
+ 0,
271
+ 0,
272
+ 0
273
+ ],
274
+ [
275
+ 43,
276
+ 0,
277
+ 0,
278
+ 0,
279
+ 0,
280
+ 0,
281
+ 0,
282
+ 0
283
+ ],
284
+ [
285
+ 35,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 8,
290
+ 0,
291
+ 0,
292
+ 0
293
+ ],
294
+ [
295
+ 40,
296
+ 3,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0
303
+ ],
304
+ [
305
+ 35,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 8,
310
+ 0,
311
+ 0,
312
+ 0
313
+ ],
314
+ [
315
+ 43,
316
+ 0,
317
+ 0,
318
+ 0,
319
+ 0,
320
+ 0,
321
+ 0,
322
+ 0
323
+ ]
324
+ ],
325
+ "proposal_slot_best_teacher_slot": [
326
+ {
327
+ "proposal_slot": 0,
328
+ "best_teacher_slot": 0,
329
+ "best_teacher_slot_rate": 1.0,
330
+ "support": 43
331
+ },
332
+ {
333
+ "proposal_slot": 1,
334
+ "best_teacher_slot": 0,
335
+ "best_teacher_slot_rate": 0.8837209302325582,
336
+ "support": 43
337
+ },
338
+ {
339
+ "proposal_slot": 2,
340
+ "best_teacher_slot": 0,
341
+ "best_teacher_slot_rate": 0.9767441860465116,
342
+ "support": 43
343
+ },
344
+ {
345
+ "proposal_slot": 3,
346
+ "best_teacher_slot": 0,
347
+ "best_teacher_slot_rate": 1.0,
348
+ "support": 43
349
+ },
350
+ {
351
+ "proposal_slot": 4,
352
+ "best_teacher_slot": 0,
353
+ "best_teacher_slot_rate": 0.813953488372093,
354
+ "support": 43
355
+ },
356
+ {
357
+ "proposal_slot": 5,
358
+ "best_teacher_slot": 0,
359
+ "best_teacher_slot_rate": 0.9302325581395349,
360
+ "support": 43
361
+ },
362
+ {
363
+ "proposal_slot": 6,
364
+ "best_teacher_slot": 0,
365
+ "best_teacher_slot_rate": 0.813953488372093,
366
+ "support": 43
367
+ },
368
+ {
369
+ "proposal_slot": 7,
370
+ "best_teacher_slot": 0,
371
+ "best_teacher_slot_rate": 1.0,
372
+ "support": 43
373
+ }
374
+ ],
375
+ "proposal_candidate_pairwise_l2": 2.5722522181133893
376
+ },
377
+ "bag": {
378
+ "samples": 48,
379
+ "teacher_oracle_top1_accuracy": 1.0,
380
+ "proposal_teacher_utility_spearman": 1.0,
381
+ "slotwise_reconstruction_mse": [
382
+ 0.008700483478605747,
383
+ 0.13847370445728302,
384
+ 0.2201945185661316,
385
+ 0.2279948592185974,
386
+ 0.1253373622894287,
387
+ 0.1859302669763565,
388
+ 0.12765681743621826,
389
+ 0.22562821209430695
390
+ ],
391
+ "slotwise_best_match_mse": [
392
+ 0.008700483478605747,
393
+ 0.060360122472047806,
394
+ 0.05706041678786278,
395
+ 0.07241564244031906,
396
+ 0.061498433351516724,
397
+ 0.05977138876914978,
398
+ 0.06826537102460861,
399
+ 0.06816806644201279
400
+ ],
401
+ "diagonal_reconstruction_mse": 0.15748952329158783,
402
+ "best_match_reconstruction_mse": 0.05702998861670494,
403
+ "teacher_slot_coverage_rate": [
404
+ 1.0,
405
+ 0.0,
406
+ 0.0,
407
+ 0.0,
408
+ 0.0,
409
+ 0.0,
410
+ 0.0,
411
+ 0.0
412
+ ],
413
+ "proposal_slot_teacher_confusion": [
414
+ [
415
+ 48,
416
+ 0,
417
+ 0,
418
+ 0,
419
+ 0,
420
+ 0,
421
+ 0,
422
+ 0
423
+ ],
424
+ [
425
+ 48,
426
+ 0,
427
+ 0,
428
+ 0,
429
+ 0,
430
+ 0,
431
+ 0,
432
+ 0
433
+ ],
434
+ [
435
+ 48,
436
+ 0,
437
+ 0,
438
+ 0,
439
+ 0,
440
+ 0,
441
+ 0,
442
+ 0
443
+ ],
444
+ [
445
+ 48,
446
+ 0,
447
+ 0,
448
+ 0,
449
+ 0,
450
+ 0,
451
+ 0,
452
+ 0
453
+ ],
454
+ [
455
+ 48,
456
+ 0,
457
+ 0,
458
+ 0,
459
+ 0,
460
+ 0,
461
+ 0,
462
+ 0
463
+ ],
464
+ [
465
+ 48,
466
+ 0,
467
+ 0,
468
+ 0,
469
+ 0,
470
+ 0,
471
+ 0,
472
+ 0
473
+ ],
474
+ [
475
+ 48,
476
+ 0,
477
+ 0,
478
+ 0,
479
+ 0,
480
+ 0,
481
+ 0,
482
+ 0
483
+ ],
484
+ [
485
+ 48,
486
+ 0,
487
+ 0,
488
+ 0,
489
+ 0,
490
+ 0,
491
+ 0,
492
+ 0
493
+ ]
494
+ ],
495
+ "proposal_slot_best_teacher_slot": [
496
+ {
497
+ "proposal_slot": 0,
498
+ "best_teacher_slot": 0,
499
+ "best_teacher_slot_rate": 1.0,
500
+ "support": 48
501
+ },
502
+ {
503
+ "proposal_slot": 1,
504
+ "best_teacher_slot": 0,
505
+ "best_teacher_slot_rate": 1.0,
506
+ "support": 48
507
+ },
508
+ {
509
+ "proposal_slot": 2,
510
+ "best_teacher_slot": 0,
511
+ "best_teacher_slot_rate": 1.0,
512
+ "support": 48
513
+ },
514
+ {
515
+ "proposal_slot": 3,
516
+ "best_teacher_slot": 0,
517
+ "best_teacher_slot_rate": 1.0,
518
+ "support": 48
519
+ },
520
+ {
521
+ "proposal_slot": 4,
522
+ "best_teacher_slot": 0,
523
+ "best_teacher_slot_rate": 1.0,
524
+ "support": 48
525
+ },
526
+ {
527
+ "proposal_slot": 5,
528
+ "best_teacher_slot": 0,
529
+ "best_teacher_slot_rate": 1.0,
530
+ "support": 48
531
+ },
532
+ {
533
+ "proposal_slot": 6,
534
+ "best_teacher_slot": 0,
535
+ "best_teacher_slot_rate": 1.0,
536
+ "support": 48
537
+ },
538
+ {
539
+ "proposal_slot": 7,
540
+ "best_teacher_slot": 0,
541
+ "best_teacher_slot_rate": 1.0,
542
+ "support": 48
543
+ }
544
+ ],
545
+ "proposal_candidate_pairwise_l2": 2.8185487488905587
546
+ },
547
+ "cloth": {
548
+ "samples": 40,
549
+ "teacher_oracle_top1_accuracy": 0.9,
550
+ "proposal_teacher_utility_spearman": 0.9566895988767886,
551
+ "slotwise_reconstruction_mse": [
552
+ 0.04317628592252731,
553
+ 0.14231613278388977,
554
+ 0.19427374005317688,
555
+ 0.19586828351020813,
556
+ 0.12607567012310028,
557
+ 0.2163556069135666,
558
+ 0.18124601244926453,
559
+ 0.1888623684644699
560
+ ],
561
+ "slotwise_best_match_mse": [
562
+ 0.04317628592252731,
563
+ 0.09472905099391937,
564
+ 0.08041983097791672,
565
+ 0.0963950902223587,
566
+ 0.09484796226024628,
567
+ 0.08540350198745728,
568
+ 0.10341956466436386,
569
+ 0.09031279385089874
570
+ ],
571
+ "diagonal_reconstruction_mse": 0.16102175414562225,
572
+ "best_match_reconstruction_mse": 0.08608800917863846,
573
+ "teacher_slot_coverage_rate": [
574
+ 1.0,
575
+ 0.125,
576
+ 0.0,
577
+ 0.0,
578
+ 0.3499999940395355,
579
+ 0.0,
580
+ 0.02500000037252903,
581
+ 0.0
582
+ ],
583
+ "proposal_slot_teacher_confusion": [
584
+ [
585
+ 40,
586
+ 0,
587
+ 0,
588
+ 0,
589
+ 0,
590
+ 0,
591
+ 0,
592
+ 0
593
+ ],
594
+ [
595
+ 36,
596
+ 4,
597
+ 0,
598
+ 0,
599
+ 0,
600
+ 0,
601
+ 0,
602
+ 0
603
+ ],
604
+ [
605
+ 40,
606
+ 0,
607
+ 0,
608
+ 0,
609
+ 0,
610
+ 0,
611
+ 0,
612
+ 0
613
+ ],
614
+ [
615
+ 40,
616
+ 0,
617
+ 0,
618
+ 0,
619
+ 0,
620
+ 0,
621
+ 0,
622
+ 0
623
+ ],
624
+ [
625
+ 26,
626
+ 0,
627
+ 0,
628
+ 0,
629
+ 14,
630
+ 0,
631
+ 0,
632
+ 0
633
+ ],
634
+ [
635
+ 40,
636
+ 0,
637
+ 0,
638
+ 0,
639
+ 0,
640
+ 0,
641
+ 0,
642
+ 0
643
+ ],
644
+ [
645
+ 35,
646
+ 4,
647
+ 0,
648
+ 0,
649
+ 0,
650
+ 0,
651
+ 1,
652
+ 0
653
+ ],
654
+ [
655
+ 40,
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 0,
660
+ 0,
661
+ 0,
662
+ 0
663
+ ]
664
+ ],
665
+ "proposal_slot_best_teacher_slot": [
666
+ {
667
+ "proposal_slot": 0,
668
+ "best_teacher_slot": 0,
669
+ "best_teacher_slot_rate": 1.0,
670
+ "support": 40
671
+ },
672
+ {
673
+ "proposal_slot": 1,
674
+ "best_teacher_slot": 0,
675
+ "best_teacher_slot_rate": 0.9,
676
+ "support": 40
677
+ },
678
+ {
679
+ "proposal_slot": 2,
680
+ "best_teacher_slot": 0,
681
+ "best_teacher_slot_rate": 1.0,
682
+ "support": 40
683
+ },
684
+ {
685
+ "proposal_slot": 3,
686
+ "best_teacher_slot": 0,
687
+ "best_teacher_slot_rate": 1.0,
688
+ "support": 40
689
+ },
690
+ {
691
+ "proposal_slot": 4,
692
+ "best_teacher_slot": 0,
693
+ "best_teacher_slot_rate": 0.65,
694
+ "support": 40
695
+ },
696
+ {
697
+ "proposal_slot": 5,
698
+ "best_teacher_slot": 0,
699
+ "best_teacher_slot_rate": 1.0,
700
+ "support": 40
701
+ },
702
+ {
703
+ "proposal_slot": 6,
704
+ "best_teacher_slot": 0,
705
+ "best_teacher_slot_rate": 0.875,
706
+ "support": 40
707
+ },
708
+ {
709
+ "proposal_slot": 7,
710
+ "best_teacher_slot": 0,
711
+ "best_teacher_slot_rate": 1.0,
712
+ "support": 40
713
+ }
714
+ ],
715
+ "proposal_candidate_pairwise_l2": 2.583132040500641
716
+ }
717
+ }
718
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_v7/proposal_alignment_diagnostics.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Proposal Alignment Diagnostics
2
+
3
+ - checkpoint: `/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt`
4
+ - dataset: `/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt`
5
+ - samples: 131
6
+
7
+ ## Overall
8
+ - teacher-oracle top1 accuracy: 0.969
9
+ - proposal/teacher utility spearman: 0.983
10
+ - diagonal reconstruction MSE: 0.1545
11
+ - best-match reconstruction MSE: 0.0689
12
+ - proposal candidate pairwise L2: 2.6658
13
+
14
+ ## By Task
15
+ ### foliage
16
+ - samples: 43
17
+ - teacher-oracle top1 accuracy: 1.000
18
+ - proposal/teacher utility spearman: 0.988
19
+ - diagonal reconstruction MSE: 0.1451
20
+ - best-match reconstruction MSE: 0.0662
21
+ - proposal candidate pairwise L2: 2.5723
22
+ - dominant slot alignment:
23
+ proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
24
+ proposal slot 1 -> teacher slot 0 (teacher), rate 0.884
25
+ proposal slot 2 -> teacher slot 0 (teacher), rate 0.977
26
+ proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
27
+ proposal slot 4 -> teacher slot 0 (teacher), rate 0.814
28
+ proposal slot 5 -> teacher slot 0 (teacher), rate 0.930
29
+ proposal slot 6 -> teacher slot 0 (teacher), rate 0.814
30
+ proposal slot 7 -> teacher slot 0 (teacher), rate 1.000
31
+
32
+ ### bag
33
+ - samples: 48
34
+ - teacher-oracle top1 accuracy: 1.000
35
+ - proposal/teacher utility spearman: 1.000
36
+ - diagonal reconstruction MSE: 0.1575
37
+ - best-match reconstruction MSE: 0.0570
38
+ - proposal candidate pairwise L2: 2.8185
39
+ - dominant slot alignment:
40
+ proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
41
+ proposal slot 1 -> teacher slot 0 (teacher), rate 1.000
42
+ proposal slot 2 -> teacher slot 0 (teacher), rate 1.000
43
+ proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
44
+ proposal slot 4 -> teacher slot 0 (teacher), rate 1.000
45
+ proposal slot 5 -> teacher slot 0 (teacher), rate 1.000
46
+ proposal slot 6 -> teacher slot 0 (teacher), rate 1.000
47
+ proposal slot 7 -> teacher slot 0 (teacher), rate 1.000
48
+
49
+ ### cloth
50
+ - samples: 40
51
+ - teacher-oracle top1 accuracy: 0.900
52
+ - proposal/teacher utility spearman: 0.957
53
+ - diagonal reconstruction MSE: 0.1610
54
+ - best-match reconstruction MSE: 0.0861
55
+ - proposal candidate pairwise L2: 2.5831
56
+ - dominant slot alignment:
57
+ proposal slot 0 -> teacher slot 0 (teacher), rate 1.000
58
+ proposal slot 1 -> teacher slot 0 (teacher), rate 0.900
59
+ proposal slot 2 -> teacher slot 0 (teacher), rate 1.000
60
+ proposal slot 3 -> teacher slot 0 (teacher), rate 1.000
61
+ proposal slot 4 -> teacher slot 0 (teacher), rate 0.650
62
+ proposal slot 5 -> teacher slot 0 (teacher), rate 1.000
63
+ proposal slot 6 -> teacher slot 0 (teacher), rate 0.875
64
+ proposal slot 7 -> teacher slot 0 (teacher), rate 1.000
65
+
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proxy_diagnostics_v7/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 1.0,
3
+ "planner_regret": 0.0,
4
+ "planner_score_utility_spearman": 0.5312977433204651,
5
+ "risk_calibration_mse": 0.009349034167826176,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.13560470938682556,
8
+ "left_right_equivariance_error": 0.0006655894565697321,
9
+ "belief_calibration_brier": 0.0033960030414164066,
10
+ "reocclusion_calibration_brier": 0.23249056935310364,
11
+ "support_stability_mae": 0.031876545399427414,
12
+ "clearance_auc": 0.9635915460321444,
13
+ "memory_write_rate": 0.013606361113488674,
14
+ "memory_saturation": 0.24346540868282318,
15
+ "num_samples": 131
16
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/real_baseline_compare_v7_full/reveal_benchmark.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## baseline_rgbd_stage3
4
+ - controller: model
5
+ - checkpoint: /workspace/VLAarchtests_hf_outputs/artifacts/outputs/r3d/proxy_interaction_r3d_stage3_clip_rgbd_seed17/checkpoint_best.pt
6
+ - episodes: 300.000
7
+ - mean_success: 0.310
8
+ - visibility_integral: 16.799
9
+ - corridor_availability: 0.440
10
+ - reocclusion_rate: 0.034
11
+ - disturbance_cost: 0.171
12
+ - premature_retrieve_rate: 0.423
13
+ - reocclusion_after_reveal_rate: 0.533
14
+ - planner_regret: 0.025
15
+ - foliage_success: 0.210
16
+ - bag_success: 0.150
17
+ - cloth_success: 0.570
18
+
19
+ ## iter5_selector
20
+ - controller: model
21
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter5_seed17/checkpoint_best.pt
22
+ - episodes: 300.000
23
+ - mean_success: 0.450
24
+ - visibility_integral: 40.492
25
+ - corridor_availability: 0.880
26
+ - reocclusion_rate: 0.006
27
+ - disturbance_cost: 0.454
28
+ - premature_retrieve_rate: 0.107
29
+ - reocclusion_after_reveal_rate: 0.260
30
+ - planner_regret: 0.116
31
+ - foliage_success: 0.440
32
+ - bag_success: 0.400
33
+ - cloth_success: 0.510
34
+ - paired_paired_episodes_vs_baseline_rgbd_stage3: 300.000
35
+ - paired_success_delta_vs_baseline_rgbd_stage3: 0.140
36
+ - paired_visibility_delta_vs_baseline_rgbd_stage3: 23.692
37
+ - paired_reocclusion_delta_vs_baseline_rgbd_stage3: -0.028
38
+ - paired_disturbance_delta_vs_baseline_rgbd_stage3: 0.283
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "anybimanual_local_overlap_floor": {
3
+ "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
4
+ "step": 1000,
5
+ "mean_success": 0.16,
6
+ "per_task_success": {
7
+ "coordinated_push_box": 0.0,
8
+ "coordinated_lift_ball": 0.0,
9
+ "dual_push_buttons": 0.48
10
+ }
11
+ },
12
+ "anybimanual_public_best_overlap": {
13
+ "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
14
+ "step": 60000,
15
+ "mean_success": 0.6933333333333334,
16
+ "per_task_success": {
17
+ "coordinated_push_box": 0.8,
18
+ "coordinated_lift_ball": 0.32,
19
+ "dual_push_buttons": 0.96
20
+ }
21
+ },
22
+ "stage1_frozen": {
23
+ "path": "/workspace/reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/rollout_eval.json",
24
+ "checkpoint": "/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17/checkpoint_best.pt",
25
+ "mean_success": 0.0,
26
+ "per_task_success": {
27
+ "bimanual_push_box": 0.0,
28
+ "bimanual_lift_ball": 0.0,
29
+ "bimanual_dual_push_buttons": 0.0
30
+ },
31
+ "per_task_return": {
32
+ "bimanual_push_box": 0.0,
33
+ "bimanual_lift_ball": 0.0,
34
+ "bimanual_dual_push_buttons": 0.0
35
+ }
36
+ },
37
+ "stage2_unfreeze_top2": null,
38
+ "gates": {
39
+ "stage1_clears_local_floor": false,
40
+ "stage2_clears_local_floor": false
41
+ }
42
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_fixedbounds_20260330/summary.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RVT Overlap Branch Summary
2
+
3
+ ## External Anchors
4
+
5
+ - anybimanual_local_overlap_floor: step=`1000`, mean_success=`0.160`
6
+ - anybimanual_public_best_overlap: step=`60000`, mean_success=`0.693`
7
+
8
+ ## RVT Runs
9
+
10
+ - stage1_frozen: mean_success=`0.000`, path=`/workspace/reports/rvt_overlap_branch_fixedbounds_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_fixedbounds_seed17_noplan_split/rollout_eval.json`
11
+ - stage2_unfreeze_top2: `not_run`
12
+
13
+ ## Gates
14
+
15
+ - stage1_clears_local_floor: `False`
16
+ - stage2_clears_local_floor: `False`
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_full/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## scripted
4
+ - controller: scripted
5
+ - checkpoint: none
6
+ - episodes: 300.000
7
+ - mean_success: 1.000
8
+ - visibility_integral: 1.691
9
+ - corridor_availability: 0.665
10
+ - reocclusion_rate: 0.000
11
+ - disturbance_cost: 0.161
12
+ - premature_retrieve_rate: 0.000
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.000
15
+ - foliage_success: 1.000
16
+ - bag_success: 1.000
17
+ - cloth_success: 1.000
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.json ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "benchmark_config": {
3
+ "benchmark_mode": "smoke",
4
+ "controller": "scripted",
5
+ "ablation": null,
6
+ "proxies": [
7
+ "foliage_proxy",
8
+ "bag_proxy",
9
+ "cloth_proxy"
10
+ ],
11
+ "chunk_commit_steps": 0,
12
+ "benchmark_spec_path": "/workspace/VLAarchtests/artifacts/generated_configs/reveal_proxy_sprint_benchmark_v7.json",
13
+ "episodes": 6,
14
+ "base_config": "/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_base.yaml"
15
+ },
16
+ "scripted": {
17
+ "per_task_success": {
18
+ "foliage": 1.0,
19
+ "bag": 1.0,
20
+ "cloth": 1.0
21
+ },
22
+ "mean_success": 1.0,
23
+ "per_stress_success": {
24
+ "nominal": 1.0
25
+ },
26
+ "per_difficulty_success": {
27
+ "medium": 1.0
28
+ },
29
+ "task_stress_success": {
30
+ "foliage": {
31
+ "nominal": 1.0
32
+ },
33
+ "bag": {
34
+ "nominal": 1.0
35
+ },
36
+ "cloth": {
37
+ "nominal": 1.0
38
+ }
39
+ },
40
+ "visibility_integral": 1.7259430487950642,
41
+ "corridor_availability": 0.7083333432674408,
42
+ "reocclusion_rate": 0.0,
43
+ "disturbance_cost": 0.1390303730945823,
44
+ "premature_retrieve_rate": 0.0,
45
+ "reocclusion_after_reveal_rate": 0.0,
46
+ "planner_regret": 0.0,
47
+ "task_specific_metrics": {
48
+ "gap_width": 0.04750262860390331,
49
+ "damage_proxy": 0.1390303730945823,
50
+ "actor_feasibility_floor": 0.0,
51
+ "mouth_aperture": 0.2505193983474954,
52
+ "hold_persistence": 0.9572114593639115,
53
+ "rim_slip_rate": 0.3306301846990871,
54
+ "insertable_corridor": 1.0,
55
+ "fold_preservation": 0.2847869507479139,
56
+ "layer_separation_quality": 0.1776304892024866,
57
+ "top_layer_stability": 0.9572114593639115,
58
+ "lift_too_high_rate": 0.0
59
+ },
60
+ "checkpoint": null,
61
+ "shape_mismatch_keys": [],
62
+ "episode_records": [
63
+ {
64
+ "proxy_name": "foliage_proxy",
65
+ "task_name": "foliage",
66
+ "task_id": 0,
67
+ "stress_slice": "nominal",
68
+ "difficulty_bin": "medium",
69
+ "seed": 0,
70
+ "episode_index": 0,
71
+ "controller": "scripted",
72
+ "resolved_task_name": "foliage",
73
+ "success": 1.0,
74
+ "visibility_integral": 1.4501793384552002,
75
+ "corridor_availability": 0.6666666865348816,
76
+ "reocclusion_rate": 0.0,
77
+ "disturbance_cost": 0.09915829507599976,
78
+ "premature_retrieve_rate": 0.0,
79
+ "reocclusion_after_reveal": 0.0,
80
+ "planner_regret": 0.0,
81
+ "gap_width": 0.1280654333918135,
82
+ "damage_proxy": 0.09915829507599976,
83
+ "actor_feasibility_floor": 0.0,
84
+ "mouth_aperture": 0.0,
85
+ "hold_persistence": 0.9918578793641878,
86
+ "rim_slip_rate": 0.36158549250089017,
87
+ "insertable_corridor": 1.0,
88
+ "fold_preservation": 0.0,
89
+ "layer_separation_quality": 0.0,
90
+ "top_layer_stability": 0.9918578793641878,
91
+ "lift_too_high_rate": 0.0,
92
+ "step_trace": [
93
+ {
94
+ "step_index": 1,
95
+ "visibility": 0.48582570303966377,
96
+ "support_mode": 0,
97
+ "corridor_available": 1.0,
98
+ "reocclusion_risk": 0.033686798271736935,
99
+ "disturbance_cost": 0.09624799506210555,
100
+ "chosen_candidate_family": "scripted_teacher",
101
+ "chosen_candidate_index": 0,
102
+ "task_name": "foliage",
103
+ "stress_slice": "nominal",
104
+ "difficulty_bin": "medium"
105
+ },
106
+ {
107
+ "step_index": 2,
108
+ "visibility": 0.7367230437799538,
109
+ "support_mode": 0,
110
+ "corridor_available": 1.0,
111
+ "reocclusion_risk": 0.05106967923093351,
112
+ "disturbance_cost": 0.14694608592714536,
113
+ "chosen_candidate_family": "scripted_teacher",
114
+ "chosen_candidate_index": 0,
115
+ "task_name": "foliage",
116
+ "stress_slice": "nominal",
117
+ "difficulty_bin": "medium"
118
+ }
119
+ ],
120
+ "chosen_candidate_families": [
121
+ "scripted_teacher"
122
+ ],
123
+ "episode_metadata": {
124
+ "proxy_name": "foliage_proxy",
125
+ "task_name": "foliage",
126
+ "task_id": 0,
127
+ "stress_slice": "nominal",
128
+ "difficulty_bin": "medium",
129
+ "camera_pose_jitter": 0.0,
130
+ "focal_jitter": 0.0,
131
+ "lateral_skew": 0.0,
132
+ "reocclusion_bias": 0.0,
133
+ "closure_scale": 1.0,
134
+ "disturbance_gain_scale": 1.0,
135
+ "corridor_scale": 1.0,
136
+ "support_stability_penalty": 0.0,
137
+ "collateral_bias": 0.0
138
+ }
139
+ },
140
+ {
141
+ "proxy_name": "foliage_proxy",
142
+ "task_name": "foliage",
143
+ "task_id": 0,
144
+ "stress_slice": "nominal",
145
+ "difficulty_bin": "medium",
146
+ "seed": 1,
147
+ "episode_index": 1,
148
+ "controller": "scripted",
149
+ "resolved_task_name": "foliage",
150
+ "success": 1.0,
151
+ "visibility_integral": 1.9396257400512695,
152
+ "corridor_availability": 0.75,
153
+ "reocclusion_rate": 0.0,
154
+ "disturbance_cost": 0.19006885791579062,
155
+ "premature_retrieve_rate": 0.0,
156
+ "reocclusion_after_reveal": 0.0,
157
+ "planner_regret": 0.0,
158
+ "gap_width": 0.1569503382316063,
159
+ "damage_proxy": 0.19006885791579062,
160
+ "actor_feasibility_floor": 0.0,
161
+ "mouth_aperture": 0.0,
162
+ "hold_persistence": 0.9690937340076613,
163
+ "rim_slip_rate": 0.30506012136432714,
164
+ "insertable_corridor": 1.0,
165
+ "fold_preservation": 0.0,
166
+ "layer_separation_quality": 0.0,
167
+ "top_layer_stability": 0.9690937340076613,
168
+ "lift_too_high_rate": 0.0,
169
+ "step_trace": [
170
+ {
171
+ "step_index": 1,
172
+ "visibility": 0.3652986439816591,
173
+ "support_mode": 0,
174
+ "corridor_available": 1.0,
175
+ "reocclusion_risk": 0.05512812322244198,
176
+ "disturbance_cost": 0.15750892349269138,
177
+ "chosen_candidate_family": "scripted_teacher",
178
+ "chosen_candidate_index": 0,
179
+ "task_name": "foliage",
180
+ "stress_slice": "nominal",
181
+ "difficulty_bin": "medium"
182
+ },
183
+ {
184
+ "step_index": 2,
185
+ "visibility": 0.6124352318551509,
186
+ "support_mode": 0,
187
+ "corridor_available": 1.0,
188
+ "reocclusion_risk": 0.07228509740440894,
189
+ "disturbance_cost": 0.2065288497268827,
190
+ "chosen_candidate_family": "scripted_teacher",
191
+ "chosen_candidate_index": 0,
192
+ "task_name": "foliage",
193
+ "stress_slice": "nominal",
194
+ "difficulty_bin": "medium"
195
+ },
196
+ {
197
+ "step_index": 3,
198
+ "visibility": 0.8506532259033559,
199
+ "support_mode": 0,
200
+ "corridor_available": 1.0,
201
+ "reocclusion_risk": 0.09282726483045785,
202
+ "disturbance_cost": 0.2806959257742542,
203
+ "chosen_candidate_family": "scripted_teacher",
204
+ "chosen_candidate_index": 0,
205
+ "task_name": "foliage",
206
+ "stress_slice": "nominal",
207
+ "difficulty_bin": "medium"
208
+ }
209
+ ],
210
+ "chosen_candidate_families": [
211
+ "scripted_teacher"
212
+ ],
213
+ "episode_metadata": {
214
+ "proxy_name": "foliage_proxy",
215
+ "task_name": "foliage",
216
+ "task_id": 0,
217
+ "stress_slice": "nominal",
218
+ "difficulty_bin": "medium",
219
+ "camera_pose_jitter": 0.0,
220
+ "focal_jitter": 0.0,
221
+ "lateral_skew": 0.0,
222
+ "reocclusion_bias": 0.0,
223
+ "closure_scale": 1.0,
224
+ "disturbance_gain_scale": 1.0,
225
+ "corridor_scale": 1.0,
226
+ "support_stability_penalty": 0.0,
227
+ "collateral_bias": 0.0
228
+ }
229
+ },
230
+ {
231
+ "proxy_name": "bag_proxy",
232
+ "task_name": "bag",
233
+ "task_id": 1,
234
+ "stress_slice": "nominal",
235
+ "difficulty_bin": "medium",
236
+ "seed": 10000,
237
+ "episode_index": 0,
238
+ "controller": "scripted",
239
+ "resolved_task_name": "bag",
240
+ "success": 1.0,
241
+ "visibility_integral": 2.222443103790283,
242
+ "corridor_availability": 0.75,
243
+ "reocclusion_rate": 0.0,
244
+ "disturbance_cost": 0.13910771782752218,
245
+ "premature_retrieve_rate": 0.0,
246
+ "reocclusion_after_reveal": 0.0,
247
+ "planner_regret": 0.0,
248
+ "gap_width": 0.0,
249
+ "damage_proxy": 0.13910771782752218,
250
+ "actor_feasibility_floor": 0.0,
251
+ "mouth_aperture": 0.741060188586711,
252
+ "hold_persistence": 0.9364245639602276,
253
+ "rim_slip_rate": 0.2922986490934589,
254
+ "insertable_corridor": 1.0,
255
+ "fold_preservation": 0.0,
256
+ "layer_separation_quality": 0.0,
257
+ "top_layer_stability": 0.9364245639602276,
258
+ "lift_too_high_rate": 0.0,
259
+ "step_trace": [
260
+ {
261
+ "step_index": 1,
262
+ "visibility": 0.44712191590268047,
263
+ "support_mode": 0,
264
+ "corridor_available": 1.0,
265
+ "reocclusion_risk": 0.03871004013416583,
266
+ "disturbance_cost": 0.11060011466904525,
267
+ "chosen_candidate_family": "scripted_teacher",
268
+ "chosen_candidate_index": 0,
269
+ "task_name": "bag",
270
+ "stress_slice": "nominal",
271
+ "difficulty_bin": "medium"
272
+ },
273
+ {
274
+ "step_index": 2,
275
+ "visibility": 0.6701348483026268,
276
+ "support_mode": 0,
277
+ "corridor_available": 1.0,
278
+ "reocclusion_risk": 0.05604201435785605,
279
+ "disturbance_cost": 0.16012004102244587,
280
+ "chosen_candidate_family": "scripted_teacher",
281
+ "chosen_candidate_index": 0,
282
+ "task_name": "bag",
283
+ "stress_slice": "nominal",
284
+ "difficulty_bin": "medium"
285
+ },
286
+ {
287
+ "step_index": 3,
288
+ "visibility": 0.888034440204704,
289
+ "support_mode": 0,
290
+ "corridor_available": 1.0,
291
+ "reocclusion_risk": 0.0744425418818137,
292
+ "disturbance_cost": 0.2175777918921189,
293
+ "chosen_candidate_family": "scripted_teacher",
294
+ "chosen_candidate_index": 0,
295
+ "task_name": "bag",
296
+ "stress_slice": "nominal",
297
+ "difficulty_bin": "medium"
298
+ }
299
+ ],
300
+ "chosen_candidate_families": [
301
+ "scripted_teacher"
302
+ ],
303
+ "episode_metadata": {
304
+ "proxy_name": "bag_proxy",
305
+ "task_name": "bag",
306
+ "task_id": 1,
307
+ "stress_slice": "nominal",
308
+ "difficulty_bin": "medium",
309
+ "camera_pose_jitter": 0.0,
310
+ "focal_jitter": 0.0,
311
+ "lateral_skew": 0.0,
312
+ "reocclusion_bias": 0.0,
313
+ "closure_scale": 1.0,
314
+ "disturbance_gain_scale": 1.0,
315
+ "corridor_scale": 1.0,
316
+ "support_stability_penalty": 0.0,
317
+ "collateral_bias": 0.0
318
+ }
319
+ },
320
+ {
321
+ "proxy_name": "bag_proxy",
322
+ "task_name": "bag",
323
+ "task_id": 1,
324
+ "stress_slice": "nominal",
325
+ "difficulty_bin": "medium",
326
+ "seed": 10001,
327
+ "episode_index": 1,
328
+ "controller": "scripted",
329
+ "resolved_task_name": "bag",
330
+ "success": 1.0,
331
+ "visibility_integral": 2.2613422870635986,
332
+ "corridor_availability": 0.75,
333
+ "reocclusion_rate": 0.0,
334
+ "disturbance_cost": 0.15830948550161264,
335
+ "premature_retrieve_rate": 0.0,
336
+ "reocclusion_after_reveal": 0.0,
337
+ "planner_regret": 0.0,
338
+ "gap_width": 0.0,
339
+ "damage_proxy": 0.15830948550161264,
340
+ "actor_feasibility_floor": 0.0,
341
+ "mouth_aperture": 0.7620562014982616,
342
+ "hold_persistence": 0.9274478323003021,
343
+ "rim_slip_rate": 0.297251848429244,
344
+ "insertable_corridor": 1.0,
345
+ "fold_preservation": 0.0,
346
+ "layer_separation_quality": 0.0,
347
+ "top_layer_stability": 0.9274478323003021,
348
+ "lift_too_high_rate": 0.0,
349
+ "step_trace": [
350
+ {
351
+ "step_index": 1,
352
+ "visibility": 0.45762398304566504,
353
+ "support_mode": 0,
354
+ "corridor_available": 1.0,
355
+ "reocclusion_risk": 0.0451918291732286,
356
+ "disturbance_cost": 0.1291195119235103,
357
+ "chosen_candidate_family": "scripted_teacher",
358
+ "chosen_candidate_index": 0,
359
+ "task_name": "bag",
360
+ "stress_slice": "nominal",
361
+ "difficulty_bin": "medium"
362
+ },
363
+ {
364
+ "step_index": 2,
365
+ "visibility": 0.6793868561309351,
366
+ "support_mode": 0,
367
+ "corridor_available": 1.0,
368
+ "reocclusion_risk": 0.06252380339691882,
369
+ "disturbance_cost": 0.17863943827691092,
370
+ "chosen_candidate_family": "scripted_teacher",
371
+ "chosen_candidate_index": 0,
372
+ "task_name": "bag",
373
+ "stress_slice": "nominal",
374
+ "difficulty_bin": "medium"
375
+ },
376
+ {
377
+ "step_index": 3,
378
+ "visibility": 0.8954275043439008,
379
+ "support_mode": 0,
380
+ "corridor_available": 1.0,
381
+ "reocclusion_risk": 0.08129176114682861,
382
+ "disturbance_cost": 0.23882667082508555,
383
+ "chosen_candidate_family": "scripted_teacher",
384
+ "chosen_candidate_index": 0,
385
+ "task_name": "bag",
386
+ "stress_slice": "nominal",
387
+ "difficulty_bin": "medium"
388
+ }
389
+ ],
390
+ "chosen_candidate_families": [
391
+ "scripted_teacher"
392
+ ],
393
+ "episode_metadata": {
394
+ "proxy_name": "bag_proxy",
395
+ "task_name": "bag",
396
+ "task_id": 1,
397
+ "stress_slice": "nominal",
398
+ "difficulty_bin": "medium",
399
+ "camera_pose_jitter": 0.0,
400
+ "focal_jitter": 0.0,
401
+ "lateral_skew": 0.0,
402
+ "reocclusion_bias": 0.0,
403
+ "closure_scale": 1.0,
404
+ "disturbance_gain_scale": 1.0,
405
+ "corridor_scale": 1.0,
406
+ "support_stability_penalty": 0.0,
407
+ "collateral_bias": 0.0
408
+ }
409
+ },
410
+ {
411
+ "proxy_name": "cloth_proxy",
412
+ "task_name": "cloth",
413
+ "task_id": 2,
414
+ "stress_slice": "nominal",
415
+ "difficulty_bin": "medium",
416
+ "seed": 20000,
417
+ "episode_index": 0,
418
+ "controller": "scripted",
419
+ "resolved_task_name": "cloth",
420
+ "success": 1.0,
421
+ "visibility_integral": 1.315584659576416,
422
+ "corridor_availability": 0.6666666865348816,
423
+ "reocclusion_rate": 0.0,
424
+ "disturbance_cost": 0.12976740198650083,
425
+ "premature_retrieve_rate": 0.0,
426
+ "reocclusion_after_reveal": 0.0,
427
+ "planner_regret": 0.0,
428
+ "gap_width": 0.0,
429
+ "damage_proxy": 0.12976740198650083,
430
+ "actor_feasibility_floor": 0.0,
431
+ "mouth_aperture": 0.0,
432
+ "hold_persistence": 0.956672084038026,
433
+ "rim_slip_rate": 0.36507675439044956,
434
+ "insertable_corridor": 1.0,
435
+ "fold_preservation": 0.8460247236628703,
436
+ "layer_separation_quality": 0.5584220826257553,
437
+ "top_layer_stability": 0.956672084038026,
438
+ "lift_too_high_rate": 0.0,
439
+ "step_trace": [
440
+ {
441
+ "step_index": 1,
442
+ "visibility": 0.44091287317149475,
443
+ "support_mode": 0,
444
+ "corridor_available": 1.0,
445
+ "reocclusion_risk": 0.043451471119726,
446
+ "disturbance_cost": 0.12414706034207429,
447
+ "chosen_candidate_family": "scripted_teacher",
448
+ "chosen_candidate_index": 0,
449
+ "task_name": "cloth",
450
+ "stress_slice": "nominal",
451
+ "difficulty_bin": "medium"
452
+ },
453
+ {
454
+ "step_index": 2,
455
+ "visibility": 0.6914333836220553,
456
+ "support_mode": 0,
457
+ "corridor_available": 1.0,
458
+ "reocclusion_risk": 0.051778792051622516,
459
+ "disturbance_cost": 0.15397527633712968,
460
+ "chosen_candidate_family": "scripted_teacher",
461
+ "chosen_candidate_index": 0,
462
+ "task_name": "cloth",
463
+ "stress_slice": "nominal",
464
+ "difficulty_bin": "medium"
465
+ }
466
+ ],
467
+ "chosen_candidate_families": [
468
+ "scripted_teacher"
469
+ ],
470
+ "episode_metadata": {
471
+ "proxy_name": "cloth_proxy",
472
+ "task_name": "cloth",
473
+ "task_id": 2,
474
+ "stress_slice": "nominal",
475
+ "difficulty_bin": "medium",
476
+ "camera_pose_jitter": 0.0,
477
+ "focal_jitter": 0.0,
478
+ "lateral_skew": 0.0,
479
+ "reocclusion_bias": 0.0,
480
+ "closure_scale": 1.0,
481
+ "disturbance_gain_scale": 1.0,
482
+ "corridor_scale": 1.0,
483
+ "support_stability_penalty": 0.0,
484
+ "collateral_bias": 0.0
485
+ }
486
+ },
487
+ {
488
+ "proxy_name": "cloth_proxy",
489
+ "task_name": "cloth",
490
+ "task_id": 2,
491
+ "stress_slice": "nominal",
492
+ "difficulty_bin": "medium",
493
+ "seed": 20001,
494
+ "episode_index": 1,
495
+ "controller": "scripted",
496
+ "resolved_task_name": "cloth",
497
+ "success": 1.0,
498
+ "visibility_integral": 1.1664831638336182,
499
+ "corridor_availability": 0.6666666865348816,
500
+ "reocclusion_rate": 0.0,
501
+ "disturbance_cost": 0.11777048026006785,
502
+ "premature_retrieve_rate": 0.0,
503
+ "reocclusion_after_reveal": 0.0,
504
+ "planner_regret": 0.0,
505
+ "gap_width": 0.0,
506
+ "damage_proxy": 0.11777048026006785,
507
+ "actor_feasibility_floor": 0.0,
508
+ "mouth_aperture": 0.0,
509
+ "hold_persistence": 0.9617726625130647,
510
+ "rim_slip_rate": 0.3625082424161528,
511
+ "insertable_corridor": 1.0,
512
+ "fold_preservation": 0.8626969808246128,
513
+ "layer_separation_quality": 0.5073608525891643,
514
+ "top_layer_stability": 0.9617726625130647,
515
+ "lift_too_high_rate": 0.0,
516
+ "step_trace": [
517
+ {
518
+ "step_index": 1,
519
+ "visibility": 0.39069084905965384,
520
+ "support_mode": 0,
521
+ "corridor_available": 1.0,
522
+ "reocclusion_risk": 0.04007073221665362,
523
+ "disturbance_cost": 0.11448780633329608,
524
+ "chosen_candidate_family": "scripted_teacher",
525
+ "chosen_candidate_index": 0,
526
+ "task_name": "cloth",
527
+ "stress_slice": "nominal",
528
+ "difficulty_bin": "medium"
529
+ },
530
+ {
531
+ "step_index": 2,
532
+ "visibility": 0.6434277983206992,
533
+ "support_mode": 0,
534
+ "corridor_available": 1.0,
535
+ "reocclusion_risk": 0.047453995031804946,
536
+ "disturbance_cost": 0.13730301917538718,
537
+ "chosen_candidate_family": "scripted_teacher",
538
+ "chosen_candidate_index": 0,
539
+ "task_name": "cloth",
540
+ "stress_slice": "nominal",
541
+ "difficulty_bin": "medium"
542
+ }
543
+ ],
544
+ "chosen_candidate_families": [
545
+ "scripted_teacher"
546
+ ],
547
+ "episode_metadata": {
548
+ "proxy_name": "cloth_proxy",
549
+ "task_name": "cloth",
550
+ "task_id": 2,
551
+ "stress_slice": "nominal",
552
+ "difficulty_bin": "medium",
553
+ "camera_pose_jitter": 0.0,
554
+ "focal_jitter": 0.0,
555
+ "lateral_skew": 0.0,
556
+ "reocclusion_bias": 0.0,
557
+ "closure_scale": 1.0,
558
+ "disturbance_gain_scale": 1.0,
559
+ "corridor_scale": 1.0,
560
+ "support_stability_penalty": 0.0,
561
+ "collateral_bias": 0.0
562
+ }
563
+ }
564
+ ]
565
+ }
566
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/scripted_smoke/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## scripted
4
+ - controller: scripted
5
+ - checkpoint: none
6
+ - episodes: 6.000
7
+ - mean_success: 1.000
8
+ - visibility_integral: 1.726
9
+ - corridor_availability: 0.708
10
+ - reocclusion_rate: 0.000
11
+ - disturbance_cost: 0.139
12
+ - premature_retrieve_rate: 0.000
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.000
15
+ - foliage_success: 1.000
16
+ - bag_success: 1.000
17
+ - cloth_success: 1.000
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_factorization_v7/fullset_planner_score_off/reveal_benchmark.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full_model
4
+ - controller: model
5
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
6
+ - episodes: 300.000
7
+ - mean_success: 0.397
8
+ - visibility_integral: 42.366
9
+ - corridor_availability: 0.903
10
+ - reocclusion_rate: 0.000
11
+ - disturbance_cost: 0.548
12
+ - premature_retrieve_rate: 0.104
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.110
15
+ - foliage_success: 0.390
16
+ - bag_success: 0.390
17
+ - cloth_success: 0.410