Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml +149 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json +557 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml +149 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json +557 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml +149 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json +557 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml +147 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json +14 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml +147 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json +14 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml +147 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json +16 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json +230 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json +14 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json +15 -0
- artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md +13 -0
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.375,
|
| 5 |
+
"bag_proxy": 0.4583333333333333,
|
| 6 |
+
"cloth_proxy": 0.5833333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.47222222222222215,
|
| 9 |
+
"visibility_integral": 37.36026926173104,
|
| 10 |
+
"corridor_availability": 0.8730104863643646,
|
| 11 |
+
"reocclusion_rate": 0.04405864197530864,
|
| 12 |
+
"persistence_horizon_mae": 1.033145775666108,
|
| 13 |
+
"disturbance_cost": 0.3228136783000082
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.472
|
| 6 |
+
- visibility_integral: 37.360
|
| 7 |
+
- corridor_availability: 0.873
|
| 8 |
+
- reocclusion_rate: 0.044
|
| 9 |
+
- persistence_horizon_mae: 1.033
|
| 10 |
+
- disturbance_cost: 0.323
|
| 11 |
+
- foliage_proxy_success: 0.375
|
| 12 |
+
- bag_proxy_success: 0.458
|
| 13 |
+
- cloth_proxy_success: 0.583
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_clip_seed7
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
proxies:
|
| 9 |
+
- foliage_proxy
|
| 10 |
+
- bag_proxy
|
| 11 |
+
- cloth_proxy
|
| 12 |
+
resolution: 224
|
| 13 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 14 |
+
train_episodes_per_proxy: 48
|
| 15 |
+
val_episodes_per_proxy: 16
|
| 16 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed7.pt
|
| 17 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed7.pt
|
| 18 |
+
rebuild_dataset: false
|
| 19 |
+
chunk_horizon: 8
|
| 20 |
+
rollout_horizon: 5
|
| 21 |
+
history_steps: 6
|
| 22 |
+
planner_candidates: 8
|
| 23 |
+
seed: 7
|
| 24 |
+
optim:
|
| 25 |
+
epochs: 4
|
| 26 |
+
batch_size: 2
|
| 27 |
+
num_workers: 4
|
| 28 |
+
lr: 0.0003
|
| 29 |
+
weight_decay: 0.0001
|
| 30 |
+
trainer:
|
| 31 |
+
policy_type: elastic_reveal
|
| 32 |
+
use_bf16: true
|
| 33 |
+
grad_clip_norm: 1.0
|
| 34 |
+
freeze_backbone: true
|
| 35 |
+
gradient_checkpointing: false
|
| 36 |
+
plan_during_train: true
|
| 37 |
+
plan_during_eval: true
|
| 38 |
+
support_mode_conditioning: true
|
| 39 |
+
planner_mode: trainable
|
| 40 |
+
use_depth: false
|
| 41 |
+
use_world_model: true
|
| 42 |
+
use_role_tokens: true
|
| 43 |
+
compute_equivariance_probe: true
|
| 44 |
+
policy:
|
| 45 |
+
backbone:
|
| 46 |
+
model_name: openai/clip-vit-base-patch32
|
| 47 |
+
hidden_dim: 512
|
| 48 |
+
max_text_tokens: 32
|
| 49 |
+
freeze_backbone: true
|
| 50 |
+
gradient_checkpointing: false
|
| 51 |
+
use_dummy_backbone: false
|
| 52 |
+
fusion:
|
| 53 |
+
hidden_dim: 512
|
| 54 |
+
num_cameras: 3
|
| 55 |
+
num_layers: 4
|
| 56 |
+
num_heads: 8
|
| 57 |
+
ff_dim: 2048
|
| 58 |
+
dropout: 0.1
|
| 59 |
+
proprio_dim: 32
|
| 60 |
+
proprio_tokens: 1
|
| 61 |
+
memory:
|
| 62 |
+
hidden_dim: 512
|
| 63 |
+
action_dim: 14
|
| 64 |
+
history_steps: 6
|
| 65 |
+
scene_history_steps: 3
|
| 66 |
+
belief_history_steps: 8
|
| 67 |
+
num_layers: 2
|
| 68 |
+
dropout: 0.1
|
| 69 |
+
memory_bank_size: 4
|
| 70 |
+
scene_bank_size: 2
|
| 71 |
+
belief_bank_size: 2
|
| 72 |
+
num_heads: 8
|
| 73 |
+
max_history_steps: 8
|
| 74 |
+
decoder:
|
| 75 |
+
hidden_dim: 512
|
| 76 |
+
num_heads: 8
|
| 77 |
+
num_layers: 4
|
| 78 |
+
ff_dim: 2048
|
| 79 |
+
dropout: 0.1
|
| 80 |
+
chunk_size: 8
|
| 81 |
+
action_dim: 14
|
| 82 |
+
arm_action_dim: 7
|
| 83 |
+
num_candidates: 8
|
| 84 |
+
num_phases: 5
|
| 85 |
+
num_arm_roles: 4
|
| 86 |
+
num_proposal_modes: 6
|
| 87 |
+
planner_top_k: 4
|
| 88 |
+
reveal_head:
|
| 89 |
+
hidden_dim: 512
|
| 90 |
+
num_support_modes: 3
|
| 91 |
+
num_approach_templates: 32
|
| 92 |
+
rollout_horizon: 5
|
| 93 |
+
belief_map_size: 32
|
| 94 |
+
field_size: 16
|
| 95 |
+
num_heads: 8
|
| 96 |
+
predict_belief_map: true
|
| 97 |
+
num_phases: 5
|
| 98 |
+
num_arm_roles: 4
|
| 99 |
+
num_interaction_tokens: 8
|
| 100 |
+
world_model:
|
| 101 |
+
hidden_dim: 512
|
| 102 |
+
action_dim: 14
|
| 103 |
+
num_support_modes: 3
|
| 104 |
+
num_approach_templates: 32
|
| 105 |
+
rollout_horizon: 5
|
| 106 |
+
field_size: 16
|
| 107 |
+
num_heads: 8
|
| 108 |
+
num_phases: 5
|
| 109 |
+
num_arm_roles: 4
|
| 110 |
+
num_interaction_tokens: 8
|
| 111 |
+
belief_map_size: 32
|
| 112 |
+
predict_belief_map: true
|
| 113 |
+
scene_bank_size: 2
|
| 114 |
+
belief_bank_size: 2
|
| 115 |
+
planner:
|
| 116 |
+
hidden_dim: 512
|
| 117 |
+
num_candidates: 8
|
| 118 |
+
action_dim: 14
|
| 119 |
+
num_support_modes: 3
|
| 120 |
+
utility_margin: 0.1
|
| 121 |
+
num_heads: 8
|
| 122 |
+
num_layers: 2
|
| 123 |
+
num_phases: 5
|
| 124 |
+
num_arm_roles: 4
|
| 125 |
+
top_k: 4
|
| 126 |
+
loss_weights:
|
| 127 |
+
action: 1.0
|
| 128 |
+
phase: 0.1
|
| 129 |
+
arm_role: 0.15
|
| 130 |
+
support_mode: 0.1
|
| 131 |
+
corridor: 0.15
|
| 132 |
+
persistence: 0.05
|
| 133 |
+
disturbance: 0.05
|
| 134 |
+
world_model: 0.2
|
| 135 |
+
belief: 0.05
|
| 136 |
+
visibility: 0.05
|
| 137 |
+
clearance: 0.05
|
| 138 |
+
support_stability: 0.05
|
| 139 |
+
reocclusion: 0.05
|
| 140 |
+
occluder_contact: 0.05
|
| 141 |
+
grasp_affordance: 0.05
|
| 142 |
+
planner_success: 0.25
|
| 143 |
+
planner_risk: 0.1
|
| 144 |
+
planner_ranking: 0.2
|
| 145 |
+
proposal_reconstruction: 0.1
|
| 146 |
+
proposal_success: 0.15
|
| 147 |
+
proposal_ranking: 0.2
|
| 148 |
+
proposal_diversity: 0.05
|
| 149 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.25396825396825395,
|
| 3 |
+
"planner_regret": 0.024764427915215492,
|
| 4 |
+
"planner_score_utility_spearman": 0.1904761791229248,
|
| 5 |
+
"risk_calibration_mse": 0.010364258661866188,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.022177213802933693,
|
| 8 |
+
"left_right_equivariance_error": 0.0002942846322184778,
|
| 9 |
+
"belief_calibration_brier": 0.003581121563911438,
|
| 10 |
+
"reocclusion_calibration_brier": 0.23373088240623474,
|
| 11 |
+
"support_stability_mae": 0.022998232394456863,
|
| 12 |
+
"clearance_auc": 0.8989269585276155,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.41934600472450256,
|
| 15 |
+
"num_samples": 126
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.025519870977400175,
|
| 6 |
+
"arm_role": 0.03451829462151253,
|
| 7 |
+
"belief": 0.11532339149432656,
|
| 8 |
+
"clearance": 0.09198410963122758,
|
| 9 |
+
"corridor": 0.27232400180664673,
|
| 10 |
+
"disturbance": 0.005858588227789626,
|
| 11 |
+
"grasp_affordance": 0.018751464233153464,
|
| 12 |
+
"occluder_contact": 0.21359099159065967,
|
| 13 |
+
"persistence": 5.231568055785678,
|
| 14 |
+
"phase": 0.7372311896678665,
|
| 15 |
+
"planner_ranking": 0.1646315749647481,
|
| 16 |
+
"planner_risk": 0.014348083711473067,
|
| 17 |
+
"planner_success": 0.6091769787029446,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.253575401780493,
|
| 20 |
+
"proposal_reconstruction": 0.067724266230904,
|
| 21 |
+
"proposal_success": 0.6851897648491785,
|
| 22 |
+
"reocclusion": 0.7031442959895309,
|
| 23 |
+
"role_swap_consistency": 0.00044027801038677857,
|
| 24 |
+
"support_mode": 0.7282283443430956,
|
| 25 |
+
"support_stability": 0.15459337279551627,
|
| 26 |
+
"total": 1.6319934494832424,
|
| 27 |
+
"uncertainty": 0.013496716971069097,
|
| 28 |
+
"visibility": 0.11563199924314833,
|
| 29 |
+
"world_model": 2.671503098223222
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.020692157455616526,
|
| 33 |
+
"arm_role": 9.546122843554865e-05,
|
| 34 |
+
"belief": 0.09874132736807778,
|
| 35 |
+
"clearance": 0.08244451738539196,
|
| 36 |
+
"corridor": 0.2306106292775699,
|
| 37 |
+
"disturbance": 0.006118982125097694,
|
| 38 |
+
"grasp_affordance": 0.009981726739732992,
|
| 39 |
+
"occluder_contact": 0.19720953915800368,
|
| 40 |
+
"persistence": 3.8672617465730696,
|
| 41 |
+
"phase": 0.668701058815396,
|
| 42 |
+
"planner_ranking": 0.03794538755975072,
|
| 43 |
+
"planner_risk": 0.009814016923349026,
|
| 44 |
+
"planner_success": 0.5628143776030767,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.1249213124078417,
|
| 47 |
+
"proposal_reconstruction": 0.06329423224642164,
|
| 48 |
+
"proposal_success": 0.6747160203873165,
|
| 49 |
+
"reocclusion": 0.692203164100647,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.6680677216204386,
|
| 52 |
+
"support_stability": 0.1511912994411966,
|
| 53 |
+
"total": 1.358805573175824,
|
| 54 |
+
"uncertainty": 0.003482046378185115,
|
| 55 |
+
"visibility": 0.10417925601913816,
|
| 56 |
+
"world_model": 2.1376701915074907
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.02150821143575988,
|
| 63 |
+
"arm_role": 1.9482293054071397e-05,
|
| 64 |
+
"belief": 0.09863162136280725,
|
| 65 |
+
"clearance": 0.08064276829400924,
|
| 66 |
+
"corridor": 0.24359133383210416,
|
| 67 |
+
"disturbance": 0.002735878452234476,
|
| 68 |
+
"grasp_affordance": 0.009349104797184779,
|
| 69 |
+
"occluder_contact": 0.1937003313558888,
|
| 70 |
+
"persistence": 4.076787073262699,
|
| 71 |
+
"phase": 0.6966290698625655,
|
| 72 |
+
"planner_ranking": 0.04271617977273956,
|
| 73 |
+
"planner_risk": 0.010049402082938681,
|
| 74 |
+
"planner_success": 0.5399472568359674,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.1569982820156357,
|
| 77 |
+
"proposal_reconstruction": 0.06389496966962414,
|
| 78 |
+
"proposal_success": 0.6711133328407847,
|
| 79 |
+
"reocclusion": 0.6940537130957498,
|
| 80 |
+
"role_swap_consistency": 0.00022550253765151655,
|
| 81 |
+
"support_mode": 0.6837139029777487,
|
| 82 |
+
"support_stability": 0.14029162690160474,
|
| 83 |
+
"total": 1.3837347957476271,
|
| 84 |
+
"uncertainty": 0.0016494125736687157,
|
| 85 |
+
"visibility": 0.09400421737922424,
|
| 86 |
+
"world_model": 2.175609592991974
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.020051477757829523,
|
| 90 |
+
"arm_role": 2.626385377793451e-06,
|
| 91 |
+
"belief": 0.09183884199176516,
|
| 92 |
+
"clearance": 0.07657587877105153,
|
| 93 |
+
"corridor": 0.22728621321065084,
|
| 94 |
+
"disturbance": 0.0016498260886850951,
|
| 95 |
+
"grasp_affordance": 0.009590831518705403,
|
| 96 |
+
"occluder_contact": 0.1917984854607355,
|
| 97 |
+
"persistence": 3.699212070495363,
|
| 98 |
+
"phase": 0.6689459842348856,
|
| 99 |
+
"planner_ranking": 0.03331218510795715,
|
| 100 |
+
"planner_risk": 0.010092773325076061,
|
| 101 |
+
"planner_success": 0.5014436940352122,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.1606994933552213,
|
| 104 |
+
"proposal_reconstruction": 0.062439400820978104,
|
| 105 |
+
"proposal_success": 0.675733851061927,
|
| 106 |
+
"reocclusion": 0.6921006942552234,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 0.6564426545112853,
|
| 109 |
+
"support_stability": 0.14099458102432508,
|
| 110 |
+
"total": 1.313369631767273,
|
| 111 |
+
"uncertainty": 0.0024020517326240973,
|
| 112 |
+
"visibility": 0.08723713226971172,
|
| 113 |
+
"world_model": 2.0216772158940635
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.018980447901412845,
|
| 120 |
+
"arm_role": 2.3090714559505124e-05,
|
| 121 |
+
"belief": 0.1100015923263827,
|
| 122 |
+
"clearance": 0.0791148773262872,
|
| 123 |
+
"corridor": 0.23030528037001852,
|
| 124 |
+
"disturbance": 0.002447301701405857,
|
| 125 |
+
"grasp_affordance": 0.009001106255400087,
|
| 126 |
+
"occluder_contact": 0.21010415864552504,
|
| 127 |
+
"persistence": 2.0494745795430753,
|
| 128 |
+
"phase": 0.459073231482381,
|
| 129 |
+
"planner_ranking": 0.036845811475892686,
|
| 130 |
+
"planner_risk": 0.011261017404920885,
|
| 131 |
+
"planner_success": 0.5133467099741491,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1499755538570944,
|
| 134 |
+
"proposal_reconstruction": 0.062038555780318395,
|
| 135 |
+
"proposal_success": 0.6672172468370168,
|
| 136 |
+
"reocclusion": 0.41151915600825667,
|
| 137 |
+
"role_swap_consistency": 0.0007739521978125561,
|
| 138 |
+
"support_mode": 0.38595684411013936,
|
| 139 |
+
"support_stability": 0.1425538511912665,
|
| 140 |
+
"total": 1.1811942648513154,
|
| 141 |
+
"uncertainty": 0.000767841034371724,
|
| 142 |
+
"visibility": 0.10209987125315591,
|
| 143 |
+
"world_model": 2.070929214904446
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.0138629823627453,
|
| 147 |
+
"arm_role": 0.002011558223822855,
|
| 148 |
+
"belief": 0.10340341582657799,
|
| 149 |
+
"clearance": 0.0855481999497565,
|
| 150 |
+
"corridor": 0.2235906974427284,
|
| 151 |
+
"disturbance": 0.0011637268657111797,
|
| 152 |
+
"grasp_affordance": 0.010592727485807642,
|
| 153 |
+
"occluder_contact": 0.20843842601965343,
|
| 154 |
+
"persistence": 1.1762515253254346,
|
| 155 |
+
"phase": 0.3442955078771486,
|
| 156 |
+
"planner_ranking": 0.03461442932137519,
|
| 157 |
+
"planner_risk": 0.01165175854065825,
|
| 158 |
+
"planner_success": 0.45808544967855724,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.3026971003365895,
|
| 161 |
+
"proposal_reconstruction": 0.05888378312663427,
|
| 162 |
+
"proposal_success": 0.7430036550476438,
|
| 163 |
+
"reocclusion": 0.2871374910076459,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 0.22473623181900215,
|
| 166 |
+
"support_stability": 0.1320991822414928,
|
| 167 |
+
"total": 1.1099917330439129,
|
| 168 |
+
"uncertainty": 0.0005805234163528352,
|
| 169 |
+
"visibility": 0.09557991185122067,
|
| 170 |
+
"world_model": 1.9994045325687952
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.014569098466314883,
|
| 177 |
+
"arm_role": 4.4951576212937916e-05,
|
| 178 |
+
"belief": 0.09620984569582015,
|
| 179 |
+
"clearance": 0.07538617284315106,
|
| 180 |
+
"corridor": 0.21248489566188775,
|
| 181 |
+
"disturbance": 0.0016758848629270635,
|
| 182 |
+
"grasp_affordance": 0.008272631588777167,
|
| 183 |
+
"occluder_contact": 0.19746327033529731,
|
| 184 |
+
"persistence": 1.1089699098374644,
|
| 185 |
+
"phase": 0.3716845961765469,
|
| 186 |
+
"planner_ranking": 0.03254403228879829,
|
| 187 |
+
"planner_risk": 0.010248634800575772,
|
| 188 |
+
"planner_success": 0.47941413580279074,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.153262345578658,
|
| 191 |
+
"proposal_reconstruction": 0.05860933205064055,
|
| 192 |
+
"proposal_success": 0.6466394141706496,
|
| 193 |
+
"reocclusion": 0.2566672772173989,
|
| 194 |
+
"role_swap_consistency": 0.0010398222479868085,
|
| 195 |
+
"support_mode": 0.21815690070546734,
|
| 196 |
+
"support_stability": 0.13650912478449145,
|
| 197 |
+
"total": 1.0633102330861914,
|
| 198 |
+
"uncertainty": 0.0002461711761398012,
|
| 199 |
+
"visibility": 0.09588275449984361,
|
| 200 |
+
"world_model": 1.9903733518111144
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.01619998768474611,
|
| 204 |
+
"arm_role": 3.844006559777174e-06,
|
| 205 |
+
"belief": 0.09427393618084136,
|
| 206 |
+
"clearance": 0.07296533326780985,
|
| 207 |
+
"corridor": 0.2100035525148823,
|
| 208 |
+
"disturbance": 0.0013519242122204862,
|
| 209 |
+
"grasp_affordance": 0.007646961093303703,
|
| 210 |
+
"occluder_contact": 0.1950870676646157,
|
| 211 |
+
"persistence": 1.3894045449024628,
|
| 212 |
+
"phase": 0.6804814789192899,
|
| 213 |
+
"planner_ranking": 0.027768202883649677,
|
| 214 |
+
"planner_risk": 0.010219628483081044,
|
| 215 |
+
"planner_success": 0.4819766197885786,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1241777983922807,
|
| 218 |
+
"proposal_reconstruction": 0.060782825840370994,
|
| 219 |
+
"proposal_success": 0.6369421221907177,
|
| 220 |
+
"reocclusion": 0.27461627113913734,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 0.08716485598531093,
|
| 223 |
+
"support_stability": 0.13245442648610425,
|
| 224 |
+
"total": 1.0629130696493483,
|
| 225 |
+
"uncertainty": 8.45672577761145e-05,
|
| 226 |
+
"visibility": 0.1013997554306,
|
| 227 |
+
"world_model": 1.8573077273747278
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_clip_seed7",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.0633102330861914,
|
| 6 |
+
"final_val_total": 1.0629130696493483,
|
| 7 |
+
"train_time_sec": 174.85308933258057,
|
| 8 |
+
"peak_gpu_memory_mb": 1919.8251953125,
|
| 9 |
+
"num_train_samples": 382,
|
| 10 |
+
"num_val_samples": 126,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": {
|
| 14 |
+
"path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
|
| 15 |
+
"loaded_keys": 461,
|
| 16 |
+
"skipped_shape_mismatch_keys": [
|
| 17 |
+
"memory.gru.weight_ih_l0",
|
| 18 |
+
"memory.gru.weight_hh_l0",
|
| 19 |
+
"memory.gru.bias_ih_l0",
|
| 20 |
+
"memory.gru.bias_hh_l0",
|
| 21 |
+
"memory.token_proj.0.weight",
|
| 22 |
+
"memory.token_proj.0.bias",
|
| 23 |
+
"memory.token_proj.1.weight",
|
| 24 |
+
"memory.token_proj.1.bias",
|
| 25 |
+
"decoder.actor_role_bias",
|
| 26 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
|
| 27 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
|
| 28 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
|
| 29 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
|
| 30 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 31 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 32 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 33 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 34 |
+
"decoder.revealer_decoder.layers.0.linear1.weight",
|
| 35 |
+
"decoder.revealer_decoder.layers.0.linear1.bias",
|
| 36 |
+
"decoder.revealer_decoder.layers.0.linear2.weight",
|
| 37 |
+
"decoder.revealer_decoder.layers.0.linear2.bias",
|
| 38 |
+
"decoder.revealer_decoder.layers.0.norm1.weight",
|
| 39 |
+
"decoder.revealer_decoder.layers.0.norm1.bias",
|
| 40 |
+
"decoder.revealer_decoder.layers.0.norm2.weight",
|
| 41 |
+
"decoder.revealer_decoder.layers.0.norm2.bias",
|
| 42 |
+
"decoder.revealer_decoder.layers.0.norm3.weight",
|
| 43 |
+
"decoder.revealer_decoder.layers.0.norm3.bias",
|
| 44 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
|
| 45 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
|
| 46 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
|
| 47 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
|
| 48 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 49 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 50 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 51 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 52 |
+
"decoder.revealer_decoder.layers.1.linear1.weight",
|
| 53 |
+
"decoder.revealer_decoder.layers.1.linear1.bias",
|
| 54 |
+
"decoder.revealer_decoder.layers.1.linear2.weight",
|
| 55 |
+
"decoder.revealer_decoder.layers.1.linear2.bias",
|
| 56 |
+
"decoder.revealer_decoder.layers.1.norm1.weight",
|
| 57 |
+
"decoder.revealer_decoder.layers.1.norm1.bias",
|
| 58 |
+
"decoder.revealer_decoder.layers.1.norm2.weight",
|
| 59 |
+
"decoder.revealer_decoder.layers.1.norm2.bias",
|
| 60 |
+
"decoder.revealer_decoder.layers.1.norm3.weight",
|
| 61 |
+
"decoder.revealer_decoder.layers.1.norm3.bias",
|
| 62 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
|
| 63 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
|
| 64 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
|
| 65 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
|
| 66 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 67 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 68 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 69 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 70 |
+
"decoder.revealer_decoder.layers.2.linear1.weight",
|
| 71 |
+
"decoder.revealer_decoder.layers.2.linear1.bias",
|
| 72 |
+
"decoder.revealer_decoder.layers.2.linear2.weight",
|
| 73 |
+
"decoder.revealer_decoder.layers.2.linear2.bias",
|
| 74 |
+
"decoder.revealer_decoder.layers.2.norm1.weight",
|
| 75 |
+
"decoder.revealer_decoder.layers.2.norm1.bias",
|
| 76 |
+
"decoder.revealer_decoder.layers.2.norm2.weight",
|
| 77 |
+
"decoder.revealer_decoder.layers.2.norm2.bias",
|
| 78 |
+
"decoder.revealer_decoder.layers.2.norm3.weight",
|
| 79 |
+
"decoder.revealer_decoder.layers.2.norm3.bias",
|
| 80 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
|
| 81 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
|
| 82 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
|
| 83 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
|
| 84 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 85 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 86 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 87 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 88 |
+
"decoder.revealer_decoder.layers.3.linear1.weight",
|
| 89 |
+
"decoder.revealer_decoder.layers.3.linear1.bias",
|
| 90 |
+
"decoder.revealer_decoder.layers.3.linear2.weight",
|
| 91 |
+
"decoder.revealer_decoder.layers.3.linear2.bias",
|
| 92 |
+
"decoder.revealer_decoder.layers.3.norm1.weight",
|
| 93 |
+
"decoder.revealer_decoder.layers.3.norm1.bias",
|
| 94 |
+
"decoder.revealer_decoder.layers.3.norm2.weight",
|
| 95 |
+
"decoder.revealer_decoder.layers.3.norm2.bias",
|
| 96 |
+
"decoder.revealer_decoder.layers.3.norm3.weight",
|
| 97 |
+
"decoder.revealer_decoder.layers.3.norm3.bias",
|
| 98 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
|
| 99 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
|
| 100 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
|
| 101 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
|
| 102 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 103 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 104 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 105 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 106 |
+
"decoder.actor_decoder.layers.0.linear1.weight",
|
| 107 |
+
"decoder.actor_decoder.layers.0.linear1.bias",
|
| 108 |
+
"decoder.actor_decoder.layers.0.linear2.weight",
|
| 109 |
+
"decoder.actor_decoder.layers.0.linear2.bias",
|
| 110 |
+
"decoder.actor_decoder.layers.0.norm1.weight",
|
| 111 |
+
"decoder.actor_decoder.layers.0.norm1.bias",
|
| 112 |
+
"decoder.actor_decoder.layers.0.norm2.weight",
|
| 113 |
+
"decoder.actor_decoder.layers.0.norm2.bias",
|
| 114 |
+
"decoder.actor_decoder.layers.0.norm3.weight",
|
| 115 |
+
"decoder.actor_decoder.layers.0.norm3.bias",
|
| 116 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
|
| 117 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
|
| 118 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
|
| 119 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
|
| 120 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 121 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 122 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 123 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 124 |
+
"decoder.actor_decoder.layers.1.linear1.weight",
|
| 125 |
+
"decoder.actor_decoder.layers.1.linear1.bias",
|
| 126 |
+
"decoder.actor_decoder.layers.1.linear2.weight",
|
| 127 |
+
"decoder.actor_decoder.layers.1.linear2.bias",
|
| 128 |
+
"decoder.actor_decoder.layers.1.norm1.weight",
|
| 129 |
+
"decoder.actor_decoder.layers.1.norm1.bias",
|
| 130 |
+
"decoder.actor_decoder.layers.1.norm2.weight",
|
| 131 |
+
"decoder.actor_decoder.layers.1.norm2.bias",
|
| 132 |
+
"decoder.actor_decoder.layers.1.norm3.weight",
|
| 133 |
+
"decoder.actor_decoder.layers.1.norm3.bias",
|
| 134 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
|
| 135 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
|
| 136 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
|
| 137 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
|
| 138 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 139 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 140 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 141 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 142 |
+
"decoder.actor_decoder.layers.2.linear1.weight",
|
| 143 |
+
"decoder.actor_decoder.layers.2.linear1.bias",
|
| 144 |
+
"decoder.actor_decoder.layers.2.linear2.weight",
|
| 145 |
+
"decoder.actor_decoder.layers.2.linear2.bias",
|
| 146 |
+
"decoder.actor_decoder.layers.2.norm1.weight",
|
| 147 |
+
"decoder.actor_decoder.layers.2.norm1.bias",
|
| 148 |
+
"decoder.actor_decoder.layers.2.norm2.weight",
|
| 149 |
+
"decoder.actor_decoder.layers.2.norm2.bias",
|
| 150 |
+
"decoder.actor_decoder.layers.2.norm3.weight",
|
| 151 |
+
"decoder.actor_decoder.layers.2.norm3.bias",
|
| 152 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
|
| 153 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
|
| 154 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
|
| 155 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
|
| 156 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 157 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 158 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 159 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 160 |
+
"decoder.actor_decoder.layers.3.linear1.weight",
|
| 161 |
+
"decoder.actor_decoder.layers.3.linear1.bias",
|
| 162 |
+
"decoder.actor_decoder.layers.3.linear2.weight",
|
| 163 |
+
"decoder.actor_decoder.layers.3.linear2.bias",
|
| 164 |
+
"decoder.actor_decoder.layers.3.norm1.weight",
|
| 165 |
+
"decoder.actor_decoder.layers.3.norm1.bias",
|
| 166 |
+
"decoder.actor_decoder.layers.3.norm2.weight",
|
| 167 |
+
"decoder.actor_decoder.layers.3.norm2.bias",
|
| 168 |
+
"decoder.actor_decoder.layers.3.norm3.weight",
|
| 169 |
+
"decoder.actor_decoder.layers.3.norm3.bias",
|
| 170 |
+
"decoder.revealer_mean.weight",
|
| 171 |
+
"decoder.revealer_mean.bias",
|
| 172 |
+
"decoder.revealer_log_std.weight",
|
| 173 |
+
"decoder.revealer_log_std.bias",
|
| 174 |
+
"decoder.actor_mean.weight",
|
| 175 |
+
"decoder.actor_mean.bias",
|
| 176 |
+
"decoder.actor_log_std.weight",
|
| 177 |
+
"decoder.actor_log_std.bias",
|
| 178 |
+
"decoder.proposal_score.0.weight",
|
| 179 |
+
"decoder.proposal_score.0.bias",
|
| 180 |
+
"decoder.proposal_score.1.weight",
|
| 181 |
+
"decoder.proposal_score.1.bias"
|
| 182 |
+
],
|
| 183 |
+
"missing_keys": [
|
| 184 |
+
"backbone.depth_adapter.depth_proj.0.weight",
|
| 185 |
+
"backbone.depth_adapter.depth_proj.0.bias",
|
| 186 |
+
"backbone.depth_adapter.depth_proj.1.weight",
|
| 187 |
+
"backbone.depth_adapter.depth_proj.1.bias",
|
| 188 |
+
"backbone.depth_adapter.depth_proj.3.weight",
|
| 189 |
+
"backbone.depth_adapter.depth_proj.3.bias",
|
| 190 |
+
"backbone.depth_adapter.geometry_proj.0.weight",
|
| 191 |
+
"backbone.depth_adapter.geometry_proj.0.bias",
|
| 192 |
+
"backbone.depth_adapter.geometry_proj.1.weight",
|
| 193 |
+
"backbone.depth_adapter.geometry_proj.1.bias",
|
| 194 |
+
"backbone.depth_adapter.camera_proj.0.weight",
|
| 195 |
+
"backbone.depth_adapter.camera_proj.0.bias",
|
| 196 |
+
"backbone.depth_adapter.camera_proj.1.weight",
|
| 197 |
+
"backbone.depth_adapter.camera_proj.1.bias",
|
| 198 |
+
"fusion.geometry_fusion.attn.in_proj_weight",
|
| 199 |
+
"fusion.geometry_fusion.attn.in_proj_bias",
|
| 200 |
+
"fusion.geometry_fusion.attn.out_proj.weight",
|
| 201 |
+
"fusion.geometry_fusion.attn.out_proj.bias",
|
| 202 |
+
"fusion.geometry_fusion.gate.0.weight",
|
| 203 |
+
"fusion.geometry_fusion.gate.0.bias",
|
| 204 |
+
"fusion.geometry_fusion.gate.1.weight",
|
| 205 |
+
"fusion.geometry_fusion.gate.1.bias",
|
| 206 |
+
"fusion.geometry_fusion.gate.3.weight",
|
| 207 |
+
"fusion.geometry_fusion.gate.3.bias",
|
| 208 |
+
"fusion.geometry_fusion.out.0.weight",
|
| 209 |
+
"fusion.geometry_fusion.out.0.bias",
|
| 210 |
+
"fusion.geometry_fusion.out.1.weight",
|
| 211 |
+
"fusion.geometry_fusion.out.1.bias",
|
| 212 |
+
"memory.scene_memory.position_embedding",
|
| 213 |
+
"memory.scene_memory.bank_queries",
|
| 214 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 215 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 216 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 217 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 218 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
|
| 219 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
|
| 220 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
|
| 221 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
|
| 222 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
|
| 223 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
|
| 224 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
|
| 225 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
|
| 226 |
+
"memory.scene_memory.bank_attention.in_proj_weight",
|
| 227 |
+
"memory.scene_memory.bank_attention.in_proj_bias",
|
| 228 |
+
"memory.scene_memory.bank_attention.out_proj.weight",
|
| 229 |
+
"memory.scene_memory.bank_attention.out_proj.bias",
|
| 230 |
+
"memory.scene_memory.action_proj.0.weight",
|
| 231 |
+
"memory.scene_memory.action_proj.0.bias",
|
| 232 |
+
"memory.scene_memory.action_proj.1.weight",
|
| 233 |
+
"memory.scene_memory.action_proj.1.bias",
|
| 234 |
+
"memory.scene_memory.write_gate.0.weight",
|
| 235 |
+
"memory.scene_memory.write_gate.0.bias",
|
| 236 |
+
"memory.scene_memory.write_gate.1.weight",
|
| 237 |
+
"memory.scene_memory.write_gate.1.bias",
|
| 238 |
+
"memory.scene_memory.write_gate.3.weight",
|
| 239 |
+
"memory.scene_memory.write_gate.3.bias",
|
| 240 |
+
"memory.scene_memory.token_proj.0.weight",
|
| 241 |
+
"memory.scene_memory.token_proj.0.bias",
|
| 242 |
+
"memory.scene_memory.token_proj.1.weight",
|
| 243 |
+
"memory.scene_memory.token_proj.1.bias",
|
| 244 |
+
"memory.belief_memory.position_embedding",
|
| 245 |
+
"memory.belief_memory.bank_queries",
|
| 246 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 247 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 248 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 249 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 250 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
|
| 251 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
|
| 252 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
|
| 253 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
|
| 254 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
|
| 255 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
|
| 256 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
|
| 257 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
|
| 258 |
+
"memory.belief_memory.bank_attention.in_proj_weight",
|
| 259 |
+
"memory.belief_memory.bank_attention.in_proj_bias",
|
| 260 |
+
"memory.belief_memory.bank_attention.out_proj.weight",
|
| 261 |
+
"memory.belief_memory.bank_attention.out_proj.bias",
|
| 262 |
+
"memory.belief_memory.action_proj.0.weight",
|
| 263 |
+
"memory.belief_memory.action_proj.0.bias",
|
| 264 |
+
"memory.belief_memory.action_proj.1.weight",
|
| 265 |
+
"memory.belief_memory.action_proj.1.bias",
|
| 266 |
+
"memory.belief_memory.write_gate.0.weight",
|
| 267 |
+
"memory.belief_memory.write_gate.0.bias",
|
| 268 |
+
"memory.belief_memory.write_gate.1.weight",
|
| 269 |
+
"memory.belief_memory.write_gate.1.bias",
|
| 270 |
+
"memory.belief_memory.write_gate.3.weight",
|
| 271 |
+
"memory.belief_memory.write_gate.3.bias",
|
| 272 |
+
"memory.belief_memory.token_proj.0.weight",
|
| 273 |
+
"memory.belief_memory.token_proj.0.bias",
|
| 274 |
+
"memory.belief_memory.token_proj.1.weight",
|
| 275 |
+
"memory.belief_memory.token_proj.1.bias",
|
| 276 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
|
| 277 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
|
| 278 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
|
| 279 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
|
| 280 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 281 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 282 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 283 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 284 |
+
"decoder.arm_decoder.layers.0.linear1.weight",
|
| 285 |
+
"decoder.arm_decoder.layers.0.linear1.bias",
|
| 286 |
+
"decoder.arm_decoder.layers.0.linear2.weight",
|
| 287 |
+
"decoder.arm_decoder.layers.0.linear2.bias",
|
| 288 |
+
"decoder.arm_decoder.layers.0.norm1.weight",
|
| 289 |
+
"decoder.arm_decoder.layers.0.norm1.bias",
|
| 290 |
+
"decoder.arm_decoder.layers.0.norm2.weight",
|
| 291 |
+
"decoder.arm_decoder.layers.0.norm2.bias",
|
| 292 |
+
"decoder.arm_decoder.layers.0.norm3.weight",
|
| 293 |
+
"decoder.arm_decoder.layers.0.norm3.bias",
|
| 294 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
|
| 295 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
|
| 296 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
|
| 297 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
|
| 298 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 299 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 300 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 301 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 302 |
+
"decoder.arm_decoder.layers.1.linear1.weight",
|
| 303 |
+
"decoder.arm_decoder.layers.1.linear1.bias",
|
| 304 |
+
"decoder.arm_decoder.layers.1.linear2.weight",
|
| 305 |
+
"decoder.arm_decoder.layers.1.linear2.bias",
|
| 306 |
+
"decoder.arm_decoder.layers.1.norm1.weight",
|
| 307 |
+
"decoder.arm_decoder.layers.1.norm1.bias",
|
| 308 |
+
"decoder.arm_decoder.layers.1.norm2.weight",
|
| 309 |
+
"decoder.arm_decoder.layers.1.norm2.bias",
|
| 310 |
+
"decoder.arm_decoder.layers.1.norm3.weight",
|
| 311 |
+
"decoder.arm_decoder.layers.1.norm3.bias",
|
| 312 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
|
| 313 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
|
| 314 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
|
| 315 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
|
| 316 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 317 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 318 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 319 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 320 |
+
"decoder.arm_decoder.layers.2.linear1.weight",
|
| 321 |
+
"decoder.arm_decoder.layers.2.linear1.bias",
|
| 322 |
+
"decoder.arm_decoder.layers.2.linear2.weight",
|
| 323 |
+
"decoder.arm_decoder.layers.2.linear2.bias",
|
| 324 |
+
"decoder.arm_decoder.layers.2.norm1.weight",
|
| 325 |
+
"decoder.arm_decoder.layers.2.norm1.bias",
|
| 326 |
+
"decoder.arm_decoder.layers.2.norm2.weight",
|
| 327 |
+
"decoder.arm_decoder.layers.2.norm2.bias",
|
| 328 |
+
"decoder.arm_decoder.layers.2.norm3.weight",
|
| 329 |
+
"decoder.arm_decoder.layers.2.norm3.bias",
|
| 330 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
|
| 331 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
|
| 332 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
|
| 333 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
|
| 334 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 335 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 336 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 337 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 338 |
+
"decoder.arm_decoder.layers.3.linear1.weight",
|
| 339 |
+
"decoder.arm_decoder.layers.3.linear1.bias",
|
| 340 |
+
"decoder.arm_decoder.layers.3.linear2.weight",
|
| 341 |
+
"decoder.arm_decoder.layers.3.linear2.bias",
|
| 342 |
+
"decoder.arm_decoder.layers.3.norm1.weight",
|
| 343 |
+
"decoder.arm_decoder.layers.3.norm1.bias",
|
| 344 |
+
"decoder.arm_decoder.layers.3.norm2.weight",
|
| 345 |
+
"decoder.arm_decoder.layers.3.norm2.bias",
|
| 346 |
+
"decoder.arm_decoder.layers.3.norm3.weight",
|
| 347 |
+
"decoder.arm_decoder.layers.3.norm3.bias",
|
| 348 |
+
"decoder.arm_identity.weight",
|
| 349 |
+
"decoder.phase_adapter.weight",
|
| 350 |
+
"decoder.phase_adapter.bias",
|
| 351 |
+
"decoder.role_adapter.weight",
|
| 352 |
+
"decoder.role_adapter.bias",
|
| 353 |
+
"decoder.context_proj.0.weight",
|
| 354 |
+
"decoder.context_proj.0.bias",
|
| 355 |
+
"decoder.context_proj.1.weight",
|
| 356 |
+
"decoder.context_proj.1.bias",
|
| 357 |
+
"decoder.arm_head.0.weight",
|
| 358 |
+
"decoder.arm_head.0.bias",
|
| 359 |
+
"decoder.arm_head.1.weight",
|
| 360 |
+
"decoder.arm_head.1.bias",
|
| 361 |
+
"decoder.arm_mean.weight",
|
| 362 |
+
"decoder.arm_mean.bias",
|
| 363 |
+
"decoder.arm_log_std.weight",
|
| 364 |
+
"decoder.arm_log_std.bias",
|
| 365 |
+
"decoder.proposal_mode_head.0.weight",
|
| 366 |
+
"decoder.proposal_mode_head.0.bias",
|
| 367 |
+
"decoder.proposal_mode_head.1.weight",
|
| 368 |
+
"decoder.proposal_mode_head.1.bias",
|
| 369 |
+
"decoder.proposal_mode_head.3.weight",
|
| 370 |
+
"decoder.proposal_mode_head.3.bias",
|
| 371 |
+
"decoder.proposal_mode_embeddings.weight",
|
| 372 |
+
"decoder.proposal_slot_embeddings.weight",
|
| 373 |
+
"decoder.mode_residual_heads.0.0.weight",
|
| 374 |
+
"decoder.mode_residual_heads.0.0.bias",
|
| 375 |
+
"decoder.mode_residual_heads.0.1.weight",
|
| 376 |
+
"decoder.mode_residual_heads.0.1.bias",
|
| 377 |
+
"decoder.mode_residual_heads.0.3.weight",
|
| 378 |
+
"decoder.mode_residual_heads.0.3.bias",
|
| 379 |
+
"decoder.mode_residual_heads.1.0.weight",
|
| 380 |
+
"decoder.mode_residual_heads.1.0.bias",
|
| 381 |
+
"decoder.mode_residual_heads.1.1.weight",
|
| 382 |
+
"decoder.mode_residual_heads.1.1.bias",
|
| 383 |
+
"decoder.mode_residual_heads.1.3.weight",
|
| 384 |
+
"decoder.mode_residual_heads.1.3.bias",
|
| 385 |
+
"decoder.mode_residual_heads.2.0.weight",
|
| 386 |
+
"decoder.mode_residual_heads.2.0.bias",
|
| 387 |
+
"decoder.mode_residual_heads.2.1.weight",
|
| 388 |
+
"decoder.mode_residual_heads.2.1.bias",
|
| 389 |
+
"decoder.mode_residual_heads.2.3.weight",
|
| 390 |
+
"decoder.mode_residual_heads.2.3.bias",
|
| 391 |
+
"decoder.mode_residual_heads.3.0.weight",
|
| 392 |
+
"decoder.mode_residual_heads.3.0.bias",
|
| 393 |
+
"decoder.mode_residual_heads.3.1.weight",
|
| 394 |
+
"decoder.mode_residual_heads.3.1.bias",
|
| 395 |
+
"decoder.mode_residual_heads.3.3.weight",
|
| 396 |
+
"decoder.mode_residual_heads.3.3.bias",
|
| 397 |
+
"decoder.mode_residual_heads.4.0.weight",
|
| 398 |
+
"decoder.mode_residual_heads.4.0.bias",
|
| 399 |
+
"decoder.mode_residual_heads.4.1.weight",
|
| 400 |
+
"decoder.mode_residual_heads.4.1.bias",
|
| 401 |
+
"decoder.mode_residual_heads.4.3.weight",
|
| 402 |
+
"decoder.mode_residual_heads.4.3.bias",
|
| 403 |
+
"decoder.mode_residual_heads.5.0.weight",
|
| 404 |
+
"decoder.mode_residual_heads.5.0.bias",
|
| 405 |
+
"decoder.mode_residual_heads.5.1.weight",
|
| 406 |
+
"decoder.mode_residual_heads.5.1.bias",
|
| 407 |
+
"decoder.mode_residual_heads.5.3.weight",
|
| 408 |
+
"decoder.mode_residual_heads.5.3.bias",
|
| 409 |
+
"decoder.slot_delta.0.weight",
|
| 410 |
+
"decoder.slot_delta.0.bias",
|
| 411 |
+
"decoder.slot_delta.1.weight",
|
| 412 |
+
"decoder.slot_delta.1.bias",
|
| 413 |
+
"decoder.slot_delta.3.weight",
|
| 414 |
+
"decoder.slot_delta.3.bias",
|
| 415 |
+
"decoder.proposal_score.0.weight",
|
| 416 |
+
"decoder.proposal_score.0.bias",
|
| 417 |
+
"decoder.proposal_score.1.weight",
|
| 418 |
+
"decoder.proposal_score.1.bias",
|
| 419 |
+
"decoder.proposal_score.3.weight",
|
| 420 |
+
"decoder.proposal_score.3.bias",
|
| 421 |
+
"elastic_state_head.interaction_queries",
|
| 422 |
+
"elastic_state_head.interaction_attention.in_proj_weight",
|
| 423 |
+
"elastic_state_head.interaction_attention.in_proj_bias",
|
| 424 |
+
"elastic_state_head.interaction_attention.out_proj.weight",
|
| 425 |
+
"elastic_state_head.interaction_attention.out_proj.bias",
|
| 426 |
+
"elastic_state_head.interaction_mlp.0.weight",
|
| 427 |
+
"elastic_state_head.interaction_mlp.0.bias",
|
| 428 |
+
"elastic_state_head.interaction_mlp.1.weight",
|
| 429 |
+
"elastic_state_head.interaction_mlp.1.bias",
|
| 430 |
+
"elastic_state_head.interaction_mlp.3.weight",
|
| 431 |
+
"elastic_state_head.interaction_mlp.3.bias",
|
| 432 |
+
"elastic_state_head.decoder.field_queries",
|
| 433 |
+
"elastic_state_head.decoder.field_attention.in_proj_weight",
|
| 434 |
+
"elastic_state_head.decoder.field_attention.in_proj_bias",
|
| 435 |
+
"elastic_state_head.decoder.field_attention.out_proj.weight",
|
| 436 |
+
"elastic_state_head.decoder.field_attention.out_proj.bias",
|
| 437 |
+
"elastic_state_head.decoder.field_mlp.0.weight",
|
| 438 |
+
"elastic_state_head.decoder.field_mlp.0.bias",
|
| 439 |
+
"elastic_state_head.decoder.field_mlp.1.weight",
|
| 440 |
+
"elastic_state_head.decoder.field_mlp.1.bias",
|
| 441 |
+
"elastic_state_head.decoder.field_mlp.3.weight",
|
| 442 |
+
"elastic_state_head.decoder.field_mlp.3.bias",
|
| 443 |
+
"elastic_state_head.decoder.summary_proj.0.weight",
|
| 444 |
+
"elastic_state_head.decoder.summary_proj.0.bias",
|
| 445 |
+
"elastic_state_head.decoder.summary_proj.1.weight",
|
| 446 |
+
"elastic_state_head.decoder.summary_proj.1.bias",
|
| 447 |
+
"elastic_state_head.decoder.phase_head.0.weight",
|
| 448 |
+
"elastic_state_head.decoder.phase_head.0.bias",
|
| 449 |
+
"elastic_state_head.decoder.phase_head.1.weight",
|
| 450 |
+
"elastic_state_head.decoder.phase_head.1.bias",
|
| 451 |
+
"elastic_state_head.decoder.phase_head.3.weight",
|
| 452 |
+
"elastic_state_head.decoder.phase_head.3.bias",
|
| 453 |
+
"elastic_state_head.decoder.arm_role_head.0.weight",
|
| 454 |
+
"elastic_state_head.decoder.arm_role_head.0.bias",
|
| 455 |
+
"elastic_state_head.decoder.arm_role_head.1.weight",
|
| 456 |
+
"elastic_state_head.decoder.arm_role_head.1.bias",
|
| 457 |
+
"elastic_state_head.decoder.arm_role_head.3.weight",
|
| 458 |
+
"elastic_state_head.decoder.arm_role_head.3.bias",
|
| 459 |
+
"elastic_state_head.decoder.arm_identity.weight",
|
| 460 |
+
"elastic_state_head.decoder.support_mode.0.weight",
|
| 461 |
+
"elastic_state_head.decoder.support_mode.0.bias",
|
| 462 |
+
"elastic_state_head.decoder.support_mode.1.weight",
|
| 463 |
+
"elastic_state_head.decoder.support_mode.1.bias",
|
| 464 |
+
"elastic_state_head.decoder.support_mode.3.weight",
|
| 465 |
+
"elastic_state_head.decoder.support_mode.3.bias",
|
| 466 |
+
"elastic_state_head.decoder.access_field.weight",
|
| 467 |
+
"elastic_state_head.decoder.access_field.bias",
|
| 468 |
+
"elastic_state_head.decoder.target_belief_field.weight",
|
| 469 |
+
"elastic_state_head.decoder.target_belief_field.bias",
|
| 470 |
+
"elastic_state_head.decoder.visibility_field.weight",
|
| 471 |
+
"elastic_state_head.decoder.visibility_field.bias",
|
| 472 |
+
"elastic_state_head.decoder.clearance_field.weight",
|
| 473 |
+
"elastic_state_head.decoder.clearance_field.bias",
|
| 474 |
+
"elastic_state_head.decoder.occluder_contact_field.weight",
|
| 475 |
+
"elastic_state_head.decoder.occluder_contact_field.bias",
|
| 476 |
+
"elastic_state_head.decoder.grasp_affordance_field.weight",
|
| 477 |
+
"elastic_state_head.decoder.grasp_affordance_field.bias",
|
| 478 |
+
"elastic_state_head.decoder.support_stability_field.weight",
|
| 479 |
+
"elastic_state_head.decoder.support_stability_field.bias",
|
| 480 |
+
"elastic_state_head.decoder.persistence_field.weight",
|
| 481 |
+
"elastic_state_head.decoder.persistence_field.bias",
|
| 482 |
+
"elastic_state_head.decoder.reocclusion_field.weight",
|
| 483 |
+
"elastic_state_head.decoder.reocclusion_field.bias",
|
| 484 |
+
"elastic_state_head.decoder.disturbance_field.weight",
|
| 485 |
+
"elastic_state_head.decoder.disturbance_field.bias",
|
| 486 |
+
"elastic_state_head.decoder.uncertainty_field.weight",
|
| 487 |
+
"elastic_state_head.decoder.uncertainty_field.bias",
|
| 488 |
+
"elastic_state_head.decoder.reocclusion_head.0.weight",
|
| 489 |
+
"elastic_state_head.decoder.reocclusion_head.0.bias",
|
| 490 |
+
"elastic_state_head.decoder.reocclusion_head.1.weight",
|
| 491 |
+
"elastic_state_head.decoder.reocclusion_head.1.bias",
|
| 492 |
+
"elastic_state_head.decoder.reocclusion_head.3.weight",
|
| 493 |
+
"elastic_state_head.decoder.reocclusion_head.3.bias",
|
| 494 |
+
"world_model.state_encoder.0.weight",
|
| 495 |
+
"world_model.state_encoder.0.bias",
|
| 496 |
+
"world_model.state_encoder.1.weight",
|
| 497 |
+
"world_model.state_encoder.1.bias",
|
| 498 |
+
"world_model.scene_memory_proj.0.weight",
|
| 499 |
+
"world_model.scene_memory_proj.0.bias",
|
| 500 |
+
"world_model.scene_memory_proj.1.weight",
|
| 501 |
+
"world_model.scene_memory_proj.1.bias",
|
| 502 |
+
"world_model.belief_memory_proj.0.weight",
|
| 503 |
+
"world_model.belief_memory_proj.0.bias",
|
| 504 |
+
"world_model.belief_memory_proj.1.weight",
|
| 505 |
+
"world_model.belief_memory_proj.1.bias",
|
| 506 |
+
"world_model.action_encoder.0.weight",
|
| 507 |
+
"world_model.action_encoder.0.bias",
|
| 508 |
+
"world_model.action_encoder.1.weight",
|
| 509 |
+
"world_model.action_encoder.1.bias",
|
| 510 |
+
"world_model.transition.weight_ih",
|
| 511 |
+
"world_model.transition.weight_hh",
|
| 512 |
+
"world_model.transition.bias_ih",
|
| 513 |
+
"world_model.transition.bias_hh",
|
| 514 |
+
"world_model.scene_memory_update.weight",
|
| 515 |
+
"world_model.scene_memory_update.bias",
|
| 516 |
+
"world_model.belief_memory_update.weight",
|
| 517 |
+
"world_model.belief_memory_update.bias",
|
| 518 |
+
"world_model.compact_decoder.weight",
|
| 519 |
+
"world_model.compact_decoder.bias",
|
| 520 |
+
"world_model.target_belief_head.weight",
|
| 521 |
+
"world_model.target_belief_head.bias",
|
| 522 |
+
"world_model.visibility_head.weight",
|
| 523 |
+
"world_model.visibility_head.bias",
|
| 524 |
+
"world_model.clearance_head.weight",
|
| 525 |
+
"world_model.clearance_head.bias",
|
| 526 |
+
"world_model.occluder_contact_head.weight",
|
| 527 |
+
"world_model.occluder_contact_head.bias",
|
| 528 |
+
"world_model.grasp_affordance_head.weight",
|
| 529 |
+
"world_model.grasp_affordance_head.bias",
|
| 530 |
+
"world_model.support_stability_head.weight",
|
| 531 |
+
"world_model.support_stability_head.bias",
|
| 532 |
+
"world_model.persistence_head.weight",
|
| 533 |
+
"world_model.persistence_head.bias",
|
| 534 |
+
"world_model.reocclusion_head.weight",
|
| 535 |
+
"world_model.reocclusion_head.bias",
|
| 536 |
+
"world_model.disturbance_head.weight",
|
| 537 |
+
"world_model.disturbance_head.bias",
|
| 538 |
+
"world_model.uncertainty_head.weight",
|
| 539 |
+
"world_model.uncertainty_head.bias",
|
| 540 |
+
"world_model.access_head.weight",
|
| 541 |
+
"world_model.access_head.bias",
|
| 542 |
+
"planner.residual.trunk.0.weight",
|
| 543 |
+
"planner.residual.trunk.0.bias",
|
| 544 |
+
"planner.residual.trunk.1.weight",
|
| 545 |
+
"planner.residual.trunk.1.bias",
|
| 546 |
+
"planner.residual.trunk.3.weight",
|
| 547 |
+
"planner.residual.trunk.3.bias",
|
| 548 |
+
"planner.residual.success_head.weight",
|
| 549 |
+
"planner.residual.success_head.bias",
|
| 550 |
+
"planner.residual.risk_head.weight",
|
| 551 |
+
"planner.residual.risk_head.bias",
|
| 552 |
+
"planner.residual.residual_head.weight",
|
| 553 |
+
"planner.residual.residual_head.bias"
|
| 554 |
+
],
|
| 555 |
+
"unexpected_keys": []
|
| 556 |
+
}
|
| 557 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5555555555555555,
|
| 9 |
+
"visibility_integral": 31.92372977733612,
|
| 10 |
+
"corridor_availability": 0.8500884034567409,
|
| 11 |
+
"reocclusion_rate": 0.029287114566719827,
|
| 12 |
+
"persistence_horizon_mae": 0.894922278028389,
|
| 13 |
+
"disturbance_cost": 0.28616168903600836
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.556
|
| 6 |
+
- visibility_integral: 31.924
|
| 7 |
+
- corridor_availability: 0.850
|
| 8 |
+
- reocclusion_rate: 0.029
|
| 9 |
+
- persistence_horizon_mae: 0.895
|
| 10 |
+
- disturbance_cost: 0.286
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_clip_seed8
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 8
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
proxies:
|
| 9 |
+
- foliage_proxy
|
| 10 |
+
- bag_proxy
|
| 11 |
+
- cloth_proxy
|
| 12 |
+
resolution: 224
|
| 13 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 14 |
+
train_episodes_per_proxy: 48
|
| 15 |
+
val_episodes_per_proxy: 16
|
| 16 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed8.pt
|
| 17 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed8.pt
|
| 18 |
+
rebuild_dataset: false
|
| 19 |
+
chunk_horizon: 8
|
| 20 |
+
rollout_horizon: 5
|
| 21 |
+
history_steps: 6
|
| 22 |
+
planner_candidates: 8
|
| 23 |
+
seed: 8
|
| 24 |
+
optim:
|
| 25 |
+
epochs: 4
|
| 26 |
+
batch_size: 2
|
| 27 |
+
num_workers: 4
|
| 28 |
+
lr: 0.0003
|
| 29 |
+
weight_decay: 0.0001
|
| 30 |
+
trainer:
|
| 31 |
+
policy_type: elastic_reveal
|
| 32 |
+
use_bf16: true
|
| 33 |
+
grad_clip_norm: 1.0
|
| 34 |
+
freeze_backbone: true
|
| 35 |
+
gradient_checkpointing: false
|
| 36 |
+
plan_during_train: true
|
| 37 |
+
plan_during_eval: true
|
| 38 |
+
support_mode_conditioning: true
|
| 39 |
+
planner_mode: trainable
|
| 40 |
+
use_depth: false
|
| 41 |
+
use_world_model: true
|
| 42 |
+
use_role_tokens: true
|
| 43 |
+
compute_equivariance_probe: true
|
| 44 |
+
policy:
|
| 45 |
+
backbone:
|
| 46 |
+
model_name: openai/clip-vit-base-patch32
|
| 47 |
+
hidden_dim: 512
|
| 48 |
+
max_text_tokens: 32
|
| 49 |
+
freeze_backbone: true
|
| 50 |
+
gradient_checkpointing: false
|
| 51 |
+
use_dummy_backbone: false
|
| 52 |
+
fusion:
|
| 53 |
+
hidden_dim: 512
|
| 54 |
+
num_cameras: 3
|
| 55 |
+
num_layers: 4
|
| 56 |
+
num_heads: 8
|
| 57 |
+
ff_dim: 2048
|
| 58 |
+
dropout: 0.1
|
| 59 |
+
proprio_dim: 32
|
| 60 |
+
proprio_tokens: 1
|
| 61 |
+
memory:
|
| 62 |
+
hidden_dim: 512
|
| 63 |
+
action_dim: 14
|
| 64 |
+
history_steps: 6
|
| 65 |
+
scene_history_steps: 3
|
| 66 |
+
belief_history_steps: 8
|
| 67 |
+
num_layers: 2
|
| 68 |
+
dropout: 0.1
|
| 69 |
+
memory_bank_size: 4
|
| 70 |
+
scene_bank_size: 2
|
| 71 |
+
belief_bank_size: 2
|
| 72 |
+
num_heads: 8
|
| 73 |
+
max_history_steps: 8
|
| 74 |
+
decoder:
|
| 75 |
+
hidden_dim: 512
|
| 76 |
+
num_heads: 8
|
| 77 |
+
num_layers: 4
|
| 78 |
+
ff_dim: 2048
|
| 79 |
+
dropout: 0.1
|
| 80 |
+
chunk_size: 8
|
| 81 |
+
action_dim: 14
|
| 82 |
+
arm_action_dim: 7
|
| 83 |
+
num_candidates: 8
|
| 84 |
+
num_phases: 5
|
| 85 |
+
num_arm_roles: 4
|
| 86 |
+
num_proposal_modes: 6
|
| 87 |
+
planner_top_k: 4
|
| 88 |
+
reveal_head:
|
| 89 |
+
hidden_dim: 512
|
| 90 |
+
num_support_modes: 3
|
| 91 |
+
num_approach_templates: 32
|
| 92 |
+
rollout_horizon: 5
|
| 93 |
+
belief_map_size: 32
|
| 94 |
+
field_size: 16
|
| 95 |
+
num_heads: 8
|
| 96 |
+
predict_belief_map: true
|
| 97 |
+
num_phases: 5
|
| 98 |
+
num_arm_roles: 4
|
| 99 |
+
num_interaction_tokens: 8
|
| 100 |
+
world_model:
|
| 101 |
+
hidden_dim: 512
|
| 102 |
+
action_dim: 14
|
| 103 |
+
num_support_modes: 3
|
| 104 |
+
num_approach_templates: 32
|
| 105 |
+
rollout_horizon: 5
|
| 106 |
+
field_size: 16
|
| 107 |
+
num_heads: 8
|
| 108 |
+
num_phases: 5
|
| 109 |
+
num_arm_roles: 4
|
| 110 |
+
num_interaction_tokens: 8
|
| 111 |
+
belief_map_size: 32
|
| 112 |
+
predict_belief_map: true
|
| 113 |
+
scene_bank_size: 2
|
| 114 |
+
belief_bank_size: 2
|
| 115 |
+
planner:
|
| 116 |
+
hidden_dim: 512
|
| 117 |
+
num_candidates: 8
|
| 118 |
+
action_dim: 14
|
| 119 |
+
num_support_modes: 3
|
| 120 |
+
utility_margin: 0.1
|
| 121 |
+
num_heads: 8
|
| 122 |
+
num_layers: 2
|
| 123 |
+
num_phases: 5
|
| 124 |
+
num_arm_roles: 4
|
| 125 |
+
top_k: 4
|
| 126 |
+
loss_weights:
|
| 127 |
+
action: 1.0
|
| 128 |
+
phase: 0.1
|
| 129 |
+
arm_role: 0.15
|
| 130 |
+
support_mode: 0.1
|
| 131 |
+
corridor: 0.15
|
| 132 |
+
persistence: 0.05
|
| 133 |
+
disturbance: 0.05
|
| 134 |
+
world_model: 0.2
|
| 135 |
+
belief: 0.05
|
| 136 |
+
visibility: 0.05
|
| 137 |
+
clearance: 0.05
|
| 138 |
+
support_stability: 0.05
|
| 139 |
+
reocclusion: 0.05
|
| 140 |
+
occluder_contact: 0.05
|
| 141 |
+
grasp_affordance: 0.05
|
| 142 |
+
planner_success: 0.25
|
| 143 |
+
planner_risk: 0.1
|
| 144 |
+
planner_ranking: 0.2
|
| 145 |
+
proposal_reconstruction: 0.1
|
| 146 |
+
proposal_success: 0.15
|
| 147 |
+
proposal_ranking: 0.2
|
| 148 |
+
proposal_diversity: 0.05
|
| 149 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.25984251968503935,
|
| 3 |
+
"planner_regret": 0.024652592837810516,
|
| 4 |
+
"planner_score_utility_spearman": 0.15748029947280884,
|
| 5 |
+
"risk_calibration_mse": 0.010109159164130688,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.02039325051009655,
|
| 8 |
+
"left_right_equivariance_error": 8.317838273796951e-05,
|
| 9 |
+
"belief_calibration_brier": 0.0039802417159080505,
|
| 10 |
+
"reocclusion_calibration_brier": 0.2667863667011261,
|
| 11 |
+
"support_stability_mae": 0.023258011788129807,
|
| 12 |
+
"clearance_auc": 0.9407927438472715,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.5879086852073669,
|
| 15 |
+
"num_samples": 127
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.025799189747862168,
|
| 6 |
+
"arm_role": 0.027215735138398815,
|
| 7 |
+
"belief": 0.11522909954034222,
|
| 8 |
+
"clearance": 0.09597517975181809,
|
| 9 |
+
"corridor": 0.3045216482132673,
|
| 10 |
+
"disturbance": 0.006567074132739083,
|
| 11 |
+
"grasp_affordance": 0.02625927054055074,
|
| 12 |
+
"occluder_contact": 0.2161167692295544,
|
| 13 |
+
"persistence": 7.305491891831004,
|
| 14 |
+
"phase": 0.7473598300474477,
|
| 15 |
+
"planner_ranking": 0.14102927445574143,
|
| 16 |
+
"planner_risk": 0.014660530898254365,
|
| 17 |
+
"planner_success": 0.596433128830026,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.26868818193206,
|
| 20 |
+
"proposal_reconstruction": 0.06815405646387819,
|
| 21 |
+
"proposal_success": 0.6748700912710259,
|
| 22 |
+
"reocclusion": 0.7006335564308765,
|
| 23 |
+
"role_swap_consistency": 0.0005011227108655176,
|
| 24 |
+
"support_mode": 0.7077700629908377,
|
| 25 |
+
"support_stability": 0.1599257462645798,
|
| 26 |
+
"total": 1.733834327203441,
|
| 27 |
+
"uncertainty": 0.022427979406115357,
|
| 28 |
+
"visibility": 0.11316451830155562,
|
| 29 |
+
"world_model": 2.674901399312843
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.02199536032276228,
|
| 33 |
+
"arm_role": 9.8040056428772e-06,
|
| 34 |
+
"belief": 0.0978035525768064,
|
| 35 |
+
"clearance": 0.07755720446584746,
|
| 36 |
+
"corridor": 0.24431297194678336,
|
| 37 |
+
"disturbance": 0.0019795258613157785,
|
| 38 |
+
"grasp_affordance": 0.008650467454572208,
|
| 39 |
+
"occluder_contact": 0.20205649081617594,
|
| 40 |
+
"persistence": 4.437129996716976,
|
| 41 |
+
"phase": 0.6695621414110065,
|
| 42 |
+
"planner_ranking": 0.04436381870164041,
|
| 43 |
+
"planner_risk": 0.010196975797498453,
|
| 44 |
+
"planner_success": 0.5646271030418575,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.1638631131500006,
|
| 47 |
+
"proposal_reconstruction": 0.06484090705635026,
|
| 48 |
+
"proposal_success": 0.6649224627763033,
|
| 49 |
+
"reocclusion": 0.7438069470226765,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.673728191293776,
|
| 52 |
+
"support_stability": 0.13629821891663596,
|
| 53 |
+
"total": 1.4150245506316423,
|
| 54 |
+
"uncertainty": 0.002036258225416532,
|
| 55 |
+
"visibility": 0.09110353700816631,
|
| 56 |
+
"world_model": 2.210838695988059
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.02220674532499769,
|
| 63 |
+
"arm_role": 4.0168849585568094e-05,
|
| 64 |
+
"belief": 0.10375202887969491,
|
| 65 |
+
"clearance": 0.08468958432176663,
|
| 66 |
+
"corridor": 0.24882320250282114,
|
| 67 |
+
"disturbance": 0.002981857188692701,
|
| 68 |
+
"grasp_affordance": 0.00994103324857994,
|
| 69 |
+
"occluder_contact": 0.20824503820604054,
|
| 70 |
+
"persistence": 4.263324179262391,
|
| 71 |
+
"phase": 0.7222360341336714,
|
| 72 |
+
"planner_ranking": 0.044953017053952174,
|
| 73 |
+
"planner_risk": 0.010661984013600143,
|
| 74 |
+
"planner_success": 0.5370719069273684,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.1506784087076236,
|
| 77 |
+
"proposal_reconstruction": 0.06470025059674422,
|
| 78 |
+
"proposal_success": 0.6748968515720667,
|
| 79 |
+
"reocclusion": 0.7042920837539652,
|
| 80 |
+
"role_swap_consistency": 0.00024932249915769023,
|
| 81 |
+
"support_mode": 0.6881518938154451,
|
| 82 |
+
"support_stability": 0.1487102357972979,
|
| 83 |
+
"total": 1.3995415040959862,
|
| 84 |
+
"uncertainty": 0.0019858729011069556,
|
| 85 |
+
"visibility": 0.09729615078156531,
|
| 86 |
+
"world_model": 2.178037493952906
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.029678026388864964,
|
| 90 |
+
"arm_role": 0.0003116108114227245,
|
| 91 |
+
"belief": 0.10797233448829502,
|
| 92 |
+
"clearance": 0.08150003047194332,
|
| 93 |
+
"corridor": 0.2509052273235284,
|
| 94 |
+
"disturbance": 0.002103368451003007,
|
| 95 |
+
"grasp_affordance": 0.008963905274868011,
|
| 96 |
+
"occluder_contact": 0.2007133779115975,
|
| 97 |
+
"persistence": 4.478599248453975,
|
| 98 |
+
"phase": 0.7040554136037827,
|
| 99 |
+
"planner_ranking": 0.03813048706929578,
|
| 100 |
+
"planner_risk": 0.01057393318569666,
|
| 101 |
+
"planner_success": 0.5217722351662815,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.1685641314834356,
|
| 104 |
+
"proposal_reconstruction": 0.07131227233912796,
|
| 105 |
+
"proposal_success": 0.6757729910314083,
|
| 106 |
+
"reocclusion": 0.6976062525063753,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 0.7273222031071782,
|
| 109 |
+
"support_stability": 0.1463006478443276,
|
| 110 |
+
"total": 1.3876731358468533,
|
| 111 |
+
"uncertainty": 0.0005028243003835087,
|
| 112 |
+
"visibility": 0.10090084094554186,
|
| 113 |
+
"world_model": 2.023001086898148
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.022834130358048446,
|
| 120 |
+
"arm_role": 3.6339485208401505e-05,
|
| 121 |
+
"belief": 0.10015391417978946,
|
| 122 |
+
"clearance": 0.08339313631243418,
|
| 123 |
+
"corridor": 0.24550532728082536,
|
| 124 |
+
"disturbance": 0.002419849791671015,
|
| 125 |
+
"grasp_affordance": 0.011102509094860541,
|
| 126 |
+
"occluder_contact": 0.20242435567041966,
|
| 127 |
+
"persistence": 4.354869382134127,
|
| 128 |
+
"phase": 0.6933721572316754,
|
| 129 |
+
"planner_ranking": 0.04187904763565859,
|
| 130 |
+
"planner_risk": 0.010259467963658331,
|
| 131 |
+
"planner_success": 0.5138571092283538,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1488539314394846,
|
| 134 |
+
"proposal_reconstruction": 0.06509613401758733,
|
| 135 |
+
"proposal_success": 0.6776590312962757,
|
| 136 |
+
"reocclusion": 0.70495132540221,
|
| 137 |
+
"role_swap_consistency": 0.0003516697920602868,
|
| 138 |
+
"support_mode": 0.6823001881544503,
|
| 139 |
+
"support_stability": 0.14350243961116718,
|
| 140 |
+
"total": 1.378995967473035,
|
| 141 |
+
"uncertainty": 0.0031733291824921203,
|
| 142 |
+
"visibility": 0.09716511293465555,
|
| 143 |
+
"world_model": 2.104598100584839
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.02644303720444441,
|
| 147 |
+
"arm_role": 4.627731826190029e-06,
|
| 148 |
+
"belief": 0.10258024383801967,
|
| 149 |
+
"clearance": 0.07597982959123328,
|
| 150 |
+
"corridor": 0.2423992605181411,
|
| 151 |
+
"disturbance": 0.0015974244740846189,
|
| 152 |
+
"grasp_affordance": 0.007909159859991632,
|
| 153 |
+
"occluder_contact": 0.19435308501124382,
|
| 154 |
+
"persistence": 3.919285401701927,
|
| 155 |
+
"phase": 0.6770087121985853,
|
| 156 |
+
"planner_ranking": 0.030531517459166935,
|
| 157 |
+
"planner_risk": 0.010262692154356046,
|
| 158 |
+
"planner_success": 0.5169326290488243,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.138186807744205,
|
| 161 |
+
"proposal_reconstruction": 0.06911751109873876,
|
| 162 |
+
"proposal_success": 0.6695848302915692,
|
| 163 |
+
"reocclusion": 0.6975388880819082,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 0.6884247697889805,
|
| 166 |
+
"support_stability": 0.13594868587097153,
|
| 167 |
+
"total": 1.3366163168102503,
|
| 168 |
+
"uncertainty": 0.0006479808544099797,
|
| 169 |
+
"visibility": 0.09649082575924695,
|
| 170 |
+
"world_model": 2.0216304706409574
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.021160060905544235,
|
| 177 |
+
"arm_role": 5.587545364939105e-05,
|
| 178 |
+
"belief": 0.10077974488909956,
|
| 179 |
+
"clearance": 0.08377115065670762,
|
| 180 |
+
"corridor": 0.2723994788211522,
|
| 181 |
+
"disturbance": 0.0028603613238174243,
|
| 182 |
+
"grasp_affordance": 0.011514163958835196,
|
| 183 |
+
"occluder_contact": 0.20602131318983607,
|
| 184 |
+
"persistence": 3.0813600014851317,
|
| 185 |
+
"phase": 0.6817607779777487,
|
| 186 |
+
"planner_ranking": 0.031658034657560674,
|
| 187 |
+
"planner_risk": 0.010394540625284256,
|
| 188 |
+
"planner_success": 0.5069346120532271,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.132226309851202,
|
| 191 |
+
"proposal_reconstruction": 0.06328810811900967,
|
| 192 |
+
"proposal_success": 0.6744790461050902,
|
| 193 |
+
"reocclusion": 0.6852282721022661,
|
| 194 |
+
"role_swap_consistency": 0.0005754872515272832,
|
| 195 |
+
"support_mode": 0.6633978239528796,
|
| 196 |
+
"support_stability": 0.14488365837977468,
|
| 197 |
+
"total": 1.293662095569191,
|
| 198 |
+
"uncertainty": 0.0023333917296635863,
|
| 199 |
+
"visibility": 0.09853576490392235,
|
| 200 |
+
"world_model": 2.0413369105748482
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.017367416352499276,
|
| 204 |
+
"arm_role": 7.692722565622034e-07,
|
| 205 |
+
"belief": 0.1027774921967648,
|
| 206 |
+
"clearance": 0.08752925635781139,
|
| 207 |
+
"corridor": 0.26156787533545867,
|
| 208 |
+
"disturbance": 0.0016430629628985116,
|
| 209 |
+
"grasp_affordance": 0.010058694657345768,
|
| 210 |
+
"occluder_contact": 0.21157401148229837,
|
| 211 |
+
"persistence": 1.0993698399979621,
|
| 212 |
+
"phase": 0.6142133427783847,
|
| 213 |
+
"planner_ranking": 0.03328441088268619,
|
| 214 |
+
"planner_risk": 0.010188427979301196,
|
| 215 |
+
"planner_success": 0.4918641885742545,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1239634547382593,
|
| 218 |
+
"proposal_reconstruction": 0.06056849448941648,
|
| 219 |
+
"proposal_success": 0.6778606250882149,
|
| 220 |
+
"reocclusion": 0.5640022717416286,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 0.5024671151768416,
|
| 223 |
+
"support_stability": 0.13648800805094652,
|
| 224 |
+
"total": 1.1350205279886723,
|
| 225 |
+
"uncertainty": 0.0008341338888158134,
|
| 226 |
+
"visibility": 0.0982570193009451,
|
| 227 |
+
"world_model": 1.93993010930717
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_clip_seed8",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.293662095569191,
|
| 6 |
+
"final_val_total": 1.1350205279886723,
|
| 7 |
+
"train_time_sec": 146.87081933021545,
|
| 8 |
+
"peak_gpu_memory_mb": 1891.1337890625,
|
| 9 |
+
"num_train_samples": 381,
|
| 10 |
+
"num_val_samples": 127,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": {
|
| 14 |
+
"path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
|
| 15 |
+
"loaded_keys": 461,
|
| 16 |
+
"skipped_shape_mismatch_keys": [
|
| 17 |
+
"memory.gru.weight_ih_l0",
|
| 18 |
+
"memory.gru.weight_hh_l0",
|
| 19 |
+
"memory.gru.bias_ih_l0",
|
| 20 |
+
"memory.gru.bias_hh_l0",
|
| 21 |
+
"memory.token_proj.0.weight",
|
| 22 |
+
"memory.token_proj.0.bias",
|
| 23 |
+
"memory.token_proj.1.weight",
|
| 24 |
+
"memory.token_proj.1.bias",
|
| 25 |
+
"decoder.actor_role_bias",
|
| 26 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
|
| 27 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
|
| 28 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
|
| 29 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
|
| 30 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 31 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 32 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 33 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 34 |
+
"decoder.revealer_decoder.layers.0.linear1.weight",
|
| 35 |
+
"decoder.revealer_decoder.layers.0.linear1.bias",
|
| 36 |
+
"decoder.revealer_decoder.layers.0.linear2.weight",
|
| 37 |
+
"decoder.revealer_decoder.layers.0.linear2.bias",
|
| 38 |
+
"decoder.revealer_decoder.layers.0.norm1.weight",
|
| 39 |
+
"decoder.revealer_decoder.layers.0.norm1.bias",
|
| 40 |
+
"decoder.revealer_decoder.layers.0.norm2.weight",
|
| 41 |
+
"decoder.revealer_decoder.layers.0.norm2.bias",
|
| 42 |
+
"decoder.revealer_decoder.layers.0.norm3.weight",
|
| 43 |
+
"decoder.revealer_decoder.layers.0.norm3.bias",
|
| 44 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
|
| 45 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
|
| 46 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
|
| 47 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
|
| 48 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 49 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 50 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 51 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 52 |
+
"decoder.revealer_decoder.layers.1.linear1.weight",
|
| 53 |
+
"decoder.revealer_decoder.layers.1.linear1.bias",
|
| 54 |
+
"decoder.revealer_decoder.layers.1.linear2.weight",
|
| 55 |
+
"decoder.revealer_decoder.layers.1.linear2.bias",
|
| 56 |
+
"decoder.revealer_decoder.layers.1.norm1.weight",
|
| 57 |
+
"decoder.revealer_decoder.layers.1.norm1.bias",
|
| 58 |
+
"decoder.revealer_decoder.layers.1.norm2.weight",
|
| 59 |
+
"decoder.revealer_decoder.layers.1.norm2.bias",
|
| 60 |
+
"decoder.revealer_decoder.layers.1.norm3.weight",
|
| 61 |
+
"decoder.revealer_decoder.layers.1.norm3.bias",
|
| 62 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
|
| 63 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
|
| 64 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
|
| 65 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
|
| 66 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 67 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 68 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 69 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 70 |
+
"decoder.revealer_decoder.layers.2.linear1.weight",
|
| 71 |
+
"decoder.revealer_decoder.layers.2.linear1.bias",
|
| 72 |
+
"decoder.revealer_decoder.layers.2.linear2.weight",
|
| 73 |
+
"decoder.revealer_decoder.layers.2.linear2.bias",
|
| 74 |
+
"decoder.revealer_decoder.layers.2.norm1.weight",
|
| 75 |
+
"decoder.revealer_decoder.layers.2.norm1.bias",
|
| 76 |
+
"decoder.revealer_decoder.layers.2.norm2.weight",
|
| 77 |
+
"decoder.revealer_decoder.layers.2.norm2.bias",
|
| 78 |
+
"decoder.revealer_decoder.layers.2.norm3.weight",
|
| 79 |
+
"decoder.revealer_decoder.layers.2.norm3.bias",
|
| 80 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
|
| 81 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
|
| 82 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
|
| 83 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
|
| 84 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 85 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 86 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 87 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 88 |
+
"decoder.revealer_decoder.layers.3.linear1.weight",
|
| 89 |
+
"decoder.revealer_decoder.layers.3.linear1.bias",
|
| 90 |
+
"decoder.revealer_decoder.layers.3.linear2.weight",
|
| 91 |
+
"decoder.revealer_decoder.layers.3.linear2.bias",
|
| 92 |
+
"decoder.revealer_decoder.layers.3.norm1.weight",
|
| 93 |
+
"decoder.revealer_decoder.layers.3.norm1.bias",
|
| 94 |
+
"decoder.revealer_decoder.layers.3.norm2.weight",
|
| 95 |
+
"decoder.revealer_decoder.layers.3.norm2.bias",
|
| 96 |
+
"decoder.revealer_decoder.layers.3.norm3.weight",
|
| 97 |
+
"decoder.revealer_decoder.layers.3.norm3.bias",
|
| 98 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
|
| 99 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
|
| 100 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
|
| 101 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
|
| 102 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 103 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 104 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 105 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 106 |
+
"decoder.actor_decoder.layers.0.linear1.weight",
|
| 107 |
+
"decoder.actor_decoder.layers.0.linear1.bias",
|
| 108 |
+
"decoder.actor_decoder.layers.0.linear2.weight",
|
| 109 |
+
"decoder.actor_decoder.layers.0.linear2.bias",
|
| 110 |
+
"decoder.actor_decoder.layers.0.norm1.weight",
|
| 111 |
+
"decoder.actor_decoder.layers.0.norm1.bias",
|
| 112 |
+
"decoder.actor_decoder.layers.0.norm2.weight",
|
| 113 |
+
"decoder.actor_decoder.layers.0.norm2.bias",
|
| 114 |
+
"decoder.actor_decoder.layers.0.norm3.weight",
|
| 115 |
+
"decoder.actor_decoder.layers.0.norm3.bias",
|
| 116 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
|
| 117 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
|
| 118 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
|
| 119 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
|
| 120 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 121 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 122 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 123 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 124 |
+
"decoder.actor_decoder.layers.1.linear1.weight",
|
| 125 |
+
"decoder.actor_decoder.layers.1.linear1.bias",
|
| 126 |
+
"decoder.actor_decoder.layers.1.linear2.weight",
|
| 127 |
+
"decoder.actor_decoder.layers.1.linear2.bias",
|
| 128 |
+
"decoder.actor_decoder.layers.1.norm1.weight",
|
| 129 |
+
"decoder.actor_decoder.layers.1.norm1.bias",
|
| 130 |
+
"decoder.actor_decoder.layers.1.norm2.weight",
|
| 131 |
+
"decoder.actor_decoder.layers.1.norm2.bias",
|
| 132 |
+
"decoder.actor_decoder.layers.1.norm3.weight",
|
| 133 |
+
"decoder.actor_decoder.layers.1.norm3.bias",
|
| 134 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
|
| 135 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
|
| 136 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
|
| 137 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
|
| 138 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 139 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 140 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 141 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 142 |
+
"decoder.actor_decoder.layers.2.linear1.weight",
|
| 143 |
+
"decoder.actor_decoder.layers.2.linear1.bias",
|
| 144 |
+
"decoder.actor_decoder.layers.2.linear2.weight",
|
| 145 |
+
"decoder.actor_decoder.layers.2.linear2.bias",
|
| 146 |
+
"decoder.actor_decoder.layers.2.norm1.weight",
|
| 147 |
+
"decoder.actor_decoder.layers.2.norm1.bias",
|
| 148 |
+
"decoder.actor_decoder.layers.2.norm2.weight",
|
| 149 |
+
"decoder.actor_decoder.layers.2.norm2.bias",
|
| 150 |
+
"decoder.actor_decoder.layers.2.norm3.weight",
|
| 151 |
+
"decoder.actor_decoder.layers.2.norm3.bias",
|
| 152 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
|
| 153 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
|
| 154 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
|
| 155 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
|
| 156 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 157 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 158 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 159 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 160 |
+
"decoder.actor_decoder.layers.3.linear1.weight",
|
| 161 |
+
"decoder.actor_decoder.layers.3.linear1.bias",
|
| 162 |
+
"decoder.actor_decoder.layers.3.linear2.weight",
|
| 163 |
+
"decoder.actor_decoder.layers.3.linear2.bias",
|
| 164 |
+
"decoder.actor_decoder.layers.3.norm1.weight",
|
| 165 |
+
"decoder.actor_decoder.layers.3.norm1.bias",
|
| 166 |
+
"decoder.actor_decoder.layers.3.norm2.weight",
|
| 167 |
+
"decoder.actor_decoder.layers.3.norm2.bias",
|
| 168 |
+
"decoder.actor_decoder.layers.3.norm3.weight",
|
| 169 |
+
"decoder.actor_decoder.layers.3.norm3.bias",
|
| 170 |
+
"decoder.revealer_mean.weight",
|
| 171 |
+
"decoder.revealer_mean.bias",
|
| 172 |
+
"decoder.revealer_log_std.weight",
|
| 173 |
+
"decoder.revealer_log_std.bias",
|
| 174 |
+
"decoder.actor_mean.weight",
|
| 175 |
+
"decoder.actor_mean.bias",
|
| 176 |
+
"decoder.actor_log_std.weight",
|
| 177 |
+
"decoder.actor_log_std.bias",
|
| 178 |
+
"decoder.proposal_score.0.weight",
|
| 179 |
+
"decoder.proposal_score.0.bias",
|
| 180 |
+
"decoder.proposal_score.1.weight",
|
| 181 |
+
"decoder.proposal_score.1.bias"
|
| 182 |
+
],
|
| 183 |
+
"missing_keys": [
|
| 184 |
+
"backbone.depth_adapter.depth_proj.0.weight",
|
| 185 |
+
"backbone.depth_adapter.depth_proj.0.bias",
|
| 186 |
+
"backbone.depth_adapter.depth_proj.1.weight",
|
| 187 |
+
"backbone.depth_adapter.depth_proj.1.bias",
|
| 188 |
+
"backbone.depth_adapter.depth_proj.3.weight",
|
| 189 |
+
"backbone.depth_adapter.depth_proj.3.bias",
|
| 190 |
+
"backbone.depth_adapter.geometry_proj.0.weight",
|
| 191 |
+
"backbone.depth_adapter.geometry_proj.0.bias",
|
| 192 |
+
"backbone.depth_adapter.geometry_proj.1.weight",
|
| 193 |
+
"backbone.depth_adapter.geometry_proj.1.bias",
|
| 194 |
+
"backbone.depth_adapter.camera_proj.0.weight",
|
| 195 |
+
"backbone.depth_adapter.camera_proj.0.bias",
|
| 196 |
+
"backbone.depth_adapter.camera_proj.1.weight",
|
| 197 |
+
"backbone.depth_adapter.camera_proj.1.bias",
|
| 198 |
+
"fusion.geometry_fusion.attn.in_proj_weight",
|
| 199 |
+
"fusion.geometry_fusion.attn.in_proj_bias",
|
| 200 |
+
"fusion.geometry_fusion.attn.out_proj.weight",
|
| 201 |
+
"fusion.geometry_fusion.attn.out_proj.bias",
|
| 202 |
+
"fusion.geometry_fusion.gate.0.weight",
|
| 203 |
+
"fusion.geometry_fusion.gate.0.bias",
|
| 204 |
+
"fusion.geometry_fusion.gate.1.weight",
|
| 205 |
+
"fusion.geometry_fusion.gate.1.bias",
|
| 206 |
+
"fusion.geometry_fusion.gate.3.weight",
|
| 207 |
+
"fusion.geometry_fusion.gate.3.bias",
|
| 208 |
+
"fusion.geometry_fusion.out.0.weight",
|
| 209 |
+
"fusion.geometry_fusion.out.0.bias",
|
| 210 |
+
"fusion.geometry_fusion.out.1.weight",
|
| 211 |
+
"fusion.geometry_fusion.out.1.bias",
|
| 212 |
+
"memory.scene_memory.position_embedding",
|
| 213 |
+
"memory.scene_memory.bank_queries",
|
| 214 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 215 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 216 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 217 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 218 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
|
| 219 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
|
| 220 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
|
| 221 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
|
| 222 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
|
| 223 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
|
| 224 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
|
| 225 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
|
| 226 |
+
"memory.scene_memory.bank_attention.in_proj_weight",
|
| 227 |
+
"memory.scene_memory.bank_attention.in_proj_bias",
|
| 228 |
+
"memory.scene_memory.bank_attention.out_proj.weight",
|
| 229 |
+
"memory.scene_memory.bank_attention.out_proj.bias",
|
| 230 |
+
"memory.scene_memory.action_proj.0.weight",
|
| 231 |
+
"memory.scene_memory.action_proj.0.bias",
|
| 232 |
+
"memory.scene_memory.action_proj.1.weight",
|
| 233 |
+
"memory.scene_memory.action_proj.1.bias",
|
| 234 |
+
"memory.scene_memory.write_gate.0.weight",
|
| 235 |
+
"memory.scene_memory.write_gate.0.bias",
|
| 236 |
+
"memory.scene_memory.write_gate.1.weight",
|
| 237 |
+
"memory.scene_memory.write_gate.1.bias",
|
| 238 |
+
"memory.scene_memory.write_gate.3.weight",
|
| 239 |
+
"memory.scene_memory.write_gate.3.bias",
|
| 240 |
+
"memory.scene_memory.token_proj.0.weight",
|
| 241 |
+
"memory.scene_memory.token_proj.0.bias",
|
| 242 |
+
"memory.scene_memory.token_proj.1.weight",
|
| 243 |
+
"memory.scene_memory.token_proj.1.bias",
|
| 244 |
+
"memory.belief_memory.position_embedding",
|
| 245 |
+
"memory.belief_memory.bank_queries",
|
| 246 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 247 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 248 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 249 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 250 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
|
| 251 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
|
| 252 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
|
| 253 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
|
| 254 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
|
| 255 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
|
| 256 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
|
| 257 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
|
| 258 |
+
"memory.belief_memory.bank_attention.in_proj_weight",
|
| 259 |
+
"memory.belief_memory.bank_attention.in_proj_bias",
|
| 260 |
+
"memory.belief_memory.bank_attention.out_proj.weight",
|
| 261 |
+
"memory.belief_memory.bank_attention.out_proj.bias",
|
| 262 |
+
"memory.belief_memory.action_proj.0.weight",
|
| 263 |
+
"memory.belief_memory.action_proj.0.bias",
|
| 264 |
+
"memory.belief_memory.action_proj.1.weight",
|
| 265 |
+
"memory.belief_memory.action_proj.1.bias",
|
| 266 |
+
"memory.belief_memory.write_gate.0.weight",
|
| 267 |
+
"memory.belief_memory.write_gate.0.bias",
|
| 268 |
+
"memory.belief_memory.write_gate.1.weight",
|
| 269 |
+
"memory.belief_memory.write_gate.1.bias",
|
| 270 |
+
"memory.belief_memory.write_gate.3.weight",
|
| 271 |
+
"memory.belief_memory.write_gate.3.bias",
|
| 272 |
+
"memory.belief_memory.token_proj.0.weight",
|
| 273 |
+
"memory.belief_memory.token_proj.0.bias",
|
| 274 |
+
"memory.belief_memory.token_proj.1.weight",
|
| 275 |
+
"memory.belief_memory.token_proj.1.bias",
|
| 276 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
|
| 277 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
|
| 278 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
|
| 279 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
|
| 280 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 281 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 282 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 283 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 284 |
+
"decoder.arm_decoder.layers.0.linear1.weight",
|
| 285 |
+
"decoder.arm_decoder.layers.0.linear1.bias",
|
| 286 |
+
"decoder.arm_decoder.layers.0.linear2.weight",
|
| 287 |
+
"decoder.arm_decoder.layers.0.linear2.bias",
|
| 288 |
+
"decoder.arm_decoder.layers.0.norm1.weight",
|
| 289 |
+
"decoder.arm_decoder.layers.0.norm1.bias",
|
| 290 |
+
"decoder.arm_decoder.layers.0.norm2.weight",
|
| 291 |
+
"decoder.arm_decoder.layers.0.norm2.bias",
|
| 292 |
+
"decoder.arm_decoder.layers.0.norm3.weight",
|
| 293 |
+
"decoder.arm_decoder.layers.0.norm3.bias",
|
| 294 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
|
| 295 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
|
| 296 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
|
| 297 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
|
| 298 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 299 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 300 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 301 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 302 |
+
"decoder.arm_decoder.layers.1.linear1.weight",
|
| 303 |
+
"decoder.arm_decoder.layers.1.linear1.bias",
|
| 304 |
+
"decoder.arm_decoder.layers.1.linear2.weight",
|
| 305 |
+
"decoder.arm_decoder.layers.1.linear2.bias",
|
| 306 |
+
"decoder.arm_decoder.layers.1.norm1.weight",
|
| 307 |
+
"decoder.arm_decoder.layers.1.norm1.bias",
|
| 308 |
+
"decoder.arm_decoder.layers.1.norm2.weight",
|
| 309 |
+
"decoder.arm_decoder.layers.1.norm2.bias",
|
| 310 |
+
"decoder.arm_decoder.layers.1.norm3.weight",
|
| 311 |
+
"decoder.arm_decoder.layers.1.norm3.bias",
|
| 312 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
|
| 313 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
|
| 314 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
|
| 315 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
|
| 316 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 317 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 318 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 319 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 320 |
+
"decoder.arm_decoder.layers.2.linear1.weight",
|
| 321 |
+
"decoder.arm_decoder.layers.2.linear1.bias",
|
| 322 |
+
"decoder.arm_decoder.layers.2.linear2.weight",
|
| 323 |
+
"decoder.arm_decoder.layers.2.linear2.bias",
|
| 324 |
+
"decoder.arm_decoder.layers.2.norm1.weight",
|
| 325 |
+
"decoder.arm_decoder.layers.2.norm1.bias",
|
| 326 |
+
"decoder.arm_decoder.layers.2.norm2.weight",
|
| 327 |
+
"decoder.arm_decoder.layers.2.norm2.bias",
|
| 328 |
+
"decoder.arm_decoder.layers.2.norm3.weight",
|
| 329 |
+
"decoder.arm_decoder.layers.2.norm3.bias",
|
| 330 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
|
| 331 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
|
| 332 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
|
| 333 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
|
| 334 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 335 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 336 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 337 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 338 |
+
"decoder.arm_decoder.layers.3.linear1.weight",
|
| 339 |
+
"decoder.arm_decoder.layers.3.linear1.bias",
|
| 340 |
+
"decoder.arm_decoder.layers.3.linear2.weight",
|
| 341 |
+
"decoder.arm_decoder.layers.3.linear2.bias",
|
| 342 |
+
"decoder.arm_decoder.layers.3.norm1.weight",
|
| 343 |
+
"decoder.arm_decoder.layers.3.norm1.bias",
|
| 344 |
+
"decoder.arm_decoder.layers.3.norm2.weight",
|
| 345 |
+
"decoder.arm_decoder.layers.3.norm2.bias",
|
| 346 |
+
"decoder.arm_decoder.layers.3.norm3.weight",
|
| 347 |
+
"decoder.arm_decoder.layers.3.norm3.bias",
|
| 348 |
+
"decoder.arm_identity.weight",
|
| 349 |
+
"decoder.phase_adapter.weight",
|
| 350 |
+
"decoder.phase_adapter.bias",
|
| 351 |
+
"decoder.role_adapter.weight",
|
| 352 |
+
"decoder.role_adapter.bias",
|
| 353 |
+
"decoder.context_proj.0.weight",
|
| 354 |
+
"decoder.context_proj.0.bias",
|
| 355 |
+
"decoder.context_proj.1.weight",
|
| 356 |
+
"decoder.context_proj.1.bias",
|
| 357 |
+
"decoder.arm_head.0.weight",
|
| 358 |
+
"decoder.arm_head.0.bias",
|
| 359 |
+
"decoder.arm_head.1.weight",
|
| 360 |
+
"decoder.arm_head.1.bias",
|
| 361 |
+
"decoder.arm_mean.weight",
|
| 362 |
+
"decoder.arm_mean.bias",
|
| 363 |
+
"decoder.arm_log_std.weight",
|
| 364 |
+
"decoder.arm_log_std.bias",
|
| 365 |
+
"decoder.proposal_mode_head.0.weight",
|
| 366 |
+
"decoder.proposal_mode_head.0.bias",
|
| 367 |
+
"decoder.proposal_mode_head.1.weight",
|
| 368 |
+
"decoder.proposal_mode_head.1.bias",
|
| 369 |
+
"decoder.proposal_mode_head.3.weight",
|
| 370 |
+
"decoder.proposal_mode_head.3.bias",
|
| 371 |
+
"decoder.proposal_mode_embeddings.weight",
|
| 372 |
+
"decoder.proposal_slot_embeddings.weight",
|
| 373 |
+
"decoder.mode_residual_heads.0.0.weight",
|
| 374 |
+
"decoder.mode_residual_heads.0.0.bias",
|
| 375 |
+
"decoder.mode_residual_heads.0.1.weight",
|
| 376 |
+
"decoder.mode_residual_heads.0.1.bias",
|
| 377 |
+
"decoder.mode_residual_heads.0.3.weight",
|
| 378 |
+
"decoder.mode_residual_heads.0.3.bias",
|
| 379 |
+
"decoder.mode_residual_heads.1.0.weight",
|
| 380 |
+
"decoder.mode_residual_heads.1.0.bias",
|
| 381 |
+
"decoder.mode_residual_heads.1.1.weight",
|
| 382 |
+
"decoder.mode_residual_heads.1.1.bias",
|
| 383 |
+
"decoder.mode_residual_heads.1.3.weight",
|
| 384 |
+
"decoder.mode_residual_heads.1.3.bias",
|
| 385 |
+
"decoder.mode_residual_heads.2.0.weight",
|
| 386 |
+
"decoder.mode_residual_heads.2.0.bias",
|
| 387 |
+
"decoder.mode_residual_heads.2.1.weight",
|
| 388 |
+
"decoder.mode_residual_heads.2.1.bias",
|
| 389 |
+
"decoder.mode_residual_heads.2.3.weight",
|
| 390 |
+
"decoder.mode_residual_heads.2.3.bias",
|
| 391 |
+
"decoder.mode_residual_heads.3.0.weight",
|
| 392 |
+
"decoder.mode_residual_heads.3.0.bias",
|
| 393 |
+
"decoder.mode_residual_heads.3.1.weight",
|
| 394 |
+
"decoder.mode_residual_heads.3.1.bias",
|
| 395 |
+
"decoder.mode_residual_heads.3.3.weight",
|
| 396 |
+
"decoder.mode_residual_heads.3.3.bias",
|
| 397 |
+
"decoder.mode_residual_heads.4.0.weight",
|
| 398 |
+
"decoder.mode_residual_heads.4.0.bias",
|
| 399 |
+
"decoder.mode_residual_heads.4.1.weight",
|
| 400 |
+
"decoder.mode_residual_heads.4.1.bias",
|
| 401 |
+
"decoder.mode_residual_heads.4.3.weight",
|
| 402 |
+
"decoder.mode_residual_heads.4.3.bias",
|
| 403 |
+
"decoder.mode_residual_heads.5.0.weight",
|
| 404 |
+
"decoder.mode_residual_heads.5.0.bias",
|
| 405 |
+
"decoder.mode_residual_heads.5.1.weight",
|
| 406 |
+
"decoder.mode_residual_heads.5.1.bias",
|
| 407 |
+
"decoder.mode_residual_heads.5.3.weight",
|
| 408 |
+
"decoder.mode_residual_heads.5.3.bias",
|
| 409 |
+
"decoder.slot_delta.0.weight",
|
| 410 |
+
"decoder.slot_delta.0.bias",
|
| 411 |
+
"decoder.slot_delta.1.weight",
|
| 412 |
+
"decoder.slot_delta.1.bias",
|
| 413 |
+
"decoder.slot_delta.3.weight",
|
| 414 |
+
"decoder.slot_delta.3.bias",
|
| 415 |
+
"decoder.proposal_score.0.weight",
|
| 416 |
+
"decoder.proposal_score.0.bias",
|
| 417 |
+
"decoder.proposal_score.1.weight",
|
| 418 |
+
"decoder.proposal_score.1.bias",
|
| 419 |
+
"decoder.proposal_score.3.weight",
|
| 420 |
+
"decoder.proposal_score.3.bias",
|
| 421 |
+
"elastic_state_head.interaction_queries",
|
| 422 |
+
"elastic_state_head.interaction_attention.in_proj_weight",
|
| 423 |
+
"elastic_state_head.interaction_attention.in_proj_bias",
|
| 424 |
+
"elastic_state_head.interaction_attention.out_proj.weight",
|
| 425 |
+
"elastic_state_head.interaction_attention.out_proj.bias",
|
| 426 |
+
"elastic_state_head.interaction_mlp.0.weight",
|
| 427 |
+
"elastic_state_head.interaction_mlp.0.bias",
|
| 428 |
+
"elastic_state_head.interaction_mlp.1.weight",
|
| 429 |
+
"elastic_state_head.interaction_mlp.1.bias",
|
| 430 |
+
"elastic_state_head.interaction_mlp.3.weight",
|
| 431 |
+
"elastic_state_head.interaction_mlp.3.bias",
|
| 432 |
+
"elastic_state_head.decoder.field_queries",
|
| 433 |
+
"elastic_state_head.decoder.field_attention.in_proj_weight",
|
| 434 |
+
"elastic_state_head.decoder.field_attention.in_proj_bias",
|
| 435 |
+
"elastic_state_head.decoder.field_attention.out_proj.weight",
|
| 436 |
+
"elastic_state_head.decoder.field_attention.out_proj.bias",
|
| 437 |
+
"elastic_state_head.decoder.field_mlp.0.weight",
|
| 438 |
+
"elastic_state_head.decoder.field_mlp.0.bias",
|
| 439 |
+
"elastic_state_head.decoder.field_mlp.1.weight",
|
| 440 |
+
"elastic_state_head.decoder.field_mlp.1.bias",
|
| 441 |
+
"elastic_state_head.decoder.field_mlp.3.weight",
|
| 442 |
+
"elastic_state_head.decoder.field_mlp.3.bias",
|
| 443 |
+
"elastic_state_head.decoder.summary_proj.0.weight",
|
| 444 |
+
"elastic_state_head.decoder.summary_proj.0.bias",
|
| 445 |
+
"elastic_state_head.decoder.summary_proj.1.weight",
|
| 446 |
+
"elastic_state_head.decoder.summary_proj.1.bias",
|
| 447 |
+
"elastic_state_head.decoder.phase_head.0.weight",
|
| 448 |
+
"elastic_state_head.decoder.phase_head.0.bias",
|
| 449 |
+
"elastic_state_head.decoder.phase_head.1.weight",
|
| 450 |
+
"elastic_state_head.decoder.phase_head.1.bias",
|
| 451 |
+
"elastic_state_head.decoder.phase_head.3.weight",
|
| 452 |
+
"elastic_state_head.decoder.phase_head.3.bias",
|
| 453 |
+
"elastic_state_head.decoder.arm_role_head.0.weight",
|
| 454 |
+
"elastic_state_head.decoder.arm_role_head.0.bias",
|
| 455 |
+
"elastic_state_head.decoder.arm_role_head.1.weight",
|
| 456 |
+
"elastic_state_head.decoder.arm_role_head.1.bias",
|
| 457 |
+
"elastic_state_head.decoder.arm_role_head.3.weight",
|
| 458 |
+
"elastic_state_head.decoder.arm_role_head.3.bias",
|
| 459 |
+
"elastic_state_head.decoder.arm_identity.weight",
|
| 460 |
+
"elastic_state_head.decoder.support_mode.0.weight",
|
| 461 |
+
"elastic_state_head.decoder.support_mode.0.bias",
|
| 462 |
+
"elastic_state_head.decoder.support_mode.1.weight",
|
| 463 |
+
"elastic_state_head.decoder.support_mode.1.bias",
|
| 464 |
+
"elastic_state_head.decoder.support_mode.3.weight",
|
| 465 |
+
"elastic_state_head.decoder.support_mode.3.bias",
|
| 466 |
+
"elastic_state_head.decoder.access_field.weight",
|
| 467 |
+
"elastic_state_head.decoder.access_field.bias",
|
| 468 |
+
"elastic_state_head.decoder.target_belief_field.weight",
|
| 469 |
+
"elastic_state_head.decoder.target_belief_field.bias",
|
| 470 |
+
"elastic_state_head.decoder.visibility_field.weight",
|
| 471 |
+
"elastic_state_head.decoder.visibility_field.bias",
|
| 472 |
+
"elastic_state_head.decoder.clearance_field.weight",
|
| 473 |
+
"elastic_state_head.decoder.clearance_field.bias",
|
| 474 |
+
"elastic_state_head.decoder.occluder_contact_field.weight",
|
| 475 |
+
"elastic_state_head.decoder.occluder_contact_field.bias",
|
| 476 |
+
"elastic_state_head.decoder.grasp_affordance_field.weight",
|
| 477 |
+
"elastic_state_head.decoder.grasp_affordance_field.bias",
|
| 478 |
+
"elastic_state_head.decoder.support_stability_field.weight",
|
| 479 |
+
"elastic_state_head.decoder.support_stability_field.bias",
|
| 480 |
+
"elastic_state_head.decoder.persistence_field.weight",
|
| 481 |
+
"elastic_state_head.decoder.persistence_field.bias",
|
| 482 |
+
"elastic_state_head.decoder.reocclusion_field.weight",
|
| 483 |
+
"elastic_state_head.decoder.reocclusion_field.bias",
|
| 484 |
+
"elastic_state_head.decoder.disturbance_field.weight",
|
| 485 |
+
"elastic_state_head.decoder.disturbance_field.bias",
|
| 486 |
+
"elastic_state_head.decoder.uncertainty_field.weight",
|
| 487 |
+
"elastic_state_head.decoder.uncertainty_field.bias",
|
| 488 |
+
"elastic_state_head.decoder.reocclusion_head.0.weight",
|
| 489 |
+
"elastic_state_head.decoder.reocclusion_head.0.bias",
|
| 490 |
+
"elastic_state_head.decoder.reocclusion_head.1.weight",
|
| 491 |
+
"elastic_state_head.decoder.reocclusion_head.1.bias",
|
| 492 |
+
"elastic_state_head.decoder.reocclusion_head.3.weight",
|
| 493 |
+
"elastic_state_head.decoder.reocclusion_head.3.bias",
|
| 494 |
+
"world_model.state_encoder.0.weight",
|
| 495 |
+
"world_model.state_encoder.0.bias",
|
| 496 |
+
"world_model.state_encoder.1.weight",
|
| 497 |
+
"world_model.state_encoder.1.bias",
|
| 498 |
+
"world_model.scene_memory_proj.0.weight",
|
| 499 |
+
"world_model.scene_memory_proj.0.bias",
|
| 500 |
+
"world_model.scene_memory_proj.1.weight",
|
| 501 |
+
"world_model.scene_memory_proj.1.bias",
|
| 502 |
+
"world_model.belief_memory_proj.0.weight",
|
| 503 |
+
"world_model.belief_memory_proj.0.bias",
|
| 504 |
+
"world_model.belief_memory_proj.1.weight",
|
| 505 |
+
"world_model.belief_memory_proj.1.bias",
|
| 506 |
+
"world_model.action_encoder.0.weight",
|
| 507 |
+
"world_model.action_encoder.0.bias",
|
| 508 |
+
"world_model.action_encoder.1.weight",
|
| 509 |
+
"world_model.action_encoder.1.bias",
|
| 510 |
+
"world_model.transition.weight_ih",
|
| 511 |
+
"world_model.transition.weight_hh",
|
| 512 |
+
"world_model.transition.bias_ih",
|
| 513 |
+
"world_model.transition.bias_hh",
|
| 514 |
+
"world_model.scene_memory_update.weight",
|
| 515 |
+
"world_model.scene_memory_update.bias",
|
| 516 |
+
"world_model.belief_memory_update.weight",
|
| 517 |
+
"world_model.belief_memory_update.bias",
|
| 518 |
+
"world_model.compact_decoder.weight",
|
| 519 |
+
"world_model.compact_decoder.bias",
|
| 520 |
+
"world_model.target_belief_head.weight",
|
| 521 |
+
"world_model.target_belief_head.bias",
|
| 522 |
+
"world_model.visibility_head.weight",
|
| 523 |
+
"world_model.visibility_head.bias",
|
| 524 |
+
"world_model.clearance_head.weight",
|
| 525 |
+
"world_model.clearance_head.bias",
|
| 526 |
+
"world_model.occluder_contact_head.weight",
|
| 527 |
+
"world_model.occluder_contact_head.bias",
|
| 528 |
+
"world_model.grasp_affordance_head.weight",
|
| 529 |
+
"world_model.grasp_affordance_head.bias",
|
| 530 |
+
"world_model.support_stability_head.weight",
|
| 531 |
+
"world_model.support_stability_head.bias",
|
| 532 |
+
"world_model.persistence_head.weight",
|
| 533 |
+
"world_model.persistence_head.bias",
|
| 534 |
+
"world_model.reocclusion_head.weight",
|
| 535 |
+
"world_model.reocclusion_head.bias",
|
| 536 |
+
"world_model.disturbance_head.weight",
|
| 537 |
+
"world_model.disturbance_head.bias",
|
| 538 |
+
"world_model.uncertainty_head.weight",
|
| 539 |
+
"world_model.uncertainty_head.bias",
|
| 540 |
+
"world_model.access_head.weight",
|
| 541 |
+
"world_model.access_head.bias",
|
| 542 |
+
"planner.residual.trunk.0.weight",
|
| 543 |
+
"planner.residual.trunk.0.bias",
|
| 544 |
+
"planner.residual.trunk.1.weight",
|
| 545 |
+
"planner.residual.trunk.1.bias",
|
| 546 |
+
"planner.residual.trunk.3.weight",
|
| 547 |
+
"planner.residual.trunk.3.bias",
|
| 548 |
+
"planner.residual.success_head.weight",
|
| 549 |
+
"planner.residual.success_head.bias",
|
| 550 |
+
"planner.residual.risk_head.weight",
|
| 551 |
+
"planner.residual.risk_head.bias",
|
| 552 |
+
"planner.residual.residual_head.weight",
|
| 553 |
+
"planner.residual.residual_head.bias"
|
| 554 |
+
],
|
| 555 |
+
"unexpected_keys": []
|
| 556 |
+
}
|
| 557 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.5,
|
| 5 |
+
"bag_proxy": 0.5416666666666666,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5694444444444443,
|
| 9 |
+
"visibility_integral": 32.623872251146366,
|
| 10 |
+
"corridor_availability": 0.889709601799647,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.1627785900766536,
|
| 13 |
+
"disturbance_cost": 0.2332938505957524
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.569
|
| 6 |
+
- visibility_integral: 32.624
|
| 7 |
+
- corridor_availability: 0.890
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.163
|
| 10 |
+
- disturbance_cost: 0.233
|
| 11 |
+
- foliage_proxy_success: 0.500
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_clip_seed9
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 9
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
proxies:
|
| 9 |
+
- foliage_proxy
|
| 10 |
+
- bag_proxy
|
| 11 |
+
- cloth_proxy
|
| 12 |
+
resolution: 224
|
| 13 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 14 |
+
train_episodes_per_proxy: 48
|
| 15 |
+
val_episodes_per_proxy: 16
|
| 16 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed9.pt
|
| 17 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed9.pt
|
| 18 |
+
rebuild_dataset: false
|
| 19 |
+
chunk_horizon: 8
|
| 20 |
+
rollout_horizon: 5
|
| 21 |
+
history_steps: 6
|
| 22 |
+
planner_candidates: 8
|
| 23 |
+
seed: 9
|
| 24 |
+
optim:
|
| 25 |
+
epochs: 4
|
| 26 |
+
batch_size: 2
|
| 27 |
+
num_workers: 4
|
| 28 |
+
lr: 0.0003
|
| 29 |
+
weight_decay: 0.0001
|
| 30 |
+
trainer:
|
| 31 |
+
policy_type: elastic_reveal
|
| 32 |
+
use_bf16: true
|
| 33 |
+
grad_clip_norm: 1.0
|
| 34 |
+
freeze_backbone: true
|
| 35 |
+
gradient_checkpointing: false
|
| 36 |
+
plan_during_train: true
|
| 37 |
+
plan_during_eval: true
|
| 38 |
+
support_mode_conditioning: true
|
| 39 |
+
planner_mode: trainable
|
| 40 |
+
use_depth: false
|
| 41 |
+
use_world_model: true
|
| 42 |
+
use_role_tokens: true
|
| 43 |
+
compute_equivariance_probe: true
|
| 44 |
+
policy:
|
| 45 |
+
backbone:
|
| 46 |
+
model_name: openai/clip-vit-base-patch32
|
| 47 |
+
hidden_dim: 512
|
| 48 |
+
max_text_tokens: 32
|
| 49 |
+
freeze_backbone: true
|
| 50 |
+
gradient_checkpointing: false
|
| 51 |
+
use_dummy_backbone: false
|
| 52 |
+
fusion:
|
| 53 |
+
hidden_dim: 512
|
| 54 |
+
num_cameras: 3
|
| 55 |
+
num_layers: 4
|
| 56 |
+
num_heads: 8
|
| 57 |
+
ff_dim: 2048
|
| 58 |
+
dropout: 0.1
|
| 59 |
+
proprio_dim: 32
|
| 60 |
+
proprio_tokens: 1
|
| 61 |
+
memory:
|
| 62 |
+
hidden_dim: 512
|
| 63 |
+
action_dim: 14
|
| 64 |
+
history_steps: 6
|
| 65 |
+
scene_history_steps: 3
|
| 66 |
+
belief_history_steps: 8
|
| 67 |
+
num_layers: 2
|
| 68 |
+
dropout: 0.1
|
| 69 |
+
memory_bank_size: 4
|
| 70 |
+
scene_bank_size: 2
|
| 71 |
+
belief_bank_size: 2
|
| 72 |
+
num_heads: 8
|
| 73 |
+
max_history_steps: 8
|
| 74 |
+
decoder:
|
| 75 |
+
hidden_dim: 512
|
| 76 |
+
num_heads: 8
|
| 77 |
+
num_layers: 4
|
| 78 |
+
ff_dim: 2048
|
| 79 |
+
dropout: 0.1
|
| 80 |
+
chunk_size: 8
|
| 81 |
+
action_dim: 14
|
| 82 |
+
arm_action_dim: 7
|
| 83 |
+
num_candidates: 8
|
| 84 |
+
num_phases: 5
|
| 85 |
+
num_arm_roles: 4
|
| 86 |
+
num_proposal_modes: 6
|
| 87 |
+
planner_top_k: 4
|
| 88 |
+
reveal_head:
|
| 89 |
+
hidden_dim: 512
|
| 90 |
+
num_support_modes: 3
|
| 91 |
+
num_approach_templates: 32
|
| 92 |
+
rollout_horizon: 5
|
| 93 |
+
belief_map_size: 32
|
| 94 |
+
field_size: 16
|
| 95 |
+
num_heads: 8
|
| 96 |
+
predict_belief_map: true
|
| 97 |
+
num_phases: 5
|
| 98 |
+
num_arm_roles: 4
|
| 99 |
+
num_interaction_tokens: 8
|
| 100 |
+
world_model:
|
| 101 |
+
hidden_dim: 512
|
| 102 |
+
action_dim: 14
|
| 103 |
+
num_support_modes: 3
|
| 104 |
+
num_approach_templates: 32
|
| 105 |
+
rollout_horizon: 5
|
| 106 |
+
field_size: 16
|
| 107 |
+
num_heads: 8
|
| 108 |
+
num_phases: 5
|
| 109 |
+
num_arm_roles: 4
|
| 110 |
+
num_interaction_tokens: 8
|
| 111 |
+
belief_map_size: 32
|
| 112 |
+
predict_belief_map: true
|
| 113 |
+
scene_bank_size: 2
|
| 114 |
+
belief_bank_size: 2
|
| 115 |
+
planner:
|
| 116 |
+
hidden_dim: 512
|
| 117 |
+
num_candidates: 8
|
| 118 |
+
action_dim: 14
|
| 119 |
+
num_support_modes: 3
|
| 120 |
+
utility_margin: 0.1
|
| 121 |
+
num_heads: 8
|
| 122 |
+
num_layers: 2
|
| 123 |
+
num_phases: 5
|
| 124 |
+
num_arm_roles: 4
|
| 125 |
+
top_k: 4
|
| 126 |
+
loss_weights:
|
| 127 |
+
action: 1.0
|
| 128 |
+
phase: 0.1
|
| 129 |
+
arm_role: 0.15
|
| 130 |
+
support_mode: 0.1
|
| 131 |
+
corridor: 0.15
|
| 132 |
+
persistence: 0.05
|
| 133 |
+
disturbance: 0.05
|
| 134 |
+
world_model: 0.2
|
| 135 |
+
belief: 0.05
|
| 136 |
+
visibility: 0.05
|
| 137 |
+
clearance: 0.05
|
| 138 |
+
support_stability: 0.05
|
| 139 |
+
reocclusion: 0.05
|
| 140 |
+
occluder_contact: 0.05
|
| 141 |
+
grasp_affordance: 0.05
|
| 142 |
+
planner_success: 0.25
|
| 143 |
+
planner_risk: 0.1
|
| 144 |
+
planner_ranking: 0.2
|
| 145 |
+
proposal_reconstruction: 0.1
|
| 146 |
+
proposal_success: 0.15
|
| 147 |
+
proposal_ranking: 0.2
|
| 148 |
+
proposal_diversity: 0.05
|
| 149 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.2890625,
|
| 3 |
+
"planner_regret": 0.02300698682665825,
|
| 4 |
+
"planner_score_utility_spearman": 0.22968751192092896,
|
| 5 |
+
"risk_calibration_mse": 0.010304542258381844,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.022611485794186592,
|
| 8 |
+
"left_right_equivariance_error": 8.689248215887346e-05,
|
| 9 |
+
"belief_calibration_brier": 0.0043337177485227585,
|
| 10 |
+
"reocclusion_calibration_brier": 0.22800305485725403,
|
| 11 |
+
"support_stability_mae": 0.02859283983707428,
|
| 12 |
+
"clearance_auc": 0.6329041426155311,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.2469944953918457,
|
| 15 |
+
"num_samples": 128
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.027812569460978633,
|
| 6 |
+
"arm_role": 0.030137697646492406,
|
| 7 |
+
"belief": 0.12157152328444154,
|
| 8 |
+
"clearance": 0.09282162053216444,
|
| 9 |
+
"corridor": 0.2851656379864404,
|
| 10 |
+
"disturbance": 0.004553798715077344,
|
| 11 |
+
"grasp_affordance": 0.018851539715634365,
|
| 12 |
+
"occluder_contact": 0.2132460696916831,
|
| 13 |
+
"persistence": 5.642576662878807,
|
| 14 |
+
"phase": 0.7761939600894325,
|
| 15 |
+
"planner_ranking": 0.17902961440620282,
|
| 16 |
+
"planner_risk": 0.013923984336035668,
|
| 17 |
+
"planner_success": 0.6199151214800382,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.2823116054660395,
|
| 20 |
+
"proposal_reconstruction": 0.06912861580127164,
|
| 21 |
+
"proposal_success": 0.6811760576147782,
|
| 22 |
+
"reocclusion": 0.7353295496419856,
|
| 23 |
+
"role_swap_consistency": 0.0005873552748725113,
|
| 24 |
+
"support_mode": 0.7828435195119757,
|
| 25 |
+
"support_stability": 0.16347284512594343,
|
| 26 |
+
"total": 1.6866143584251403,
|
| 27 |
+
"uncertainty": 0.019001170223897423,
|
| 28 |
+
"visibility": 0.11754893544865282,
|
| 29 |
+
"world_model": 2.710779071795313
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.02170204828144051,
|
| 33 |
+
"arm_role": 6.762321064002208e-06,
|
| 34 |
+
"belief": 0.10080993873998523,
|
| 35 |
+
"clearance": 0.08166962582617998,
|
| 36 |
+
"corridor": 0.23909102065954357,
|
| 37 |
+
"disturbance": 0.001983066906802833,
|
| 38 |
+
"grasp_affordance": 0.008535019573173486,
|
| 39 |
+
"occluder_contact": 0.2112727805506438,
|
| 40 |
+
"persistence": 3.857563339173794,
|
| 41 |
+
"phase": 0.6654304726980627,
|
| 42 |
+
"planner_ranking": 0.04032187890697969,
|
| 43 |
+
"planner_risk": 0.011350331830726645,
|
| 44 |
+
"planner_success": 0.5934910103678703,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.1493350621312857,
|
| 47 |
+
"proposal_reconstruction": 0.06338102876907215,
|
| 48 |
+
"proposal_success": 0.6806164355948567,
|
| 49 |
+
"reocclusion": 0.6909330077469349,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.6831411011517048,
|
| 52 |
+
"support_stability": 0.13910080850473605,
|
| 53 |
+
"total": 1.458911145105958,
|
| 54 |
+
"uncertainty": 0.0033405375688744243,
|
| 55 |
+
"visibility": 0.09547075629234314,
|
| 56 |
+
"world_model": 2.5560860373079777
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.023493385471795733,
|
| 63 |
+
"arm_role": 0.0002928718140250758,
|
| 64 |
+
"belief": 0.10523007610126546,
|
| 65 |
+
"clearance": 0.08677955961933262,
|
| 66 |
+
"corridor": 0.25750191186211613,
|
| 67 |
+
"disturbance": 0.0031594517295421777,
|
| 68 |
+
"grasp_affordance": 0.01005841078187682,
|
| 69 |
+
"occluder_contact": 0.20920588836858148,
|
| 70 |
+
"persistence": 4.331643560058192,
|
| 71 |
+
"phase": 0.7189607319078948,
|
| 72 |
+
"planner_ranking": 0.05423959079287933,
|
| 73 |
+
"planner_risk": 0.010427037446980217,
|
| 74 |
+
"planner_success": 0.5849820621703801,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.1505002517449228,
|
| 77 |
+
"proposal_reconstruction": 0.06525950771021216,
|
| 78 |
+
"proposal_success": 0.6752778025049913,
|
| 79 |
+
"reocclusion": 0.7005268357302014,
|
| 80 |
+
"role_swap_consistency": 0.0007142310405278726,
|
| 81 |
+
"support_mode": 0.70107421875,
|
| 82 |
+
"support_stability": 0.14081861141480898,
|
| 83 |
+
"total": 1.432289683818817,
|
| 84 |
+
"uncertainty": 0.002551493341237993,
|
| 85 |
+
"visibility": 0.10134971671198544,
|
| 86 |
+
"world_model": 2.237849539204648
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.021186921891057864,
|
| 90 |
+
"arm_role": 3.6694105953749556e-07,
|
| 91 |
+
"belief": 0.09995241661090404,
|
| 92 |
+
"clearance": 0.08146111795213073,
|
| 93 |
+
"corridor": 0.24082361184991896,
|
| 94 |
+
"disturbance": 0.001976304362585779,
|
| 95 |
+
"grasp_affordance": 0.00922958003502572,
|
| 96 |
+
"occluder_contact": 0.21127386414445937,
|
| 97 |
+
"persistence": 3.7571401111781597,
|
| 98 |
+
"phase": 0.6817005267366767,
|
| 99 |
+
"planner_ranking": 0.03515352255374182,
|
| 100 |
+
"planner_risk": 0.01038273600534012,
|
| 101 |
+
"planner_success": 0.5073812543414533,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.1285581476986408,
|
| 104 |
+
"proposal_reconstruction": 0.0629420520272106,
|
| 105 |
+
"proposal_success": 0.6745674163103104,
|
| 106 |
+
"reocclusion": 0.6919681001454592,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 0.6647901809774339,
|
| 109 |
+
"support_stability": 0.14570825529517606,
|
| 110 |
+
"total": 1.3415670674294233,
|
| 111 |
+
"uncertainty": 0.0013466343752952525,
|
| 112 |
+
"visibility": 0.09475092665525153,
|
| 113 |
+
"world_model": 2.1340785464271903
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.021538028542540576,
|
| 120 |
+
"arm_role": 2.1901883577045642e-05,
|
| 121 |
+
"belief": 0.10526431232298675,
|
| 122 |
+
"clearance": 0.08594944182979433,
|
| 123 |
+
"corridor": 0.24735975777240177,
|
| 124 |
+
"disturbance": 0.0026733651749964336,
|
| 125 |
+
"grasp_affordance": 0.010091915089440974,
|
| 126 |
+
"occluder_contact": 0.20871730721310566,
|
| 127 |
+
"persistence": 4.281911664887478,
|
| 128 |
+
"phase": 0.6870194284539474,
|
| 129 |
+
"planner_ranking": 0.04152601579832519,
|
| 130 |
+
"planner_risk": 0.01045033406331449,
|
| 131 |
+
"planner_success": 0.5353652712545897,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1453557397189893,
|
| 134 |
+
"proposal_reconstruction": 0.06370952629337186,
|
| 135 |
+
"proposal_success": 0.6778088651205364,
|
| 136 |
+
"reocclusion": 0.6986164701612372,
|
| 137 |
+
"role_swap_consistency": 0.0004750598012929243,
|
| 138 |
+
"support_mode": 0.6878212376644737,
|
| 139 |
+
"support_stability": 0.1362508504700504,
|
| 140 |
+
"total": 1.384049719885776,
|
| 141 |
+
"uncertainty": 0.001396400365047157,
|
| 142 |
+
"visibility": 0.09892214826847377,
|
| 143 |
+
"world_model": 2.1307888821551675
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.021681111145881005,
|
| 147 |
+
"arm_role": 0.0003864255304506514,
|
| 148 |
+
"belief": 0.10844068287406117,
|
| 149 |
+
"clearance": 0.08775011514080688,
|
| 150 |
+
"corridor": 0.23830276518128812,
|
| 151 |
+
"disturbance": 0.0019835491895037194,
|
| 152 |
+
"grasp_affordance": 0.011450761739979498,
|
| 153 |
+
"occluder_contact": 0.21598492935299873,
|
| 154 |
+
"persistence": 3.682887438684702,
|
| 155 |
+
"phase": 0.6754010105505586,
|
| 156 |
+
"planner_ranking": 0.03584061572041719,
|
| 157 |
+
"planner_risk": 0.010325502114255869,
|
| 158 |
+
"planner_success": 0.49944606237113476,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.1196386851370335,
|
| 161 |
+
"proposal_reconstruction": 0.0637086319620721,
|
| 162 |
+
"proposal_success": 0.6784614324569702,
|
| 163 |
+
"reocclusion": 0.6908501861616969,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 0.6635435968637466,
|
| 166 |
+
"support_stability": 0.14290154923219234,
|
| 167 |
+
"total": 1.3013203730806708,
|
| 168 |
+
"uncertainty": 0.002612559406315995,
|
| 169 |
+
"visibility": 0.10054636449785903,
|
| 170 |
+
"world_model": 1.9632274899631739
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.02116909674123714,
|
| 177 |
+
"arm_role": 0.00017300687338176526,
|
| 178 |
+
"belief": 0.10208533270970771,
|
| 179 |
+
"clearance": 0.08287150121637081,
|
| 180 |
+
"corridor": 0.24314571875882776,
|
| 181 |
+
"disturbance": 0.002553280315360577,
|
| 182 |
+
"grasp_affordance": 0.010202447837218642,
|
| 183 |
+
"occluder_contact": 0.20370756677891078,
|
| 184 |
+
"persistence": 3.4343402633541507,
|
| 185 |
+
"phase": 0.6811472039473684,
|
| 186 |
+
"planner_ranking": 0.03300265433170257,
|
| 187 |
+
"planner_risk": 0.010154466018828221,
|
| 188 |
+
"planner_success": 0.5132313249338615,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.1288216785380716,
|
| 191 |
+
"proposal_reconstruction": 0.06323393973472871,
|
| 192 |
+
"proposal_success": 0.6770071575516149,
|
| 193 |
+
"reocclusion": 0.7064933630980943,
|
| 194 |
+
"role_swap_consistency": 0.0003766025873023625,
|
| 195 |
+
"support_mode": 0.7007555509868421,
|
| 196 |
+
"support_stability": 0.1340178519732466,
|
| 197 |
+
"total": 1.314924956309168,
|
| 198 |
+
"uncertainty": 0.0012071453580622467,
|
| 199 |
+
"visibility": 0.09558045302370662,
|
| 200 |
+
"world_model": 2.054408212398228
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.021696553943911567,
|
| 204 |
+
"arm_role": 6.053594985289124e-07,
|
| 205 |
+
"belief": 0.0983218071050942,
|
| 206 |
+
"clearance": 0.07689482159912586,
|
| 207 |
+
"corridor": 0.29242096332018264,
|
| 208 |
+
"disturbance": 0.0041615761442699295,
|
| 209 |
+
"grasp_affordance": 0.0100187708158046,
|
| 210 |
+
"occluder_contact": 0.19618010916747153,
|
| 211 |
+
"persistence": 4.662721422035247,
|
| 212 |
+
"phase": 0.6692422716878355,
|
| 213 |
+
"planner_ranking": 0.030305169929533804,
|
| 214 |
+
"planner_risk": 0.010842124038390466,
|
| 215 |
+
"planner_success": 0.5005343491211534,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1591037698090076,
|
| 218 |
+
"proposal_reconstruction": 0.06389545585261658,
|
| 219 |
+
"proposal_success": 0.6826766086742282,
|
| 220 |
+
"reocclusion": 0.7785650952719152,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 0.6616131067276001,
|
| 223 |
+
"support_stability": 0.1388778503460344,
|
| 224 |
+
"total": 1.3739404007792473,
|
| 225 |
+
"uncertainty": 2.288464340693963e-05,
|
| 226 |
+
"visibility": 0.09415236074710265,
|
| 227 |
+
"world_model": 1.9970475500449538
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_clip_seed9",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.314924956309168,
|
| 6 |
+
"final_val_total": 1.3739404007792473,
|
| 7 |
+
"train_time_sec": 146.7574381828308,
|
| 8 |
+
"peak_gpu_memory_mb": 1915.8154296875,
|
| 9 |
+
"num_train_samples": 380,
|
| 10 |
+
"num_val_samples": 128,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": {
|
| 14 |
+
"path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
|
| 15 |
+
"loaded_keys": 461,
|
| 16 |
+
"skipped_shape_mismatch_keys": [
|
| 17 |
+
"memory.gru.weight_ih_l0",
|
| 18 |
+
"memory.gru.weight_hh_l0",
|
| 19 |
+
"memory.gru.bias_ih_l0",
|
| 20 |
+
"memory.gru.bias_hh_l0",
|
| 21 |
+
"memory.token_proj.0.weight",
|
| 22 |
+
"memory.token_proj.0.bias",
|
| 23 |
+
"memory.token_proj.1.weight",
|
| 24 |
+
"memory.token_proj.1.bias",
|
| 25 |
+
"decoder.actor_role_bias",
|
| 26 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
|
| 27 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
|
| 28 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
|
| 29 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
|
| 30 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 31 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 32 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 33 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 34 |
+
"decoder.revealer_decoder.layers.0.linear1.weight",
|
| 35 |
+
"decoder.revealer_decoder.layers.0.linear1.bias",
|
| 36 |
+
"decoder.revealer_decoder.layers.0.linear2.weight",
|
| 37 |
+
"decoder.revealer_decoder.layers.0.linear2.bias",
|
| 38 |
+
"decoder.revealer_decoder.layers.0.norm1.weight",
|
| 39 |
+
"decoder.revealer_decoder.layers.0.norm1.bias",
|
| 40 |
+
"decoder.revealer_decoder.layers.0.norm2.weight",
|
| 41 |
+
"decoder.revealer_decoder.layers.0.norm2.bias",
|
| 42 |
+
"decoder.revealer_decoder.layers.0.norm3.weight",
|
| 43 |
+
"decoder.revealer_decoder.layers.0.norm3.bias",
|
| 44 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
|
| 45 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
|
| 46 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
|
| 47 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
|
| 48 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 49 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 50 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 51 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 52 |
+
"decoder.revealer_decoder.layers.1.linear1.weight",
|
| 53 |
+
"decoder.revealer_decoder.layers.1.linear1.bias",
|
| 54 |
+
"decoder.revealer_decoder.layers.1.linear2.weight",
|
| 55 |
+
"decoder.revealer_decoder.layers.1.linear2.bias",
|
| 56 |
+
"decoder.revealer_decoder.layers.1.norm1.weight",
|
| 57 |
+
"decoder.revealer_decoder.layers.1.norm1.bias",
|
| 58 |
+
"decoder.revealer_decoder.layers.1.norm2.weight",
|
| 59 |
+
"decoder.revealer_decoder.layers.1.norm2.bias",
|
| 60 |
+
"decoder.revealer_decoder.layers.1.norm3.weight",
|
| 61 |
+
"decoder.revealer_decoder.layers.1.norm3.bias",
|
| 62 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
|
| 63 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
|
| 64 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
|
| 65 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
|
| 66 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 67 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 68 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 69 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 70 |
+
"decoder.revealer_decoder.layers.2.linear1.weight",
|
| 71 |
+
"decoder.revealer_decoder.layers.2.linear1.bias",
|
| 72 |
+
"decoder.revealer_decoder.layers.2.linear2.weight",
|
| 73 |
+
"decoder.revealer_decoder.layers.2.linear2.bias",
|
| 74 |
+
"decoder.revealer_decoder.layers.2.norm1.weight",
|
| 75 |
+
"decoder.revealer_decoder.layers.2.norm1.bias",
|
| 76 |
+
"decoder.revealer_decoder.layers.2.norm2.weight",
|
| 77 |
+
"decoder.revealer_decoder.layers.2.norm2.bias",
|
| 78 |
+
"decoder.revealer_decoder.layers.2.norm3.weight",
|
| 79 |
+
"decoder.revealer_decoder.layers.2.norm3.bias",
|
| 80 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
|
| 81 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
|
| 82 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
|
| 83 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
|
| 84 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 85 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 86 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 87 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 88 |
+
"decoder.revealer_decoder.layers.3.linear1.weight",
|
| 89 |
+
"decoder.revealer_decoder.layers.3.linear1.bias",
|
| 90 |
+
"decoder.revealer_decoder.layers.3.linear2.weight",
|
| 91 |
+
"decoder.revealer_decoder.layers.3.linear2.bias",
|
| 92 |
+
"decoder.revealer_decoder.layers.3.norm1.weight",
|
| 93 |
+
"decoder.revealer_decoder.layers.3.norm1.bias",
|
| 94 |
+
"decoder.revealer_decoder.layers.3.norm2.weight",
|
| 95 |
+
"decoder.revealer_decoder.layers.3.norm2.bias",
|
| 96 |
+
"decoder.revealer_decoder.layers.3.norm3.weight",
|
| 97 |
+
"decoder.revealer_decoder.layers.3.norm3.bias",
|
| 98 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
|
| 99 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
|
| 100 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
|
| 101 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
|
| 102 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 103 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 104 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 105 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 106 |
+
"decoder.actor_decoder.layers.0.linear1.weight",
|
| 107 |
+
"decoder.actor_decoder.layers.0.linear1.bias",
|
| 108 |
+
"decoder.actor_decoder.layers.0.linear2.weight",
|
| 109 |
+
"decoder.actor_decoder.layers.0.linear2.bias",
|
| 110 |
+
"decoder.actor_decoder.layers.0.norm1.weight",
|
| 111 |
+
"decoder.actor_decoder.layers.0.norm1.bias",
|
| 112 |
+
"decoder.actor_decoder.layers.0.norm2.weight",
|
| 113 |
+
"decoder.actor_decoder.layers.0.norm2.bias",
|
| 114 |
+
"decoder.actor_decoder.layers.0.norm3.weight",
|
| 115 |
+
"decoder.actor_decoder.layers.0.norm3.bias",
|
| 116 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
|
| 117 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
|
| 118 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
|
| 119 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
|
| 120 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 121 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 122 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 123 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 124 |
+
"decoder.actor_decoder.layers.1.linear1.weight",
|
| 125 |
+
"decoder.actor_decoder.layers.1.linear1.bias",
|
| 126 |
+
"decoder.actor_decoder.layers.1.linear2.weight",
|
| 127 |
+
"decoder.actor_decoder.layers.1.linear2.bias",
|
| 128 |
+
"decoder.actor_decoder.layers.1.norm1.weight",
|
| 129 |
+
"decoder.actor_decoder.layers.1.norm1.bias",
|
| 130 |
+
"decoder.actor_decoder.layers.1.norm2.weight",
|
| 131 |
+
"decoder.actor_decoder.layers.1.norm2.bias",
|
| 132 |
+
"decoder.actor_decoder.layers.1.norm3.weight",
|
| 133 |
+
"decoder.actor_decoder.layers.1.norm3.bias",
|
| 134 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
|
| 135 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
|
| 136 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
|
| 137 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
|
| 138 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 139 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 140 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 141 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 142 |
+
"decoder.actor_decoder.layers.2.linear1.weight",
|
| 143 |
+
"decoder.actor_decoder.layers.2.linear1.bias",
|
| 144 |
+
"decoder.actor_decoder.layers.2.linear2.weight",
|
| 145 |
+
"decoder.actor_decoder.layers.2.linear2.bias",
|
| 146 |
+
"decoder.actor_decoder.layers.2.norm1.weight",
|
| 147 |
+
"decoder.actor_decoder.layers.2.norm1.bias",
|
| 148 |
+
"decoder.actor_decoder.layers.2.norm2.weight",
|
| 149 |
+
"decoder.actor_decoder.layers.2.norm2.bias",
|
| 150 |
+
"decoder.actor_decoder.layers.2.norm3.weight",
|
| 151 |
+
"decoder.actor_decoder.layers.2.norm3.bias",
|
| 152 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
|
| 153 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
|
| 154 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
|
| 155 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
|
| 156 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 157 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 158 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 159 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 160 |
+
"decoder.actor_decoder.layers.3.linear1.weight",
|
| 161 |
+
"decoder.actor_decoder.layers.3.linear1.bias",
|
| 162 |
+
"decoder.actor_decoder.layers.3.linear2.weight",
|
| 163 |
+
"decoder.actor_decoder.layers.3.linear2.bias",
|
| 164 |
+
"decoder.actor_decoder.layers.3.norm1.weight",
|
| 165 |
+
"decoder.actor_decoder.layers.3.norm1.bias",
|
| 166 |
+
"decoder.actor_decoder.layers.3.norm2.weight",
|
| 167 |
+
"decoder.actor_decoder.layers.3.norm2.bias",
|
| 168 |
+
"decoder.actor_decoder.layers.3.norm3.weight",
|
| 169 |
+
"decoder.actor_decoder.layers.3.norm3.bias",
|
| 170 |
+
"decoder.revealer_mean.weight",
|
| 171 |
+
"decoder.revealer_mean.bias",
|
| 172 |
+
"decoder.revealer_log_std.weight",
|
| 173 |
+
"decoder.revealer_log_std.bias",
|
| 174 |
+
"decoder.actor_mean.weight",
|
| 175 |
+
"decoder.actor_mean.bias",
|
| 176 |
+
"decoder.actor_log_std.weight",
|
| 177 |
+
"decoder.actor_log_std.bias",
|
| 178 |
+
"decoder.proposal_score.0.weight",
|
| 179 |
+
"decoder.proposal_score.0.bias",
|
| 180 |
+
"decoder.proposal_score.1.weight",
|
| 181 |
+
"decoder.proposal_score.1.bias"
|
| 182 |
+
],
|
| 183 |
+
"missing_keys": [
|
| 184 |
+
"backbone.depth_adapter.depth_proj.0.weight",
|
| 185 |
+
"backbone.depth_adapter.depth_proj.0.bias",
|
| 186 |
+
"backbone.depth_adapter.depth_proj.1.weight",
|
| 187 |
+
"backbone.depth_adapter.depth_proj.1.bias",
|
| 188 |
+
"backbone.depth_adapter.depth_proj.3.weight",
|
| 189 |
+
"backbone.depth_adapter.depth_proj.3.bias",
|
| 190 |
+
"backbone.depth_adapter.geometry_proj.0.weight",
|
| 191 |
+
"backbone.depth_adapter.geometry_proj.0.bias",
|
| 192 |
+
"backbone.depth_adapter.geometry_proj.1.weight",
|
| 193 |
+
"backbone.depth_adapter.geometry_proj.1.bias",
|
| 194 |
+
"backbone.depth_adapter.camera_proj.0.weight",
|
| 195 |
+
"backbone.depth_adapter.camera_proj.0.bias",
|
| 196 |
+
"backbone.depth_adapter.camera_proj.1.weight",
|
| 197 |
+
"backbone.depth_adapter.camera_proj.1.bias",
|
| 198 |
+
"fusion.geometry_fusion.attn.in_proj_weight",
|
| 199 |
+
"fusion.geometry_fusion.attn.in_proj_bias",
|
| 200 |
+
"fusion.geometry_fusion.attn.out_proj.weight",
|
| 201 |
+
"fusion.geometry_fusion.attn.out_proj.bias",
|
| 202 |
+
"fusion.geometry_fusion.gate.0.weight",
|
| 203 |
+
"fusion.geometry_fusion.gate.0.bias",
|
| 204 |
+
"fusion.geometry_fusion.gate.1.weight",
|
| 205 |
+
"fusion.geometry_fusion.gate.1.bias",
|
| 206 |
+
"fusion.geometry_fusion.gate.3.weight",
|
| 207 |
+
"fusion.geometry_fusion.gate.3.bias",
|
| 208 |
+
"fusion.geometry_fusion.out.0.weight",
|
| 209 |
+
"fusion.geometry_fusion.out.0.bias",
|
| 210 |
+
"fusion.geometry_fusion.out.1.weight",
|
| 211 |
+
"fusion.geometry_fusion.out.1.bias",
|
| 212 |
+
"memory.scene_memory.position_embedding",
|
| 213 |
+
"memory.scene_memory.bank_queries",
|
| 214 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 215 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 216 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 217 |
+
"memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 218 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
|
| 219 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
|
| 220 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
|
| 221 |
+
"memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
|
| 222 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
|
| 223 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
|
| 224 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
|
| 225 |
+
"memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
|
| 226 |
+
"memory.scene_memory.bank_attention.in_proj_weight",
|
| 227 |
+
"memory.scene_memory.bank_attention.in_proj_bias",
|
| 228 |
+
"memory.scene_memory.bank_attention.out_proj.weight",
|
| 229 |
+
"memory.scene_memory.bank_attention.out_proj.bias",
|
| 230 |
+
"memory.scene_memory.action_proj.0.weight",
|
| 231 |
+
"memory.scene_memory.action_proj.0.bias",
|
| 232 |
+
"memory.scene_memory.action_proj.1.weight",
|
| 233 |
+
"memory.scene_memory.action_proj.1.bias",
|
| 234 |
+
"memory.scene_memory.write_gate.0.weight",
|
| 235 |
+
"memory.scene_memory.write_gate.0.bias",
|
| 236 |
+
"memory.scene_memory.write_gate.1.weight",
|
| 237 |
+
"memory.scene_memory.write_gate.1.bias",
|
| 238 |
+
"memory.scene_memory.write_gate.3.weight",
|
| 239 |
+
"memory.scene_memory.write_gate.3.bias",
|
| 240 |
+
"memory.scene_memory.token_proj.0.weight",
|
| 241 |
+
"memory.scene_memory.token_proj.0.bias",
|
| 242 |
+
"memory.scene_memory.token_proj.1.weight",
|
| 243 |
+
"memory.scene_memory.token_proj.1.bias",
|
| 244 |
+
"memory.belief_memory.position_embedding",
|
| 245 |
+
"memory.belief_memory.bank_queries",
|
| 246 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 247 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 248 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 249 |
+
"memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 250 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
|
| 251 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
|
| 252 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
|
| 253 |
+
"memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
|
| 254 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
|
| 255 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
|
| 256 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
|
| 257 |
+
"memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
|
| 258 |
+
"memory.belief_memory.bank_attention.in_proj_weight",
|
| 259 |
+
"memory.belief_memory.bank_attention.in_proj_bias",
|
| 260 |
+
"memory.belief_memory.bank_attention.out_proj.weight",
|
| 261 |
+
"memory.belief_memory.bank_attention.out_proj.bias",
|
| 262 |
+
"memory.belief_memory.action_proj.0.weight",
|
| 263 |
+
"memory.belief_memory.action_proj.0.bias",
|
| 264 |
+
"memory.belief_memory.action_proj.1.weight",
|
| 265 |
+
"memory.belief_memory.action_proj.1.bias",
|
| 266 |
+
"memory.belief_memory.write_gate.0.weight",
|
| 267 |
+
"memory.belief_memory.write_gate.0.bias",
|
| 268 |
+
"memory.belief_memory.write_gate.1.weight",
|
| 269 |
+
"memory.belief_memory.write_gate.1.bias",
|
| 270 |
+
"memory.belief_memory.write_gate.3.weight",
|
| 271 |
+
"memory.belief_memory.write_gate.3.bias",
|
| 272 |
+
"memory.belief_memory.token_proj.0.weight",
|
| 273 |
+
"memory.belief_memory.token_proj.0.bias",
|
| 274 |
+
"memory.belief_memory.token_proj.1.weight",
|
| 275 |
+
"memory.belief_memory.token_proj.1.bias",
|
| 276 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
|
| 277 |
+
"decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
|
| 278 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
|
| 279 |
+
"decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
|
| 280 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 281 |
+
"decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 282 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 283 |
+
"decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 284 |
+
"decoder.arm_decoder.layers.0.linear1.weight",
|
| 285 |
+
"decoder.arm_decoder.layers.0.linear1.bias",
|
| 286 |
+
"decoder.arm_decoder.layers.0.linear2.weight",
|
| 287 |
+
"decoder.arm_decoder.layers.0.linear2.bias",
|
| 288 |
+
"decoder.arm_decoder.layers.0.norm1.weight",
|
| 289 |
+
"decoder.arm_decoder.layers.0.norm1.bias",
|
| 290 |
+
"decoder.arm_decoder.layers.0.norm2.weight",
|
| 291 |
+
"decoder.arm_decoder.layers.0.norm2.bias",
|
| 292 |
+
"decoder.arm_decoder.layers.0.norm3.weight",
|
| 293 |
+
"decoder.arm_decoder.layers.0.norm3.bias",
|
| 294 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
|
| 295 |
+
"decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
|
| 296 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
|
| 297 |
+
"decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
|
| 298 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 299 |
+
"decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 300 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 301 |
+
"decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 302 |
+
"decoder.arm_decoder.layers.1.linear1.weight",
|
| 303 |
+
"decoder.arm_decoder.layers.1.linear1.bias",
|
| 304 |
+
"decoder.arm_decoder.layers.1.linear2.weight",
|
| 305 |
+
"decoder.arm_decoder.layers.1.linear2.bias",
|
| 306 |
+
"decoder.arm_decoder.layers.1.norm1.weight",
|
| 307 |
+
"decoder.arm_decoder.layers.1.norm1.bias",
|
| 308 |
+
"decoder.arm_decoder.layers.1.norm2.weight",
|
| 309 |
+
"decoder.arm_decoder.layers.1.norm2.bias",
|
| 310 |
+
"decoder.arm_decoder.layers.1.norm3.weight",
|
| 311 |
+
"decoder.arm_decoder.layers.1.norm3.bias",
|
| 312 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
|
| 313 |
+
"decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
|
| 314 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
|
| 315 |
+
"decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
|
| 316 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 317 |
+
"decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 318 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 319 |
+
"decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 320 |
+
"decoder.arm_decoder.layers.2.linear1.weight",
|
| 321 |
+
"decoder.arm_decoder.layers.2.linear1.bias",
|
| 322 |
+
"decoder.arm_decoder.layers.2.linear2.weight",
|
| 323 |
+
"decoder.arm_decoder.layers.2.linear2.bias",
|
| 324 |
+
"decoder.arm_decoder.layers.2.norm1.weight",
|
| 325 |
+
"decoder.arm_decoder.layers.2.norm1.bias",
|
| 326 |
+
"decoder.arm_decoder.layers.2.norm2.weight",
|
| 327 |
+
"decoder.arm_decoder.layers.2.norm2.bias",
|
| 328 |
+
"decoder.arm_decoder.layers.2.norm3.weight",
|
| 329 |
+
"decoder.arm_decoder.layers.2.norm3.bias",
|
| 330 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
|
| 331 |
+
"decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
|
| 332 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
|
| 333 |
+
"decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
|
| 334 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 335 |
+
"decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 336 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 337 |
+
"decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 338 |
+
"decoder.arm_decoder.layers.3.linear1.weight",
|
| 339 |
+
"decoder.arm_decoder.layers.3.linear1.bias",
|
| 340 |
+
"decoder.arm_decoder.layers.3.linear2.weight",
|
| 341 |
+
"decoder.arm_decoder.layers.3.linear2.bias",
|
| 342 |
+
"decoder.arm_decoder.layers.3.norm1.weight",
|
| 343 |
+
"decoder.arm_decoder.layers.3.norm1.bias",
|
| 344 |
+
"decoder.arm_decoder.layers.3.norm2.weight",
|
| 345 |
+
"decoder.arm_decoder.layers.3.norm2.bias",
|
| 346 |
+
"decoder.arm_decoder.layers.3.norm3.weight",
|
| 347 |
+
"decoder.arm_decoder.layers.3.norm3.bias",
|
| 348 |
+
"decoder.arm_identity.weight",
|
| 349 |
+
"decoder.phase_adapter.weight",
|
| 350 |
+
"decoder.phase_adapter.bias",
|
| 351 |
+
"decoder.role_adapter.weight",
|
| 352 |
+
"decoder.role_adapter.bias",
|
| 353 |
+
"decoder.context_proj.0.weight",
|
| 354 |
+
"decoder.context_proj.0.bias",
|
| 355 |
+
"decoder.context_proj.1.weight",
|
| 356 |
+
"decoder.context_proj.1.bias",
|
| 357 |
+
"decoder.arm_head.0.weight",
|
| 358 |
+
"decoder.arm_head.0.bias",
|
| 359 |
+
"decoder.arm_head.1.weight",
|
| 360 |
+
"decoder.arm_head.1.bias",
|
| 361 |
+
"decoder.arm_mean.weight",
|
| 362 |
+
"decoder.arm_mean.bias",
|
| 363 |
+
"decoder.arm_log_std.weight",
|
| 364 |
+
"decoder.arm_log_std.bias",
|
| 365 |
+
"decoder.proposal_mode_head.0.weight",
|
| 366 |
+
"decoder.proposal_mode_head.0.bias",
|
| 367 |
+
"decoder.proposal_mode_head.1.weight",
|
| 368 |
+
"decoder.proposal_mode_head.1.bias",
|
| 369 |
+
"decoder.proposal_mode_head.3.weight",
|
| 370 |
+
"decoder.proposal_mode_head.3.bias",
|
| 371 |
+
"decoder.proposal_mode_embeddings.weight",
|
| 372 |
+
"decoder.proposal_slot_embeddings.weight",
|
| 373 |
+
"decoder.mode_residual_heads.0.0.weight",
|
| 374 |
+
"decoder.mode_residual_heads.0.0.bias",
|
| 375 |
+
"decoder.mode_residual_heads.0.1.weight",
|
| 376 |
+
"decoder.mode_residual_heads.0.1.bias",
|
| 377 |
+
"decoder.mode_residual_heads.0.3.weight",
|
| 378 |
+
"decoder.mode_residual_heads.0.3.bias",
|
| 379 |
+
"decoder.mode_residual_heads.1.0.weight",
|
| 380 |
+
"decoder.mode_residual_heads.1.0.bias",
|
| 381 |
+
"decoder.mode_residual_heads.1.1.weight",
|
| 382 |
+
"decoder.mode_residual_heads.1.1.bias",
|
| 383 |
+
"decoder.mode_residual_heads.1.3.weight",
|
| 384 |
+
"decoder.mode_residual_heads.1.3.bias",
|
| 385 |
+
"decoder.mode_residual_heads.2.0.weight",
|
| 386 |
+
"decoder.mode_residual_heads.2.0.bias",
|
| 387 |
+
"decoder.mode_residual_heads.2.1.weight",
|
| 388 |
+
"decoder.mode_residual_heads.2.1.bias",
|
| 389 |
+
"decoder.mode_residual_heads.2.3.weight",
|
| 390 |
+
"decoder.mode_residual_heads.2.3.bias",
|
| 391 |
+
"decoder.mode_residual_heads.3.0.weight",
|
| 392 |
+
"decoder.mode_residual_heads.3.0.bias",
|
| 393 |
+
"decoder.mode_residual_heads.3.1.weight",
|
| 394 |
+
"decoder.mode_residual_heads.3.1.bias",
|
| 395 |
+
"decoder.mode_residual_heads.3.3.weight",
|
| 396 |
+
"decoder.mode_residual_heads.3.3.bias",
|
| 397 |
+
"decoder.mode_residual_heads.4.0.weight",
|
| 398 |
+
"decoder.mode_residual_heads.4.0.bias",
|
| 399 |
+
"decoder.mode_residual_heads.4.1.weight",
|
| 400 |
+
"decoder.mode_residual_heads.4.1.bias",
|
| 401 |
+
"decoder.mode_residual_heads.4.3.weight",
|
| 402 |
+
"decoder.mode_residual_heads.4.3.bias",
|
| 403 |
+
"decoder.mode_residual_heads.5.0.weight",
|
| 404 |
+
"decoder.mode_residual_heads.5.0.bias",
|
| 405 |
+
"decoder.mode_residual_heads.5.1.weight",
|
| 406 |
+
"decoder.mode_residual_heads.5.1.bias",
|
| 407 |
+
"decoder.mode_residual_heads.5.3.weight",
|
| 408 |
+
"decoder.mode_residual_heads.5.3.bias",
|
| 409 |
+
"decoder.slot_delta.0.weight",
|
| 410 |
+
"decoder.slot_delta.0.bias",
|
| 411 |
+
"decoder.slot_delta.1.weight",
|
| 412 |
+
"decoder.slot_delta.1.bias",
|
| 413 |
+
"decoder.slot_delta.3.weight",
|
| 414 |
+
"decoder.slot_delta.3.bias",
|
| 415 |
+
"decoder.proposal_score.0.weight",
|
| 416 |
+
"decoder.proposal_score.0.bias",
|
| 417 |
+
"decoder.proposal_score.1.weight",
|
| 418 |
+
"decoder.proposal_score.1.bias",
|
| 419 |
+
"decoder.proposal_score.3.weight",
|
| 420 |
+
"decoder.proposal_score.3.bias",
|
| 421 |
+
"elastic_state_head.interaction_queries",
|
| 422 |
+
"elastic_state_head.interaction_attention.in_proj_weight",
|
| 423 |
+
"elastic_state_head.interaction_attention.in_proj_bias",
|
| 424 |
+
"elastic_state_head.interaction_attention.out_proj.weight",
|
| 425 |
+
"elastic_state_head.interaction_attention.out_proj.bias",
|
| 426 |
+
"elastic_state_head.interaction_mlp.0.weight",
|
| 427 |
+
"elastic_state_head.interaction_mlp.0.bias",
|
| 428 |
+
"elastic_state_head.interaction_mlp.1.weight",
|
| 429 |
+
"elastic_state_head.interaction_mlp.1.bias",
|
| 430 |
+
"elastic_state_head.interaction_mlp.3.weight",
|
| 431 |
+
"elastic_state_head.interaction_mlp.3.bias",
|
| 432 |
+
"elastic_state_head.decoder.field_queries",
|
| 433 |
+
"elastic_state_head.decoder.field_attention.in_proj_weight",
|
| 434 |
+
"elastic_state_head.decoder.field_attention.in_proj_bias",
|
| 435 |
+
"elastic_state_head.decoder.field_attention.out_proj.weight",
|
| 436 |
+
"elastic_state_head.decoder.field_attention.out_proj.bias",
|
| 437 |
+
"elastic_state_head.decoder.field_mlp.0.weight",
|
| 438 |
+
"elastic_state_head.decoder.field_mlp.0.bias",
|
| 439 |
+
"elastic_state_head.decoder.field_mlp.1.weight",
|
| 440 |
+
"elastic_state_head.decoder.field_mlp.1.bias",
|
| 441 |
+
"elastic_state_head.decoder.field_mlp.3.weight",
|
| 442 |
+
"elastic_state_head.decoder.field_mlp.3.bias",
|
| 443 |
+
"elastic_state_head.decoder.summary_proj.0.weight",
|
| 444 |
+
"elastic_state_head.decoder.summary_proj.0.bias",
|
| 445 |
+
"elastic_state_head.decoder.summary_proj.1.weight",
|
| 446 |
+
"elastic_state_head.decoder.summary_proj.1.bias",
|
| 447 |
+
"elastic_state_head.decoder.phase_head.0.weight",
|
| 448 |
+
"elastic_state_head.decoder.phase_head.0.bias",
|
| 449 |
+
"elastic_state_head.decoder.phase_head.1.weight",
|
| 450 |
+
"elastic_state_head.decoder.phase_head.1.bias",
|
| 451 |
+
"elastic_state_head.decoder.phase_head.3.weight",
|
| 452 |
+
"elastic_state_head.decoder.phase_head.3.bias",
|
| 453 |
+
"elastic_state_head.decoder.arm_role_head.0.weight",
|
| 454 |
+
"elastic_state_head.decoder.arm_role_head.0.bias",
|
| 455 |
+
"elastic_state_head.decoder.arm_role_head.1.weight",
|
| 456 |
+
"elastic_state_head.decoder.arm_role_head.1.bias",
|
| 457 |
+
"elastic_state_head.decoder.arm_role_head.3.weight",
|
| 458 |
+
"elastic_state_head.decoder.arm_role_head.3.bias",
|
| 459 |
+
"elastic_state_head.decoder.arm_identity.weight",
|
| 460 |
+
"elastic_state_head.decoder.support_mode.0.weight",
|
| 461 |
+
"elastic_state_head.decoder.support_mode.0.bias",
|
| 462 |
+
"elastic_state_head.decoder.support_mode.1.weight",
|
| 463 |
+
"elastic_state_head.decoder.support_mode.1.bias",
|
| 464 |
+
"elastic_state_head.decoder.support_mode.3.weight",
|
| 465 |
+
"elastic_state_head.decoder.support_mode.3.bias",
|
| 466 |
+
"elastic_state_head.decoder.access_field.weight",
|
| 467 |
+
"elastic_state_head.decoder.access_field.bias",
|
| 468 |
+
"elastic_state_head.decoder.target_belief_field.weight",
|
| 469 |
+
"elastic_state_head.decoder.target_belief_field.bias",
|
| 470 |
+
"elastic_state_head.decoder.visibility_field.weight",
|
| 471 |
+
"elastic_state_head.decoder.visibility_field.bias",
|
| 472 |
+
"elastic_state_head.decoder.clearance_field.weight",
|
| 473 |
+
"elastic_state_head.decoder.clearance_field.bias",
|
| 474 |
+
"elastic_state_head.decoder.occluder_contact_field.weight",
|
| 475 |
+
"elastic_state_head.decoder.occluder_contact_field.bias",
|
| 476 |
+
"elastic_state_head.decoder.grasp_affordance_field.weight",
|
| 477 |
+
"elastic_state_head.decoder.grasp_affordance_field.bias",
|
| 478 |
+
"elastic_state_head.decoder.support_stability_field.weight",
|
| 479 |
+
"elastic_state_head.decoder.support_stability_field.bias",
|
| 480 |
+
"elastic_state_head.decoder.persistence_field.weight",
|
| 481 |
+
"elastic_state_head.decoder.persistence_field.bias",
|
| 482 |
+
"elastic_state_head.decoder.reocclusion_field.weight",
|
| 483 |
+
"elastic_state_head.decoder.reocclusion_field.bias",
|
| 484 |
+
"elastic_state_head.decoder.disturbance_field.weight",
|
| 485 |
+
"elastic_state_head.decoder.disturbance_field.bias",
|
| 486 |
+
"elastic_state_head.decoder.uncertainty_field.weight",
|
| 487 |
+
"elastic_state_head.decoder.uncertainty_field.bias",
|
| 488 |
+
"elastic_state_head.decoder.reocclusion_head.0.weight",
|
| 489 |
+
"elastic_state_head.decoder.reocclusion_head.0.bias",
|
| 490 |
+
"elastic_state_head.decoder.reocclusion_head.1.weight",
|
| 491 |
+
"elastic_state_head.decoder.reocclusion_head.1.bias",
|
| 492 |
+
"elastic_state_head.decoder.reocclusion_head.3.weight",
|
| 493 |
+
"elastic_state_head.decoder.reocclusion_head.3.bias",
|
| 494 |
+
"world_model.state_encoder.0.weight",
|
| 495 |
+
"world_model.state_encoder.0.bias",
|
| 496 |
+
"world_model.state_encoder.1.weight",
|
| 497 |
+
"world_model.state_encoder.1.bias",
|
| 498 |
+
"world_model.scene_memory_proj.0.weight",
|
| 499 |
+
"world_model.scene_memory_proj.0.bias",
|
| 500 |
+
"world_model.scene_memory_proj.1.weight",
|
| 501 |
+
"world_model.scene_memory_proj.1.bias",
|
| 502 |
+
"world_model.belief_memory_proj.0.weight",
|
| 503 |
+
"world_model.belief_memory_proj.0.bias",
|
| 504 |
+
"world_model.belief_memory_proj.1.weight",
|
| 505 |
+
"world_model.belief_memory_proj.1.bias",
|
| 506 |
+
"world_model.action_encoder.0.weight",
|
| 507 |
+
"world_model.action_encoder.0.bias",
|
| 508 |
+
"world_model.action_encoder.1.weight",
|
| 509 |
+
"world_model.action_encoder.1.bias",
|
| 510 |
+
"world_model.transition.weight_ih",
|
| 511 |
+
"world_model.transition.weight_hh",
|
| 512 |
+
"world_model.transition.bias_ih",
|
| 513 |
+
"world_model.transition.bias_hh",
|
| 514 |
+
"world_model.scene_memory_update.weight",
|
| 515 |
+
"world_model.scene_memory_update.bias",
|
| 516 |
+
"world_model.belief_memory_update.weight",
|
| 517 |
+
"world_model.belief_memory_update.bias",
|
| 518 |
+
"world_model.compact_decoder.weight",
|
| 519 |
+
"world_model.compact_decoder.bias",
|
| 520 |
+
"world_model.target_belief_head.weight",
|
| 521 |
+
"world_model.target_belief_head.bias",
|
| 522 |
+
"world_model.visibility_head.weight",
|
| 523 |
+
"world_model.visibility_head.bias",
|
| 524 |
+
"world_model.clearance_head.weight",
|
| 525 |
+
"world_model.clearance_head.bias",
|
| 526 |
+
"world_model.occluder_contact_head.weight",
|
| 527 |
+
"world_model.occluder_contact_head.bias",
|
| 528 |
+
"world_model.grasp_affordance_head.weight",
|
| 529 |
+
"world_model.grasp_affordance_head.bias",
|
| 530 |
+
"world_model.support_stability_head.weight",
|
| 531 |
+
"world_model.support_stability_head.bias",
|
| 532 |
+
"world_model.persistence_head.weight",
|
| 533 |
+
"world_model.persistence_head.bias",
|
| 534 |
+
"world_model.reocclusion_head.weight",
|
| 535 |
+
"world_model.reocclusion_head.bias",
|
| 536 |
+
"world_model.disturbance_head.weight",
|
| 537 |
+
"world_model.disturbance_head.bias",
|
| 538 |
+
"world_model.uncertainty_head.weight",
|
| 539 |
+
"world_model.uncertainty_head.bias",
|
| 540 |
+
"world_model.access_head.weight",
|
| 541 |
+
"world_model.access_head.bias",
|
| 542 |
+
"planner.residual.trunk.0.weight",
|
| 543 |
+
"planner.residual.trunk.0.bias",
|
| 544 |
+
"planner.residual.trunk.1.weight",
|
| 545 |
+
"planner.residual.trunk.1.bias",
|
| 546 |
+
"planner.residual.trunk.3.weight",
|
| 547 |
+
"planner.residual.trunk.3.bias",
|
| 548 |
+
"planner.residual.success_head.weight",
|
| 549 |
+
"planner.residual.success_head.bias",
|
| 550 |
+
"planner.residual.risk_head.weight",
|
| 551 |
+
"planner.residual.risk_head.bias",
|
| 552 |
+
"planner.residual.residual_head.weight",
|
| 553 |
+
"planner.residual.residual_head.bias"
|
| 554 |
+
],
|
| 555 |
+
"unexpected_keys": []
|
| 556 |
+
}
|
| 557 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4583333333333333,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5694444444444445,
|
| 9 |
+
"visibility_integral": 32.2005988392565,
|
| 10 |
+
"corridor_availability": 0.8664570152759552,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.1903364318709135,
|
| 13 |
+
"disturbance_cost": 0.35011103795841336
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.569
|
| 6 |
+
- visibility_integral: 32.201
|
| 7 |
+
- corridor_availability: 0.866
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.190
|
| 10 |
+
- disturbance_cost: 0.350
|
| 11 |
+
- foliage_proxy_success: 0.458
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5555555555555555,
|
| 9 |
+
"visibility_integral": 33.31703626612822,
|
| 10 |
+
"corridor_availability": 0.886079938047462,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.1836884579143008,
|
| 13 |
+
"disturbance_cost": 0.3696938648612963
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.556
|
| 6 |
+
- visibility_integral: 33.317
|
| 7 |
+
- corridor_availability: 0.886
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.184
|
| 10 |
+
- disturbance_cost: 0.370
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4583333333333333,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5694444444444445,
|
| 9 |
+
"visibility_integral": 32.571378606888985,
|
| 10 |
+
"corridor_availability": 0.8744470203916231,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.249059588784357,
|
| 13 |
+
"disturbance_cost": 0.34120469799058306
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.569
|
| 6 |
+
- visibility_integral: 32.571
|
| 7 |
+
- corridor_availability: 0.874
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.249
|
| 10 |
+
- disturbance_cost: 0.341
|
| 11 |
+
- foliage_proxy_success: 0.458
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_dummy_seed13
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 96
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 12 |
+
train_episodes_per_proxy: 48
|
| 13 |
+
val_episodes_per_proxy: 16
|
| 14 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed13.pt
|
| 15 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed13.pt
|
| 16 |
+
rebuild_dataset: false
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 13
|
| 22 |
+
optim:
|
| 23 |
+
epochs: 4
|
| 24 |
+
batch_size: 16
|
| 25 |
+
num_workers: 4
|
| 26 |
+
lr: 0.001
|
| 27 |
+
weight_decay: 0.0001
|
| 28 |
+
trainer:
|
| 29 |
+
policy_type: elastic_reveal
|
| 30 |
+
use_bf16: false
|
| 31 |
+
grad_clip_norm: 1.0
|
| 32 |
+
freeze_backbone: true
|
| 33 |
+
gradient_checkpointing: false
|
| 34 |
+
plan_during_train: true
|
| 35 |
+
plan_during_eval: true
|
| 36 |
+
support_mode_conditioning: true
|
| 37 |
+
planner_mode: trainable
|
| 38 |
+
use_depth: false
|
| 39 |
+
use_world_model: true
|
| 40 |
+
use_role_tokens: true
|
| 41 |
+
compute_equivariance_probe: true
|
| 42 |
+
policy:
|
| 43 |
+
backbone:
|
| 44 |
+
model_name: openai/clip-vit-base-patch32
|
| 45 |
+
hidden_dim: 192
|
| 46 |
+
max_text_tokens: 32
|
| 47 |
+
freeze_backbone: true
|
| 48 |
+
gradient_checkpointing: false
|
| 49 |
+
use_dummy_backbone: true
|
| 50 |
+
fusion:
|
| 51 |
+
hidden_dim: 192
|
| 52 |
+
num_cameras: 3
|
| 53 |
+
num_layers: 2
|
| 54 |
+
num_heads: 4
|
| 55 |
+
ff_dim: 384
|
| 56 |
+
dropout: 0.1
|
| 57 |
+
proprio_dim: 32
|
| 58 |
+
proprio_tokens: 1
|
| 59 |
+
memory:
|
| 60 |
+
hidden_dim: 192
|
| 61 |
+
action_dim: 14
|
| 62 |
+
history_steps: 6
|
| 63 |
+
scene_history_steps: 3
|
| 64 |
+
belief_history_steps: 8
|
| 65 |
+
num_layers: 2
|
| 66 |
+
dropout: 0.1
|
| 67 |
+
memory_bank_size: 4
|
| 68 |
+
scene_bank_size: 2
|
| 69 |
+
belief_bank_size: 2
|
| 70 |
+
num_heads: 4
|
| 71 |
+
max_history_steps: 8
|
| 72 |
+
decoder:
|
| 73 |
+
hidden_dim: 192
|
| 74 |
+
num_heads: 4
|
| 75 |
+
num_layers: 2
|
| 76 |
+
ff_dim: 384
|
| 77 |
+
dropout: 0.1
|
| 78 |
+
chunk_size: 8
|
| 79 |
+
action_dim: 14
|
| 80 |
+
arm_action_dim: 7
|
| 81 |
+
num_candidates: 8
|
| 82 |
+
num_phases: 5
|
| 83 |
+
num_arm_roles: 4
|
| 84 |
+
num_proposal_modes: 6
|
| 85 |
+
planner_top_k: 4
|
| 86 |
+
reveal_head:
|
| 87 |
+
hidden_dim: 192
|
| 88 |
+
num_support_modes: 3
|
| 89 |
+
num_approach_templates: 32
|
| 90 |
+
rollout_horizon: 5
|
| 91 |
+
belief_map_size: 32
|
| 92 |
+
field_size: 16
|
| 93 |
+
num_heads: 4
|
| 94 |
+
predict_belief_map: true
|
| 95 |
+
num_phases: 5
|
| 96 |
+
num_arm_roles: 4
|
| 97 |
+
num_interaction_tokens: 8
|
| 98 |
+
world_model:
|
| 99 |
+
hidden_dim: 192
|
| 100 |
+
action_dim: 14
|
| 101 |
+
num_support_modes: 3
|
| 102 |
+
num_approach_templates: 32
|
| 103 |
+
rollout_horizon: 5
|
| 104 |
+
field_size: 16
|
| 105 |
+
num_heads: 4
|
| 106 |
+
num_phases: 5
|
| 107 |
+
num_arm_roles: 4
|
| 108 |
+
num_interaction_tokens: 8
|
| 109 |
+
belief_map_size: 32
|
| 110 |
+
predict_belief_map: true
|
| 111 |
+
scene_bank_size: 2
|
| 112 |
+
belief_bank_size: 2
|
| 113 |
+
planner:
|
| 114 |
+
hidden_dim: 192
|
| 115 |
+
num_candidates: 8
|
| 116 |
+
action_dim: 14
|
| 117 |
+
num_support_modes: 3
|
| 118 |
+
utility_margin: 0.1
|
| 119 |
+
num_heads: 4
|
| 120 |
+
num_layers: 2
|
| 121 |
+
num_phases: 5
|
| 122 |
+
num_arm_roles: 4
|
| 123 |
+
top_k: 4
|
| 124 |
+
loss_weights:
|
| 125 |
+
action: 1.0
|
| 126 |
+
phase: 0.15
|
| 127 |
+
arm_role: 0.2
|
| 128 |
+
support_mode: 0.15
|
| 129 |
+
corridor: 0.2
|
| 130 |
+
persistence: 0.1
|
| 131 |
+
disturbance: 0.1
|
| 132 |
+
world_model: 0.25
|
| 133 |
+
belief: 0.05
|
| 134 |
+
visibility: 0.05
|
| 135 |
+
clearance: 0.05
|
| 136 |
+
support_stability: 0.05
|
| 137 |
+
reocclusion: 0.05
|
| 138 |
+
occluder_contact: 0.05
|
| 139 |
+
grasp_affordance: 0.05
|
| 140 |
+
planner_success: 0.2
|
| 141 |
+
planner_risk: 0.1
|
| 142 |
+
planner_ranking: 0.1
|
| 143 |
+
proposal_reconstruction: 0.2
|
| 144 |
+
proposal_success: 0.1
|
| 145 |
+
proposal_ranking: 0.1
|
| 146 |
+
proposal_diversity: 0.05
|
| 147 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.2595419847328244,
|
| 3 |
+
"planner_regret": 0.015185066498816013,
|
| 4 |
+
"planner_score_utility_spearman": 0.25190839171409607,
|
| 5 |
+
"risk_calibration_mse": 0.011332111433148384,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.02456846833229065,
|
| 8 |
+
"left_right_equivariance_error": 0.007538194466820534,
|
| 9 |
+
"belief_calibration_brier": 0.0055354926735162735,
|
| 10 |
+
"reocclusion_calibration_brier": 0.2274838089942932,
|
| 11 |
+
"support_stability_mae": 0.030257930979132652,
|
| 12 |
+
"clearance_auc": 0.7414014153848468,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.7680174112319946,
|
| 15 |
+
"num_samples": 131
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.029530804604291916,
|
| 6 |
+
"arm_role": 0.19113596672893132,
|
| 7 |
+
"belief": 0.19201900158077478,
|
| 8 |
+
"clearance": 0.1937584774568677,
|
| 9 |
+
"corridor": 0.30155759242673713,
|
| 10 |
+
"disturbance": 0.018230090441647917,
|
| 11 |
+
"grasp_affordance": 0.1115249302238226,
|
| 12 |
+
"occluder_contact": 0.29577948339283466,
|
| 13 |
+
"persistence": 5.046393771966298,
|
| 14 |
+
"phase": 0.835017109910647,
|
| 15 |
+
"planner_ranking": 0.6733469751973947,
|
| 16 |
+
"planner_risk": 0.04033496890527507,
|
| 17 |
+
"planner_success": 0.6355331862966219,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.276770144701004,
|
| 20 |
+
"proposal_reconstruction": 0.07184042579804857,
|
| 21 |
+
"proposal_success": 0.6676094954212507,
|
| 22 |
+
"reocclusion": 0.6988904004295667,
|
| 23 |
+
"role_swap_consistency": 0.0006935761872834215,
|
| 24 |
+
"support_mode": 0.7387049297491709,
|
| 25 |
+
"support_stability": 0.22416748199611902,
|
| 26 |
+
"total": 2.4212693075339,
|
| 27 |
+
"uncertainty": 0.32931591259936493,
|
| 28 |
+
"visibility": 0.23356754829486212,
|
| 29 |
+
"world_model": 4.170340110858281
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.023605089427696332,
|
| 33 |
+
"arm_role": 8.891185360779572e-05,
|
| 34 |
+
"belief": 0.112466166416804,
|
| 35 |
+
"clearance": 0.08774211009343465,
|
| 36 |
+
"corridor": 0.2502693798806932,
|
| 37 |
+
"disturbance": 0.0037313879001885653,
|
| 38 |
+
"grasp_affordance": 0.013532540657454066,
|
| 39 |
+
"occluder_contact": 0.2236137886842092,
|
| 40 |
+
"persistence": 4.796973652309841,
|
| 41 |
+
"phase": 0.6506193346447415,
|
| 42 |
+
"planner_ranking": 0.45240074396133423,
|
| 43 |
+
"planner_risk": 0.012336155710120996,
|
| 44 |
+
"planner_success": 0.6348234679963853,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.1647081640031602,
|
| 47 |
+
"proposal_reconstruction": 0.06623147221075164,
|
| 48 |
+
"proposal_success": 0.6723773082097372,
|
| 49 |
+
"reocclusion": 0.6799028648270501,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.6129622724321153,
|
| 52 |
+
"support_stability": 0.14574629151158863,
|
| 53 |
+
"total": 1.9533665710025363,
|
| 54 |
+
"uncertainty": 0.057104989886283875,
|
| 55 |
+
"visibility": 0.09962501211298837,
|
| 56 |
+
"world_model": 3.08394538031684
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.02052135338696341,
|
| 63 |
+
"arm_role": 0.00010673219821910607,
|
| 64 |
+
"belief": 0.11743779480457306,
|
| 65 |
+
"clearance": 0.09043452050536871,
|
| 66 |
+
"corridor": 0.24632801488041878,
|
| 67 |
+
"disturbance": 0.003475519949764324,
|
| 68 |
+
"grasp_affordance": 0.01625332736875862,
|
| 69 |
+
"occluder_contact": 0.2240921917061011,
|
| 70 |
+
"persistence": 4.695922573407491,
|
| 71 |
+
"phase": 0.49508154888947803,
|
| 72 |
+
"planner_ranking": 0.14279444872712097,
|
| 73 |
+
"planner_risk": 0.0141817982463787,
|
| 74 |
+
"planner_success": 0.593176061908404,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.165678009390831,
|
| 77 |
+
"proposal_reconstruction": 0.06292749894782901,
|
| 78 |
+
"proposal_success": 0.674570898214976,
|
| 79 |
+
"reocclusion": 0.3844434078782797,
|
| 80 |
+
"role_swap_consistency": 0.00039524554207067314,
|
| 81 |
+
"support_mode": 0.17358588459561966,
|
| 82 |
+
"support_stability": 0.1374168156956633,
|
| 83 |
+
"total": 1.6440163105726242,
|
| 84 |
+
"uncertainty": 0.047071967429171004,
|
| 85 |
+
"visibility": 0.11256152174125116,
|
| 86 |
+
"world_model": 2.4736096411943436
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.020492848422792222,
|
| 90 |
+
"arm_role": 0.0002776960156754487,
|
| 91 |
+
"belief": 0.1081986419028706,
|
| 92 |
+
"clearance": 0.08335375868611866,
|
| 93 |
+
"corridor": 0.24787565734651354,
|
| 94 |
+
"disturbance": 0.0022675159141524797,
|
| 95 |
+
"grasp_affordance": 0.012290253303945065,
|
| 96 |
+
"occluder_contact": 0.21959979832172394,
|
| 97 |
+
"persistence": 4.647055625915527,
|
| 98 |
+
"phase": 0.4316861795054542,
|
| 99 |
+
"planner_ranking": 0.06341143821676572,
|
| 100 |
+
"planner_risk": 0.015357115098999606,
|
| 101 |
+
"planner_success": 0.5689369605647193,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.1283477942148845,
|
| 104 |
+
"proposal_reconstruction": 0.06308732968237665,
|
| 105 |
+
"proposal_success": 0.6809348861376444,
|
| 106 |
+
"reocclusion": 0.2748950504594379,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 0.0006280758987284369,
|
| 109 |
+
"support_stability": 0.14622381826241812,
|
| 110 |
+
"total": 1.6025353935029771,
|
| 111 |
+
"uncertainty": 0.02438033703300688,
|
| 112 |
+
"visibility": 0.10466726124286652,
|
| 113 |
+
"world_model": 2.558868553903368
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.01646478761297961,
|
| 120 |
+
"arm_role": 9.377782756322024e-05,
|
| 121 |
+
"belief": 0.10991635639220476,
|
| 122 |
+
"clearance": 0.0843405183404684,
|
| 123 |
+
"corridor": 0.2701566057900588,
|
| 124 |
+
"disturbance": 0.0031300995663817353,
|
| 125 |
+
"grasp_affordance": 0.012393822447241595,
|
| 126 |
+
"occluder_contact": 0.21479063170651594,
|
| 127 |
+
"persistence": 2.6339182580510774,
|
| 128 |
+
"phase": 0.431367311005791,
|
| 129 |
+
"planner_ranking": 0.06486702508603533,
|
| 130 |
+
"planner_risk": 0.013548698586722216,
|
| 131 |
+
"planner_success": 0.5643768397470316,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1353335281213124,
|
| 134 |
+
"proposal_reconstruction": 0.05951391921068231,
|
| 135 |
+
"proposal_success": 0.6731756230195364,
|
| 136 |
+
"reocclusion": 0.2623978331685066,
|
| 137 |
+
"role_swap_consistency": 0.00040521422973445925,
|
| 138 |
+
"support_mode": 0.000605581031171217,
|
| 139 |
+
"support_stability": 0.1400139912342032,
|
| 140 |
+
"total": 1.2923575937747955,
|
| 141 |
+
"uncertainty": 0.02004621450517637,
|
| 142 |
+
"visibility": 0.10328224146117766,
|
| 143 |
+
"world_model": 2.1331751296917596
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.018090524814195104,
|
| 147 |
+
"arm_role": 4.204427063490989e-05,
|
| 148 |
+
"belief": 0.11348766502406862,
|
| 149 |
+
"clearance": 0.0778748012251324,
|
| 150 |
+
"corridor": 0.24816315703921848,
|
| 151 |
+
"disturbance": 0.0018734507805978258,
|
| 152 |
+
"grasp_affordance": 0.008446878753602505,
|
| 153 |
+
"occluder_contact": 0.2068953894906574,
|
| 154 |
+
"persistence": 1.9170836640728846,
|
| 155 |
+
"phase": 0.4777056227127711,
|
| 156 |
+
"planner_ranking": 0.07497243583202362,
|
| 157 |
+
"planner_risk": 0.012007931971715556,
|
| 158 |
+
"planner_success": 0.5846167008082072,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.1227490504582722,
|
| 161 |
+
"proposal_reconstruction": 0.06178469873136944,
|
| 162 |
+
"proposal_success": 0.6768591006596884,
|
| 163 |
+
"reocclusion": 0.2698347626460923,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 0.0005942495643264718,
|
| 166 |
+
"support_stability": 0.14820611890819338,
|
| 167 |
+
"total": 1.2714158693949382,
|
| 168 |
+
"uncertainty": 0.004030831908393238,
|
| 169 |
+
"visibility": 0.09794799155659145,
|
| 170 |
+
"world_model": 2.303717931111654
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.015296258614398539,
|
| 177 |
+
"arm_role": 9.897743439069018e-05,
|
| 178 |
+
"belief": 0.10741911331812541,
|
| 179 |
+
"clearance": 0.07931565772742033,
|
| 180 |
+
"corridor": 0.23081608302891254,
|
| 181 |
+
"disturbance": 0.00287542298125724,
|
| 182 |
+
"grasp_affordance": 0.008955261165586611,
|
| 183 |
+
"occluder_contact": 0.21085621416568756,
|
| 184 |
+
"persistence": 1.6830786913633347,
|
| 185 |
+
"phase": 0.4407324629525344,
|
| 186 |
+
"planner_ranking": 0.053573422211532794,
|
| 187 |
+
"planner_risk": 0.011835894741428396,
|
| 188 |
+
"planner_success": 0.5389373525977135,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.1375357458988826,
|
| 191 |
+
"proposal_reconstruction": 0.05875217309221625,
|
| 192 |
+
"proposal_success": 0.669308491051197,
|
| 193 |
+
"reocclusion": 0.26737124752253294,
|
| 194 |
+
"role_swap_consistency": 0.00044258072254403186,
|
| 195 |
+
"support_mode": 0.0058784369854644565,
|
| 196 |
+
"support_stability": 0.13682511821389198,
|
| 197 |
+
"total": 1.1672432621320088,
|
| 198 |
+
"uncertainty": 0.007140855586233859,
|
| 199 |
+
"visibility": 0.094703309237957,
|
| 200 |
+
"world_model": 2.072191367546717
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.016218292733861342,
|
| 204 |
+
"arm_role": 0.00022501617463098632,
|
| 205 |
+
"belief": 0.10660513407654232,
|
| 206 |
+
"clearance": 0.07916852169566685,
|
| 207 |
+
"corridor": 0.23598399923907387,
|
| 208 |
+
"disturbance": 0.0013176489026389187,
|
| 209 |
+
"grasp_affordance": 0.009249631315469742,
|
| 210 |
+
"occluder_contact": 0.2084801279836231,
|
| 211 |
+
"persistence": 1.9978744321399264,
|
| 212 |
+
"phase": 0.46462951434983146,
|
| 213 |
+
"planner_ranking": 0.04140180618398719,
|
| 214 |
+
"planner_risk": 0.011076963868820004,
|
| 215 |
+
"planner_success": 0.5154120292928484,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1469912661446466,
|
| 218 |
+
"proposal_reconstruction": 0.05962582967347569,
|
| 219 |
+
"proposal_success": 0.6495795779758029,
|
| 220 |
+
"reocclusion": 0.2503652158710692,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 0.0004595977985041423,
|
| 223 |
+
"support_stability": 0.14600716531276703,
|
| 224 |
+
"total": 1.2128634585274591,
|
| 225 |
+
"uncertainty": 0.007759603775209851,
|
| 226 |
+
"visibility": 0.09225249456034766,
|
| 227 |
+
"world_model": 2.1404969029956393
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_dummy_seed13",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.1672432621320088,
|
| 6 |
+
"final_val_total": 1.2128634585274591,
|
| 7 |
+
"train_time_sec": 18.091050624847412,
|
| 8 |
+
"peak_gpu_memory_mb": 631.1953125,
|
| 9 |
+
"num_train_samples": 380,
|
| 10 |
+
"num_val_samples": 131,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": null
|
| 14 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.625,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5694444444444445,
|
| 9 |
+
"visibility_integral": 32.801942747500206,
|
| 10 |
+
"corridor_availability": 0.8877548724412918,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.4711664057066363,
|
| 13 |
+
"disturbance_cost": 0.37882790300581193
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.569
|
| 6 |
+
- visibility_integral: 32.802
|
| 7 |
+
- corridor_availability: 0.888
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.471
|
| 10 |
+
- disturbance_cost: 0.379
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.625
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5416666666666666,
|
| 9 |
+
"visibility_integral": 34.428366212381256,
|
| 10 |
+
"corridor_availability": 0.8909231291876899,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.4917179537341767,
|
| 13 |
+
"disturbance_cost": 0.39409097459995085
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.542
|
| 6 |
+
- visibility_integral: 34.428
|
| 7 |
+
- corridor_availability: 0.891
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.492
|
| 10 |
+
- disturbance_cost: 0.394
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.625,
|
| 6 |
+
"cloth_proxy": 0.6666666666666666
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5694444444444445,
|
| 9 |
+
"visibility_integral": 33.27109728753567,
|
| 10 |
+
"corridor_availability": 0.8943836614489555,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.488106187582016,
|
| 13 |
+
"disturbance_cost": 0.3667886131960485
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.569
|
| 6 |
+
- visibility_integral: 33.271
|
| 7 |
+
- corridor_availability: 0.894
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.488
|
| 10 |
+
- disturbance_cost: 0.367
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.625
|
| 13 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_dummy_seed14
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 14
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 96
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 12 |
+
train_episodes_per_proxy: 48
|
| 13 |
+
val_episodes_per_proxy: 16
|
| 14 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed14.pt
|
| 15 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed14.pt
|
| 16 |
+
rebuild_dataset: false
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 14
|
| 22 |
+
optim:
|
| 23 |
+
epochs: 4
|
| 24 |
+
batch_size: 16
|
| 25 |
+
num_workers: 4
|
| 26 |
+
lr: 0.001
|
| 27 |
+
weight_decay: 0.0001
|
| 28 |
+
trainer:
|
| 29 |
+
policy_type: elastic_reveal
|
| 30 |
+
use_bf16: false
|
| 31 |
+
grad_clip_norm: 1.0
|
| 32 |
+
freeze_backbone: true
|
| 33 |
+
gradient_checkpointing: false
|
| 34 |
+
plan_during_train: true
|
| 35 |
+
plan_during_eval: true
|
| 36 |
+
support_mode_conditioning: true
|
| 37 |
+
planner_mode: trainable
|
| 38 |
+
use_depth: false
|
| 39 |
+
use_world_model: true
|
| 40 |
+
use_role_tokens: true
|
| 41 |
+
compute_equivariance_probe: true
|
| 42 |
+
policy:
|
| 43 |
+
backbone:
|
| 44 |
+
model_name: openai/clip-vit-base-patch32
|
| 45 |
+
hidden_dim: 192
|
| 46 |
+
max_text_tokens: 32
|
| 47 |
+
freeze_backbone: true
|
| 48 |
+
gradient_checkpointing: false
|
| 49 |
+
use_dummy_backbone: true
|
| 50 |
+
fusion:
|
| 51 |
+
hidden_dim: 192
|
| 52 |
+
num_cameras: 3
|
| 53 |
+
num_layers: 2
|
| 54 |
+
num_heads: 4
|
| 55 |
+
ff_dim: 384
|
| 56 |
+
dropout: 0.1
|
| 57 |
+
proprio_dim: 32
|
| 58 |
+
proprio_tokens: 1
|
| 59 |
+
memory:
|
| 60 |
+
hidden_dim: 192
|
| 61 |
+
action_dim: 14
|
| 62 |
+
history_steps: 6
|
| 63 |
+
scene_history_steps: 3
|
| 64 |
+
belief_history_steps: 8
|
| 65 |
+
num_layers: 2
|
| 66 |
+
dropout: 0.1
|
| 67 |
+
memory_bank_size: 4
|
| 68 |
+
scene_bank_size: 2
|
| 69 |
+
belief_bank_size: 2
|
| 70 |
+
num_heads: 4
|
| 71 |
+
max_history_steps: 8
|
| 72 |
+
decoder:
|
| 73 |
+
hidden_dim: 192
|
| 74 |
+
num_heads: 4
|
| 75 |
+
num_layers: 2
|
| 76 |
+
ff_dim: 384
|
| 77 |
+
dropout: 0.1
|
| 78 |
+
chunk_size: 8
|
| 79 |
+
action_dim: 14
|
| 80 |
+
arm_action_dim: 7
|
| 81 |
+
num_candidates: 8
|
| 82 |
+
num_phases: 5
|
| 83 |
+
num_arm_roles: 4
|
| 84 |
+
num_proposal_modes: 6
|
| 85 |
+
planner_top_k: 4
|
| 86 |
+
reveal_head:
|
| 87 |
+
hidden_dim: 192
|
| 88 |
+
num_support_modes: 3
|
| 89 |
+
num_approach_templates: 32
|
| 90 |
+
rollout_horizon: 5
|
| 91 |
+
belief_map_size: 32
|
| 92 |
+
field_size: 16
|
| 93 |
+
num_heads: 4
|
| 94 |
+
predict_belief_map: true
|
| 95 |
+
num_phases: 5
|
| 96 |
+
num_arm_roles: 4
|
| 97 |
+
num_interaction_tokens: 8
|
| 98 |
+
world_model:
|
| 99 |
+
hidden_dim: 192
|
| 100 |
+
action_dim: 14
|
| 101 |
+
num_support_modes: 3
|
| 102 |
+
num_approach_templates: 32
|
| 103 |
+
rollout_horizon: 5
|
| 104 |
+
field_size: 16
|
| 105 |
+
num_heads: 4
|
| 106 |
+
num_phases: 5
|
| 107 |
+
num_arm_roles: 4
|
| 108 |
+
num_interaction_tokens: 8
|
| 109 |
+
belief_map_size: 32
|
| 110 |
+
predict_belief_map: true
|
| 111 |
+
scene_bank_size: 2
|
| 112 |
+
belief_bank_size: 2
|
| 113 |
+
planner:
|
| 114 |
+
hidden_dim: 192
|
| 115 |
+
num_candidates: 8
|
| 116 |
+
action_dim: 14
|
| 117 |
+
num_support_modes: 3
|
| 118 |
+
utility_margin: 0.1
|
| 119 |
+
num_heads: 4
|
| 120 |
+
num_layers: 2
|
| 121 |
+
num_phases: 5
|
| 122 |
+
num_arm_roles: 4
|
| 123 |
+
top_k: 4
|
| 124 |
+
loss_weights:
|
| 125 |
+
action: 1.0
|
| 126 |
+
phase: 0.15
|
| 127 |
+
arm_role: 0.2
|
| 128 |
+
support_mode: 0.15
|
| 129 |
+
corridor: 0.2
|
| 130 |
+
persistence: 0.1
|
| 131 |
+
disturbance: 0.1
|
| 132 |
+
world_model: 0.25
|
| 133 |
+
belief: 0.05
|
| 134 |
+
visibility: 0.05
|
| 135 |
+
clearance: 0.05
|
| 136 |
+
support_stability: 0.05
|
| 137 |
+
reocclusion: 0.05
|
| 138 |
+
occluder_contact: 0.05
|
| 139 |
+
grasp_affordance: 0.05
|
| 140 |
+
planner_success: 0.2
|
| 141 |
+
planner_risk: 0.1
|
| 142 |
+
planner_ranking: 0.1
|
| 143 |
+
proposal_reconstruction: 0.2
|
| 144 |
+
proposal_success: 0.1
|
| 145 |
+
proposal_ranking: 0.1
|
| 146 |
+
proposal_diversity: 0.05
|
| 147 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.2846153846153846,
|
| 3 |
+
"planner_regret": 0.014314642176032066,
|
| 4 |
+
"planner_score_utility_spearman": 0.2153846174478531,
|
| 5 |
+
"risk_calibration_mse": 0.010775926522910595,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.02589959278702736,
|
| 8 |
+
"left_right_equivariance_error": 0.008901518605211201,
|
| 9 |
+
"belief_calibration_brier": 0.005614265333861113,
|
| 10 |
+
"reocclusion_calibration_brier": 0.28406235575675964,
|
| 11 |
+
"support_stability_mae": 0.025872904807329178,
|
| 12 |
+
"clearance_auc": 0.5220335124994485,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.7309081554412842,
|
| 15 |
+
"num_samples": 130
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.033738364155093827,
|
| 6 |
+
"arm_role": 0.2658534389071671,
|
| 7 |
+
"belief": 0.1663714082290729,
|
| 8 |
+
"clearance": 0.1995344152674079,
|
| 9 |
+
"corridor": 0.2937144724031289,
|
| 10 |
+
"disturbance": 0.01641949706633265,
|
| 11 |
+
"grasp_affordance": 0.07253360034277041,
|
| 12 |
+
"occluder_contact": 0.262634892637531,
|
| 13 |
+
"persistence": 5.348720759153366,
|
| 14 |
+
"phase": 0.9128680676221848,
|
| 15 |
+
"planner_ranking": 0.7161665211121241,
|
| 16 |
+
"planner_risk": 0.03542382351588458,
|
| 17 |
+
"planner_success": 0.6313644871115685,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.328845535715421,
|
| 20 |
+
"proposal_reconstruction": 0.07508338304857413,
|
| 21 |
+
"proposal_success": 0.6797524491945902,
|
| 22 |
+
"reocclusion": 0.7106639867027601,
|
| 23 |
+
"role_swap_consistency": 0.0008167610091428893,
|
| 24 |
+
"support_mode": 0.7801499888300896,
|
| 25 |
+
"support_stability": 0.21256058973570666,
|
| 26 |
+
"total": 2.46435983479023,
|
| 27 |
+
"uncertainty": 0.17734388983808458,
|
| 28 |
+
"visibility": 0.16707653552293777,
|
| 29 |
+
"world_model": 4.078198651472728
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.023770140690935984,
|
| 33 |
+
"arm_role": 0.0004891494075612476,
|
| 34 |
+
"belief": 0.11787863655222787,
|
| 35 |
+
"clearance": 0.08211326102415721,
|
| 36 |
+
"corridor": 0.2646504044532776,
|
| 37 |
+
"disturbance": 0.0077974022262626225,
|
| 38 |
+
"grasp_affordance": 0.010528300681875812,
|
| 39 |
+
"occluder_contact": 0.23685429162449306,
|
| 40 |
+
"persistence": 4.643319712744819,
|
| 41 |
+
"phase": 0.6877350012461344,
|
| 42 |
+
"planner_ranking": 0.5576971173286438,
|
| 43 |
+
"planner_risk": 0.012001174760775434,
|
| 44 |
+
"planner_success": 0.6474077436659071,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.2473273674647014,
|
| 47 |
+
"proposal_reconstruction": 0.06659724977281359,
|
| 48 |
+
"proposal_success": 0.6868854032622443,
|
| 49 |
+
"reocclusion": 0.6894112494256761,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.7945182191001045,
|
| 52 |
+
"support_stability": 0.13977908922566307,
|
| 53 |
+
"total": 1.9791885084576077,
|
| 54 |
+
"uncertainty": 0.016744557561145887,
|
| 55 |
+
"visibility": 0.09745695524745518,
|
| 56 |
+
"world_model": 3.0115205181969538
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.02093995890269677,
|
| 63 |
+
"arm_role": 0.00021873527142209545,
|
| 64 |
+
"belief": 0.1156839697311322,
|
| 65 |
+
"clearance": 0.09139195084571838,
|
| 66 |
+
"corridor": 0.2529828678816557,
|
| 67 |
+
"disturbance": 0.003422619032789953,
|
| 68 |
+
"grasp_affordance": 0.017661277670413256,
|
| 69 |
+
"occluder_contact": 0.22792026090125242,
|
| 70 |
+
"persistence": 4.702208956082662,
|
| 71 |
+
"phase": 0.5312556164960066,
|
| 72 |
+
"planner_ranking": 0.20636002533137798,
|
| 73 |
+
"planner_risk": 0.015822513572250802,
|
| 74 |
+
"planner_success": 0.5910777151584625,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.1696062982082367,
|
| 77 |
+
"proposal_reconstruction": 0.06334876082837582,
|
| 78 |
+
"proposal_success": 0.6708702544371287,
|
| 79 |
+
"reocclusion": 0.5039266211291155,
|
| 80 |
+
"role_swap_consistency": 0.0005020403975019386,
|
| 81 |
+
"support_mode": 0.3201311229883383,
|
| 82 |
+
"support_stability": 0.13968352818240723,
|
| 83 |
+
"total": 1.6841449290513992,
|
| 84 |
+
"uncertainty": 0.026018289965577424,
|
| 85 |
+
"visibility": 0.11011519034703572,
|
| 86 |
+
"world_model": 2.466151461005211
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.020535202903880015,
|
| 90 |
+
"arm_role": 0.00012925987215971368,
|
| 91 |
+
"belief": 0.10588792545927896,
|
| 92 |
+
"clearance": 0.08000239895449744,
|
| 93 |
+
"corridor": 0.23227471278773415,
|
| 94 |
+
"disturbance": 0.0022439691221936503,
|
| 95 |
+
"grasp_affordance": 0.011653332453635003,
|
| 96 |
+
"occluder_contact": 0.21834516359700096,
|
| 97 |
+
"persistence": 4.46406364440918,
|
| 98 |
+
"phase": 0.4118766354189979,
|
| 99 |
+
"planner_ranking": 0.0892416491276688,
|
| 100 |
+
"planner_risk": 0.0152344209038549,
|
| 101 |
+
"planner_success": 0.6057713859611087,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.133669826719496,
|
| 104 |
+
"proposal_reconstruction": 0.06398758581942982,
|
| 105 |
+
"proposal_success": 0.6783458656734891,
|
| 106 |
+
"reocclusion": 0.2840655545393626,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 0.0015922162112676436,
|
| 109 |
+
"support_stability": 0.13890525698661804,
|
| 110 |
+
"total": 1.584020005332099,
|
| 111 |
+
"uncertainty": 0.014379701991048124,
|
| 112 |
+
"visibility": 0.09630187600851059,
|
| 113 |
+
"world_model": 2.5434003671010337
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.017165315182258684,
|
| 120 |
+
"arm_role": 0.00014243966719125942,
|
| 121 |
+
"belief": 0.1267746559654673,
|
| 122 |
+
"clearance": 0.09291451362272103,
|
| 123 |
+
"corridor": 0.2539026445398728,
|
| 124 |
+
"disturbance": 0.0040997157484525815,
|
| 125 |
+
"grasp_affordance": 0.016216314087311428,
|
| 126 |
+
"occluder_contact": 0.2287510900447766,
|
| 127 |
+
"persistence": 2.7297142073512077,
|
| 128 |
+
"phase": 0.4553527260820071,
|
| 129 |
+
"planner_ranking": 0.0675589762783299,
|
| 130 |
+
"planner_risk": 0.012244323831206808,
|
| 131 |
+
"planner_success": 0.5227356925606728,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1367994795242946,
|
| 134 |
+
"proposal_reconstruction": 0.06006583757698536,
|
| 135 |
+
"proposal_success": 0.6718559389313062,
|
| 136 |
+
"reocclusion": 0.28394716791808605,
|
| 137 |
+
"role_swap_consistency": 0.000532965175807476,
|
| 138 |
+
"support_mode": 0.0007756326898136953,
|
| 139 |
+
"support_stability": 0.14084124999741712,
|
| 140 |
+
"total": 1.2956190605958302,
|
| 141 |
+
"uncertainty": 0.011363255020114593,
|
| 142 |
+
"visibility": 0.11323032714426517,
|
| 143 |
+
"world_model": 2.120655362804731
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.016470486712124612,
|
| 147 |
+
"arm_role": 0.00015339441274085807,
|
| 148 |
+
"belief": 0.15912896229161155,
|
| 149 |
+
"clearance": 0.07826702462302314,
|
| 150 |
+
"corridor": 0.21473425957891676,
|
| 151 |
+
"disturbance": 0.0018082650106710692,
|
| 152 |
+
"grasp_affordance": 0.008080463701238235,
|
| 153 |
+
"occluder_contact": 0.22728429403569964,
|
| 154 |
+
"persistence": 1.846471561325921,
|
| 155 |
+
"phase": 0.4164143088791106,
|
| 156 |
+
"planner_ranking": 0.05541756912134588,
|
| 157 |
+
"planner_risk": 0.011288604181673791,
|
| 158 |
+
"planner_success": 0.5237696303261651,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.1311746835708618,
|
| 161 |
+
"proposal_reconstruction": 0.06064582823051347,
|
| 162 |
+
"proposal_success": 0.6669412983788384,
|
| 163 |
+
"reocclusion": 0.27248211950063705,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 0.00040661103816496,
|
| 166 |
+
"support_stability": 0.13817799753612942,
|
| 167 |
+
"total": 1.241025275654263,
|
| 168 |
+
"uncertainty": 0.003020187374204397,
|
| 169 |
+
"visibility": 0.11647009683979882,
|
| 170 |
+
"world_model": 2.323344442579481
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.015070427674800158,
|
| 177 |
+
"arm_role": 0.0002641689807205694,
|
| 178 |
+
"belief": 0.141230215318501,
|
| 179 |
+
"clearance": 0.07984113336230318,
|
| 180 |
+
"corridor": 0.225482989102602,
|
| 181 |
+
"disturbance": 0.0017908170169296984,
|
| 182 |
+
"grasp_affordance": 0.008550037746317685,
|
| 183 |
+
"occluder_contact": 0.21477928136785826,
|
| 184 |
+
"persistence": 1.6129546587665875,
|
| 185 |
+
"phase": 0.42590194568037987,
|
| 186 |
+
"planner_ranking": 0.04456973075866699,
|
| 187 |
+
"planner_risk": 0.010397601523436606,
|
| 188 |
+
"planner_success": 0.49412518242994946,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.1504750202099483,
|
| 191 |
+
"proposal_reconstruction": 0.058567725432415806,
|
| 192 |
+
"proposal_success": 0.6462936575214068,
|
| 193 |
+
"reocclusion": 0.2506879176944494,
|
| 194 |
+
"role_swap_consistency": 0.000550856914439161,
|
| 195 |
+
"support_mode": 0.0003065853112881693,
|
| 196 |
+
"support_stability": 0.1366732595488429,
|
| 197 |
+
"total": 1.134415107468764,
|
| 198 |
+
"uncertainty": 0.0035936666245106608,
|
| 199 |
+
"visibility": 0.10351777387162049,
|
| 200 |
+
"world_model": 2.024999057253202
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.016186242405739095,
|
| 204 |
+
"arm_role": 0.0002410423346898622,
|
| 205 |
+
"belief": 0.12203978498776753,
|
| 206 |
+
"clearance": 0.07702170064051946,
|
| 207 |
+
"corridor": 0.21113747523890602,
|
| 208 |
+
"disturbance": 0.0014993647216922706,
|
| 209 |
+
"grasp_affordance": 0.008119617278377214,
|
| 210 |
+
"occluder_contact": 0.21474246515168083,
|
| 211 |
+
"persistence": 1.9725701610247295,
|
| 212 |
+
"phase": 0.4842751953336928,
|
| 213 |
+
"planner_ranking": 0.04342265882425838,
|
| 214 |
+
"planner_risk": 0.01107009764139851,
|
| 215 |
+
"planner_success": 0.5070097777578566,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1282474862204657,
|
| 218 |
+
"proposal_reconstruction": 0.05997827731900745,
|
| 219 |
+
"proposal_success": 0.6469291316138374,
|
| 220 |
+
"reocclusion": 0.2716698878341251,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 0.00020467836778455725,
|
| 223 |
+
"support_stability": 0.13836157073577246,
|
| 224 |
+
"total": 1.2091523673799303,
|
| 225 |
+
"uncertainty": 0.0025335378272251952,
|
| 226 |
+
"visibility": 0.09879730641841888,
|
| 227 |
+
"world_model": 2.1507359743118286
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_dummy_seed14",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.134415107468764,
|
| 6 |
+
"final_val_total": 1.2091523673799303,
|
| 7 |
+
"train_time_sec": 23.220722675323486,
|
| 8 |
+
"peak_gpu_memory_mb": 626.4716796875,
|
| 9 |
+
"num_train_samples": 381,
|
| 10 |
+
"num_val_samples": 130,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": null
|
| 14 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4583333333333333,
|
| 5 |
+
"bag_proxy": 0.625,
|
| 6 |
+
"cloth_proxy": 0.7083333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5972222222222222,
|
| 9 |
+
"visibility_integral": 29.697570121950573,
|
| 10 |
+
"corridor_availability": 0.8675610861844487,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.20430763148842,
|
| 13 |
+
"disturbance_cost": 0.36563710583787823
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.597
|
| 6 |
+
- visibility_integral: 29.698
|
| 7 |
+
- corridor_availability: 0.868
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.204
|
| 10 |
+
- disturbance_cost: 0.366
|
| 11 |
+
- foliage_proxy_success: 0.458
|
| 12 |
+
- bag_proxy_success: 0.625
|
| 13 |
+
- cloth_proxy_success: 0.708
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4583333333333333,
|
| 5 |
+
"bag_proxy": 0.625,
|
| 6 |
+
"cloth_proxy": 0.7083333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5972222222222222,
|
| 9 |
+
"visibility_integral": 29.697570121950573,
|
| 10 |
+
"corridor_availability": 0.8675610861844487,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.20430763148842,
|
| 13 |
+
"disturbance_cost": 0.36563710583787823
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.597
|
| 6 |
+
- visibility_integral: 29.698
|
| 7 |
+
- corridor_availability: 0.868
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.204
|
| 10 |
+
- disturbance_cost: 0.366
|
| 11 |
+
- foliage_proxy_success: 0.458
|
| 12 |
+
- bag_proxy_success: 0.625
|
| 13 |
+
- cloth_proxy_success: 0.708
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.5,
|
| 5 |
+
"bag_proxy": 0.625,
|
| 6 |
+
"cloth_proxy": 0.7083333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.6111111111111112,
|
| 9 |
+
"visibility_integral": 28.954636810554398,
|
| 10 |
+
"corridor_availability": 0.8660841253068712,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.10539705814184,
|
| 13 |
+
"disturbance_cost": 0.35598844579524463
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.611
|
| 6 |
+
- visibility_integral: 28.955
|
| 7 |
+
- corridor_availability: 0.866
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.105
|
| 10 |
+
- disturbance_cost: 0.356
|
| 11 |
+
- foliage_proxy_success: 0.500
|
| 12 |
+
- bag_proxy_success: 0.625
|
| 13 |
+
- cloth_proxy_success: 0.708
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_r3d_stage1_dummy_seed15
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 15
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 96
|
| 11 |
+
dataset_version: reveal_proxy_v6_rgbd_elastic_state
|
| 12 |
+
train_episodes_per_proxy: 48
|
| 13 |
+
val_episodes_per_proxy: 16
|
| 14 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed15.pt
|
| 15 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed15.pt
|
| 16 |
+
rebuild_dataset: false
|
| 17 |
+
chunk_horizon: 8
|
| 18 |
+
rollout_horizon: 5
|
| 19 |
+
history_steps: 6
|
| 20 |
+
planner_candidates: 8
|
| 21 |
+
seed: 15
|
| 22 |
+
optim:
|
| 23 |
+
epochs: 4
|
| 24 |
+
batch_size: 16
|
| 25 |
+
num_workers: 4
|
| 26 |
+
lr: 0.001
|
| 27 |
+
weight_decay: 0.0001
|
| 28 |
+
trainer:
|
| 29 |
+
policy_type: elastic_reveal
|
| 30 |
+
use_bf16: false
|
| 31 |
+
grad_clip_norm: 1.0
|
| 32 |
+
freeze_backbone: true
|
| 33 |
+
gradient_checkpointing: false
|
| 34 |
+
plan_during_train: true
|
| 35 |
+
plan_during_eval: true
|
| 36 |
+
support_mode_conditioning: true
|
| 37 |
+
planner_mode: trainable
|
| 38 |
+
use_depth: false
|
| 39 |
+
use_world_model: true
|
| 40 |
+
use_role_tokens: true
|
| 41 |
+
compute_equivariance_probe: true
|
| 42 |
+
policy:
|
| 43 |
+
backbone:
|
| 44 |
+
model_name: openai/clip-vit-base-patch32
|
| 45 |
+
hidden_dim: 192
|
| 46 |
+
max_text_tokens: 32
|
| 47 |
+
freeze_backbone: true
|
| 48 |
+
gradient_checkpointing: false
|
| 49 |
+
use_dummy_backbone: true
|
| 50 |
+
fusion:
|
| 51 |
+
hidden_dim: 192
|
| 52 |
+
num_cameras: 3
|
| 53 |
+
num_layers: 2
|
| 54 |
+
num_heads: 4
|
| 55 |
+
ff_dim: 384
|
| 56 |
+
dropout: 0.1
|
| 57 |
+
proprio_dim: 32
|
| 58 |
+
proprio_tokens: 1
|
| 59 |
+
memory:
|
| 60 |
+
hidden_dim: 192
|
| 61 |
+
action_dim: 14
|
| 62 |
+
history_steps: 6
|
| 63 |
+
scene_history_steps: 3
|
| 64 |
+
belief_history_steps: 8
|
| 65 |
+
num_layers: 2
|
| 66 |
+
dropout: 0.1
|
| 67 |
+
memory_bank_size: 4
|
| 68 |
+
scene_bank_size: 2
|
| 69 |
+
belief_bank_size: 2
|
| 70 |
+
num_heads: 4
|
| 71 |
+
max_history_steps: 8
|
| 72 |
+
decoder:
|
| 73 |
+
hidden_dim: 192
|
| 74 |
+
num_heads: 4
|
| 75 |
+
num_layers: 2
|
| 76 |
+
ff_dim: 384
|
| 77 |
+
dropout: 0.1
|
| 78 |
+
chunk_size: 8
|
| 79 |
+
action_dim: 14
|
| 80 |
+
arm_action_dim: 7
|
| 81 |
+
num_candidates: 8
|
| 82 |
+
num_phases: 5
|
| 83 |
+
num_arm_roles: 4
|
| 84 |
+
num_proposal_modes: 6
|
| 85 |
+
planner_top_k: 4
|
| 86 |
+
reveal_head:
|
| 87 |
+
hidden_dim: 192
|
| 88 |
+
num_support_modes: 3
|
| 89 |
+
num_approach_templates: 32
|
| 90 |
+
rollout_horizon: 5
|
| 91 |
+
belief_map_size: 32
|
| 92 |
+
field_size: 16
|
| 93 |
+
num_heads: 4
|
| 94 |
+
predict_belief_map: true
|
| 95 |
+
num_phases: 5
|
| 96 |
+
num_arm_roles: 4
|
| 97 |
+
num_interaction_tokens: 8
|
| 98 |
+
world_model:
|
| 99 |
+
hidden_dim: 192
|
| 100 |
+
action_dim: 14
|
| 101 |
+
num_support_modes: 3
|
| 102 |
+
num_approach_templates: 32
|
| 103 |
+
rollout_horizon: 5
|
| 104 |
+
field_size: 16
|
| 105 |
+
num_heads: 4
|
| 106 |
+
num_phases: 5
|
| 107 |
+
num_arm_roles: 4
|
| 108 |
+
num_interaction_tokens: 8
|
| 109 |
+
belief_map_size: 32
|
| 110 |
+
predict_belief_map: true
|
| 111 |
+
scene_bank_size: 2
|
| 112 |
+
belief_bank_size: 2
|
| 113 |
+
planner:
|
| 114 |
+
hidden_dim: 192
|
| 115 |
+
num_candidates: 8
|
| 116 |
+
action_dim: 14
|
| 117 |
+
num_support_modes: 3
|
| 118 |
+
utility_margin: 0.1
|
| 119 |
+
num_heads: 4
|
| 120 |
+
num_layers: 2
|
| 121 |
+
num_phases: 5
|
| 122 |
+
num_arm_roles: 4
|
| 123 |
+
top_k: 4
|
| 124 |
+
loss_weights:
|
| 125 |
+
action: 1.0
|
| 126 |
+
phase: 0.15
|
| 127 |
+
arm_role: 0.2
|
| 128 |
+
support_mode: 0.15
|
| 129 |
+
corridor: 0.2
|
| 130 |
+
persistence: 0.1
|
| 131 |
+
disturbance: 0.1
|
| 132 |
+
world_model: 0.25
|
| 133 |
+
belief: 0.05
|
| 134 |
+
visibility: 0.05
|
| 135 |
+
clearance: 0.05
|
| 136 |
+
support_stability: 0.05
|
| 137 |
+
reocclusion: 0.05
|
| 138 |
+
occluder_contact: 0.05
|
| 139 |
+
grasp_affordance: 0.05
|
| 140 |
+
planner_success: 0.2
|
| 141 |
+
planner_risk: 0.1
|
| 142 |
+
planner_ranking: 0.1
|
| 143 |
+
proposal_reconstruction: 0.2
|
| 144 |
+
proposal_success: 0.1
|
| 145 |
+
proposal_ranking: 0.1
|
| 146 |
+
proposal_diversity: 0.05
|
| 147 |
+
role_swap_consistency: 0.05
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.3053435114503817,
|
| 3 |
+
"planner_regret": 0.013406210578978062,
|
| 4 |
+
"planner_score_utility_spearman": 0.2839694619178772,
|
| 5 |
+
"risk_calibration_mse": 0.010891024023294449,
|
| 6 |
+
"role_collapse_rate": 0.0,
|
| 7 |
+
"proposal_diversity": 0.02313310280442238,
|
| 8 |
+
"left_right_equivariance_error": 0.006598936667775407,
|
| 9 |
+
"belief_calibration_brier": 0.00368268764577806,
|
| 10 |
+
"reocclusion_calibration_brier": 0.2288682460784912,
|
| 11 |
+
"support_stability_mae": 0.025202222168445587,
|
| 12 |
+
"clearance_auc": 0.9189163634555108,
|
| 13 |
+
"memory_write_rate": 0.0,
|
| 14 |
+
"memory_saturation": 0.8174758553504944,
|
| 15 |
+
"num_samples": 131
|
| 16 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.028008008919035394,
|
| 6 |
+
"arm_role": 0.2316993211661611,
|
| 7 |
+
"belief": 0.21131388066957393,
|
| 8 |
+
"clearance": 0.19917472638189793,
|
| 9 |
+
"corridor": 0.3046618662774563,
|
| 10 |
+
"disturbance": 0.020259966540227953,
|
| 11 |
+
"grasp_affordance": 0.15939014249791703,
|
| 12 |
+
"occluder_contact": 0.3023037730405728,
|
| 13 |
+
"persistence": 5.1030773023764295,
|
| 14 |
+
"phase": 0.7391876379648844,
|
| 15 |
+
"planner_ranking": 0.6672491803765297,
|
| 16 |
+
"planner_risk": 0.035407664448333286,
|
| 17 |
+
"planner_success": 0.6247484882672628,
|
| 18 |
+
"proposal_diversity": 0.0,
|
| 19 |
+
"proposal_ranking": 1.2685468345880508,
|
| 20 |
+
"proposal_reconstruction": 0.07012522220611572,
|
| 21 |
+
"proposal_success": 0.6749546950062116,
|
| 22 |
+
"reocclusion": 0.6581779879828294,
|
| 23 |
+
"role_swap_consistency": 0.0007787000698347887,
|
| 24 |
+
"support_mode": 0.6318444466839234,
|
| 25 |
+
"support_stability": 0.21354713415106139,
|
| 26 |
+
"total": 2.377249076962471,
|
| 27 |
+
"uncertainty": 0.2297215286331872,
|
| 28 |
+
"visibility": 0.20075704219440618,
|
| 29 |
+
"world_model": 4.083281387885411
|
| 30 |
+
},
|
| 31 |
+
"val": {
|
| 32 |
+
"action": 0.023762268117732473,
|
| 33 |
+
"arm_role": 0.00020197388787184737,
|
| 34 |
+
"belief": 0.1366901993751526,
|
| 35 |
+
"clearance": 0.10309203879700767,
|
| 36 |
+
"corridor": 0.26862603922684986,
|
| 37 |
+
"disturbance": 0.0037259276594138807,
|
| 38 |
+
"grasp_affordance": 0.044725324544641704,
|
| 39 |
+
"occluder_contact": 0.2536553243796031,
|
| 40 |
+
"persistence": 4.777863184611003,
|
| 41 |
+
"phase": 0.5066013468636407,
|
| 42 |
+
"planner_ranking": 0.44456031918525696,
|
| 43 |
+
"planner_risk": 0.01433694911085897,
|
| 44 |
+
"planner_success": 0.6283807026015388,
|
| 45 |
+
"proposal_diversity": 0.0,
|
| 46 |
+
"proposal_ranking": 1.1667029857635498,
|
| 47 |
+
"proposal_reconstruction": 0.0664608735177252,
|
| 48 |
+
"proposal_success": 0.6838224861356947,
|
| 49 |
+
"reocclusion": 0.3364369339413113,
|
| 50 |
+
"role_swap_consistency": 0.0,
|
| 51 |
+
"support_mode": 0.06715444227059682,
|
| 52 |
+
"support_stability": 0.14777708219157326,
|
| 53 |
+
"total": 1.8394301467471652,
|
| 54 |
+
"uncertainty": 0.07208604945076837,
|
| 55 |
+
"visibility": 0.12188677820894453,
|
| 56 |
+
"world_model": 3.079341014226278
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"epoch": 1,
|
| 61 |
+
"train": {
|
| 62 |
+
"action": 0.018888041842728853,
|
| 63 |
+
"arm_role": 0.00043030476990679745,
|
| 64 |
+
"belief": 0.11719414374480645,
|
| 65 |
+
"clearance": 0.08535642797748248,
|
| 66 |
+
"corridor": 0.24796467771132788,
|
| 67 |
+
"disturbance": 0.0024048478032151857,
|
| 68 |
+
"grasp_affordance": 0.022171703943361838,
|
| 69 |
+
"occluder_contact": 0.22088239962855974,
|
| 70 |
+
"persistence": 4.555501798788707,
|
| 71 |
+
"phase": 0.43327916599810123,
|
| 72 |
+
"planner_ranking": 0.15463371171305576,
|
| 73 |
+
"planner_risk": 0.01981719226265947,
|
| 74 |
+
"planner_success": 0.5631782834728559,
|
| 75 |
+
"proposal_diversity": 0.0,
|
| 76 |
+
"proposal_ranking": 1.1632012923558552,
|
| 77 |
+
"proposal_reconstruction": 0.0615519261918962,
|
| 78 |
+
"proposal_success": 0.6722564473748207,
|
| 79 |
+
"reocclusion": 0.287830734004577,
|
| 80 |
+
"role_swap_consistency": 0.00048373279059887864,
|
| 81 |
+
"support_mode": 0.008119381836574272,
|
| 82 |
+
"support_stability": 0.13662359025329351,
|
| 83 |
+
"total": 1.567106415828069,
|
| 84 |
+
"uncertainty": 0.03243653344300886,
|
| 85 |
+
"visibility": 0.11203592922538519,
|
| 86 |
+
"world_model": 2.404594744245211
|
| 87 |
+
},
|
| 88 |
+
"val": {
|
| 89 |
+
"action": 0.019907095055613253,
|
| 90 |
+
"arm_role": 0.00038116834993060265,
|
| 91 |
+
"belief": 0.1014507081773546,
|
| 92 |
+
"clearance": 0.07728531956672668,
|
| 93 |
+
"corridor": 0.22947043677171072,
|
| 94 |
+
"disturbance": 0.0014698771928023133,
|
| 95 |
+
"grasp_affordance": 0.02056772096289529,
|
| 96 |
+
"occluder_contact": 0.20453951425022548,
|
| 97 |
+
"persistence": 3.6124378045399985,
|
| 98 |
+
"phase": 0.47070127063327366,
|
| 99 |
+
"planner_ranking": 0.08099263947870997,
|
| 100 |
+
"planner_risk": 0.017360565563042957,
|
| 101 |
+
"planner_success": 0.5593770245711008,
|
| 102 |
+
"proposal_diversity": 0.0,
|
| 103 |
+
"proposal_ranking": 1.11685311794281,
|
| 104 |
+
"proposal_reconstruction": 0.0633203275501728,
|
| 105 |
+
"proposal_success": 0.683642049630483,
|
| 106 |
+
"reocclusion": 0.42518342865837944,
|
| 107 |
+
"role_swap_consistency": 0.0,
|
| 108 |
+
"support_mode": 8.963614042537908e-05,
|
| 109 |
+
"support_stability": 0.1495772964424557,
|
| 110 |
+
"total": 1.5412384668986003,
|
| 111 |
+
"uncertainty": 0.024036270876725514,
|
| 112 |
+
"visibility": 0.10443270951509476,
|
| 113 |
+
"world_model": 2.6981404887305365
|
| 114 |
+
}
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 2,
|
| 118 |
+
"train": {
|
| 119 |
+
"action": 0.01506453799083829,
|
| 120 |
+
"arm_role": 0.0002299571582019174,
|
| 121 |
+
"belief": 0.10169448765615623,
|
| 122 |
+
"clearance": 0.08062320730338494,
|
| 123 |
+
"corridor": 0.23694788571447134,
|
| 124 |
+
"disturbance": 0.002010827219540564,
|
| 125 |
+
"grasp_affordance": 0.012944541425288966,
|
| 126 |
+
"occluder_contact": 0.20663638102511564,
|
| 127 |
+
"persistence": 2.024513818323612,
|
| 128 |
+
"phase": 0.4406547602266073,
|
| 129 |
+
"planner_ranking": 0.052334820929293834,
|
| 130 |
+
"planner_risk": 0.012688904457415143,
|
| 131 |
+
"planner_success": 0.4998842130104701,
|
| 132 |
+
"proposal_diversity": 0.0,
|
| 133 |
+
"proposal_ranking": 1.1411344707012177,
|
| 134 |
+
"proposal_reconstruction": 0.058503514621406794,
|
| 135 |
+
"proposal_success": 0.663138655324777,
|
| 136 |
+
"reocclusion": 0.28770653810352087,
|
| 137 |
+
"role_swap_consistency": 0.0005917157322983257,
|
| 138 |
+
"support_mode": 0.00027886544603461516,
|
| 139 |
+
"support_stability": 0.14369840795795122,
|
| 140 |
+
"total": 1.2098931844035785,
|
| 141 |
+
"uncertainty": 0.009047253523021936,
|
| 142 |
+
"visibility": 0.09652530650297801,
|
| 143 |
+
"world_model": 2.1335272987683616
|
| 144 |
+
},
|
| 145 |
+
"val": {
|
| 146 |
+
"action": 0.0173407852028807,
|
| 147 |
+
"arm_role": 0.00028451886545452807,
|
| 148 |
+
"belief": 0.09623022625843684,
|
| 149 |
+
"clearance": 0.07612819969654083,
|
| 150 |
+
"corridor": 0.22281885809368557,
|
| 151 |
+
"disturbance": 0.001401680282368842,
|
| 152 |
+
"grasp_affordance": 0.00781761777276794,
|
| 153 |
+
"occluder_contact": 0.20622349116537306,
|
| 154 |
+
"persistence": 2.1598196625709534,
|
| 155 |
+
"phase": 0.47410638795958626,
|
| 156 |
+
"planner_ranking": 0.0378283916765617,
|
| 157 |
+
"planner_risk": 0.013348096515983343,
|
| 158 |
+
"planner_success": 0.4943488637606303,
|
| 159 |
+
"proposal_diversity": 0.0,
|
| 160 |
+
"proposal_ranking": 1.1125682062572904,
|
| 161 |
+
"proposal_reconstruction": 0.06057575262255139,
|
| 162 |
+
"proposal_success": 0.6509590811199613,
|
| 163 |
+
"reocclusion": 0.2778696550263299,
|
| 164 |
+
"role_swap_consistency": 0.0,
|
| 165 |
+
"support_mode": 7.348006571798275e-05,
|
| 166 |
+
"support_stability": 0.14099042697085273,
|
| 167 |
+
"total": 1.2928278247515361,
|
| 168 |
+
"uncertainty": 0.0023198039270937443,
|
| 169 |
+
"visibility": 0.08993011878596412,
|
| 170 |
+
"world_model": 2.425517029232449
|
| 171 |
+
}
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 3,
|
| 175 |
+
"train": {
|
| 176 |
+
"action": 0.015032132350218793,
|
| 177 |
+
"arm_role": 0.00015960596041016592,
|
| 178 |
+
"belief": 0.10330141056329012,
|
| 179 |
+
"clearance": 0.0756644958940645,
|
| 180 |
+
"corridor": 0.22099452962478003,
|
| 181 |
+
"disturbance": 0.0017974149668589234,
|
| 182 |
+
"grasp_affordance": 0.008848114540645232,
|
| 183 |
+
"occluder_contact": 0.20204609570403895,
|
| 184 |
+
"persistence": 1.6058371538917224,
|
| 185 |
+
"phase": 0.42861080542206764,
|
| 186 |
+
"planner_ranking": 0.040083787171170115,
|
| 187 |
+
"planner_risk": 0.010861996522483727,
|
| 188 |
+
"planner_success": 0.48133989547689754,
|
| 189 |
+
"proposal_diversity": 0.0,
|
| 190 |
+
"proposal_ranking": 1.1467161824305852,
|
| 191 |
+
"proposal_reconstruction": 0.058588774874806404,
|
| 192 |
+
"proposal_success": 0.6429290076096853,
|
| 193 |
+
"reocclusion": 0.24268781704207262,
|
| 194 |
+
"role_swap_consistency": 0.00047596763154918637,
|
| 195 |
+
"support_mode": 2.783346417345456e-05,
|
| 196 |
+
"support_stability": 0.1325785775358478,
|
| 197 |
+
"total": 1.1217727214097977,
|
| 198 |
+
"uncertainty": 0.003058687725570053,
|
| 199 |
+
"visibility": 0.09524129331111908,
|
| 200 |
+
"world_model": 2.0093316386143365
|
| 201 |
+
},
|
| 202 |
+
"val": {
|
| 203 |
+
"action": 0.016727436126934156,
|
| 204 |
+
"arm_role": 0.0002483524456490866,
|
| 205 |
+
"belief": 0.09281252986854976,
|
| 206 |
+
"clearance": 0.0730266264743275,
|
| 207 |
+
"corridor": 0.22520612014664543,
|
| 208 |
+
"disturbance": 0.0031746443160550874,
|
| 209 |
+
"grasp_affordance": 0.00780139294349485,
|
| 210 |
+
"occluder_contact": 0.20420674648549822,
|
| 211 |
+
"persistence": 1.9897065493795607,
|
| 212 |
+
"phase": 0.42935120397143894,
|
| 213 |
+
"planner_ranking": 0.03520135974718465,
|
| 214 |
+
"planner_risk": 0.012488630910714468,
|
| 215 |
+
"planner_success": 0.5116605394416385,
|
| 216 |
+
"proposal_diversity": 0.0,
|
| 217 |
+
"proposal_ranking": 1.1216257943047419,
|
| 218 |
+
"proposal_reconstruction": 0.05996803608205584,
|
| 219 |
+
"proposal_success": 0.6389667987823486,
|
| 220 |
+
"reocclusion": 0.26481906490193474,
|
| 221 |
+
"role_swap_consistency": 0.0,
|
| 222 |
+
"support_mode": 4.154515813247094e-05,
|
| 223 |
+
"support_stability": 0.13968953986962637,
|
| 224 |
+
"total": 1.1943119830555387,
|
| 225 |
+
"uncertainty": 0.0017189466937755544,
|
| 226 |
+
"visibility": 0.09683923174937566,
|
| 227 |
+
"world_model": 2.1186628209220038
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
]
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_r3d_stage1_dummy_seed15",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.1217727214097977,
|
| 6 |
+
"final_val_total": 1.1943119830555387,
|
| 7 |
+
"train_time_sec": 20.030457735061646,
|
| 8 |
+
"peak_gpu_memory_mb": 631.1953125,
|
| 9 |
+
"num_train_samples": 380,
|
| 10 |
+
"num_val_samples": 131,
|
| 11 |
+
"planner_mode": "trainable",
|
| 12 |
+
"frozen_modules": [],
|
| 13 |
+
"init_info": null
|
| 14 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"full": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5416666666666666,
|
| 9 |
+
"visibility_integral": 34.34427807728449,
|
| 10 |
+
"corridor_availability": 0.893132723040051,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 2.3119179729333856,
|
| 13 |
+
"disturbance_cost": 0.39262517919350004
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## full
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.542
|
| 6 |
+
- visibility_integral: 34.344
|
| 7 |
+
- corridor_availability: 0.893
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 2.312
|
| 10 |
+
- disturbance_cost: 0.393
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.625
|