Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml +85 -0
- artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json +106 -0
- artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml +88 -0
- artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json +54 -0
- artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml +85 -0
- artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json +186 -0
- artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml +88 -0
- artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json +94 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml +89 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json +28 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json +44 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml +89 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json +28 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json +44 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml +89 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json +28 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json +44 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml +89 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json +28 -0
- artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json +44 -0
- artifacts/reports/reveal_eval/reveal_benchmark.json +28 -0
- artifacts/reports/reveal_eval/reveal_benchmark.md +25 -0
- artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json +41 -0
- artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md +12 -0
- artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json +41 -0
- code/reveal_vla_bimanual/.gitignore +12 -0
- code/reveal_vla_bimanual/README.md +82 -0
- code/reveal_vla_bimanual/docs/upstream_pins.md +24 -0
- code/reveal_vla_bimanual/docs/xorg.rtx6000.conf +33 -0
- code/reveal_vla_bimanual/envs/reveal310.yaml +38 -0
- code/reveal_vla_bimanual/envs/rlbench310.yaml +50 -0
- code/reveal_vla_bimanual/eval/__init__.py +3 -0
- code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc +0 -0
- code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc +0 -0
- code/reveal_vla_bimanual/eval/ablations.py +8 -0
- code/reveal_vla_bimanual/eval/metrics.py +52 -0
- code/reveal_vla_bimanual/eval/report.py +50 -0
- code/reveal_vla_bimanual/eval/run_ablations.py +68 -0
- code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +231 -0
artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_backbone_only
|
| 2 |
+
output_dir: /workspace/outputs/reveal_runs
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 96
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
|
| 14 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
|
| 15 |
+
rebuild_dataset: true
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
seed: 7
|
| 19 |
+
optim:
|
| 20 |
+
epochs: 8
|
| 21 |
+
batch_size: 16
|
| 22 |
+
num_workers: 0
|
| 23 |
+
lr: 0.001
|
| 24 |
+
weight_decay: 0.0001
|
| 25 |
+
trainer:
|
| 26 |
+
policy_type: backbone_only
|
| 27 |
+
use_bf16: true
|
| 28 |
+
grad_clip_norm: 1.0
|
| 29 |
+
freeze_backbone: true
|
| 30 |
+
gradient_checkpointing: false
|
| 31 |
+
policy:
|
| 32 |
+
backbone:
|
| 33 |
+
model_name: openai/clip-vit-base-patch32
|
| 34 |
+
hidden_dim: 128
|
| 35 |
+
max_text_tokens: 32
|
| 36 |
+
freeze_backbone: true
|
| 37 |
+
gradient_checkpointing: false
|
| 38 |
+
use_dummy_backbone: true
|
| 39 |
+
fusion:
|
| 40 |
+
hidden_dim: 128
|
| 41 |
+
num_cameras: 3
|
| 42 |
+
num_layers: 2
|
| 43 |
+
num_heads: 4
|
| 44 |
+
ff_dim: 256
|
| 45 |
+
dropout: 0.1
|
| 46 |
+
proprio_dim: 32
|
| 47 |
+
proprio_tokens: 1
|
| 48 |
+
decoder:
|
| 49 |
+
hidden_dim: 128
|
| 50 |
+
num_heads: 4
|
| 51 |
+
num_layers: 2
|
| 52 |
+
ff_dim: 256
|
| 53 |
+
dropout: 0.1
|
| 54 |
+
chunk_size: 8
|
| 55 |
+
action_dim: 14
|
| 56 |
+
num_candidates: 8
|
| 57 |
+
reveal_head:
|
| 58 |
+
hidden_dim: 128
|
| 59 |
+
num_support_modes: 3
|
| 60 |
+
num_approach_templates: 32
|
| 61 |
+
rollout_horizon: 5
|
| 62 |
+
belief_map_size: 32
|
| 63 |
+
predict_belief_map: true
|
| 64 |
+
world_model:
|
| 65 |
+
hidden_dim: 128
|
| 66 |
+
action_dim: 14
|
| 67 |
+
num_support_modes: 3
|
| 68 |
+
num_approach_templates: 32
|
| 69 |
+
rollout_horizon: 5
|
| 70 |
+
planner:
|
| 71 |
+
num_candidates: 8
|
| 72 |
+
corridor_weight: 1.0
|
| 73 |
+
persistence_weight: 0.5
|
| 74 |
+
proposal_weight: 0.5
|
| 75 |
+
disturbance_weight: 0.75
|
| 76 |
+
reocclusion_weight: 0.5
|
| 77 |
+
visibility_weight: 0.25
|
| 78 |
+
loss_weights:
|
| 79 |
+
action: 1.0
|
| 80 |
+
support_mode: 0.1
|
| 81 |
+
corridor: 0.1
|
| 82 |
+
persistence: 0.05
|
| 83 |
+
disturbance: 0.05
|
| 84 |
+
world_model: 0.1
|
| 85 |
+
belief: 0.05
|
artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.06700062464612226,
|
| 6 |
+
"total": 0.06700062464612226,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.02209080010652542,
|
| 11 |
+
"total": 0.02209080010652542,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.02441179845482111,
|
| 19 |
+
"total": 0.02441179845482111,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.01861108955927193,
|
| 24 |
+
"total": 0.01861108955927193,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 2,
|
| 30 |
+
"train": {
|
| 31 |
+
"action": 0.020652000947544973,
|
| 32 |
+
"total": 0.020652000947544973,
|
| 33 |
+
"world_model": 0.0
|
| 34 |
+
},
|
| 35 |
+
"val": {
|
| 36 |
+
"action": 0.01581601658836007,
|
| 37 |
+
"total": 0.01581601658836007,
|
| 38 |
+
"world_model": 0.0
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"epoch": 3,
|
| 43 |
+
"train": {
|
| 44 |
+
"action": 0.01735153196689983,
|
| 45 |
+
"total": 0.01735153196689983,
|
| 46 |
+
"world_model": 0.0
|
| 47 |
+
},
|
| 48 |
+
"val": {
|
| 49 |
+
"action": 0.01413003564812243,
|
| 50 |
+
"total": 0.01413003564812243,
|
| 51 |
+
"world_model": 0.0
|
| 52 |
+
}
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 4,
|
| 56 |
+
"train": {
|
| 57 |
+
"action": 0.015502698409060637,
|
| 58 |
+
"total": 0.015502698409060637,
|
| 59 |
+
"world_model": 0.0
|
| 60 |
+
},
|
| 61 |
+
"val": {
|
| 62 |
+
"action": 0.012679400155320764,
|
| 63 |
+
"total": 0.012679400155320764,
|
| 64 |
+
"world_model": 0.0
|
| 65 |
+
}
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 5,
|
| 69 |
+
"train": {
|
| 70 |
+
"action": 0.015521424783704182,
|
| 71 |
+
"total": 0.015521424783704182,
|
| 72 |
+
"world_model": 0.0
|
| 73 |
+
},
|
| 74 |
+
"val": {
|
| 75 |
+
"action": 0.011973066837526858,
|
| 76 |
+
"total": 0.011973066837526858,
|
| 77 |
+
"world_model": 0.0
|
| 78 |
+
}
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 6,
|
| 82 |
+
"train": {
|
| 83 |
+
"action": 0.014476912096142769,
|
| 84 |
+
"total": 0.014476912096142769,
|
| 85 |
+
"world_model": 0.0
|
| 86 |
+
},
|
| 87 |
+
"val": {
|
| 88 |
+
"action": 0.011093099834397435,
|
| 89 |
+
"total": 0.011093099834397435,
|
| 90 |
+
"world_model": 0.0
|
| 91 |
+
}
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"epoch": 7,
|
| 95 |
+
"train": {
|
| 96 |
+
"action": 0.012226066280466815,
|
| 97 |
+
"total": 0.012226066280466815,
|
| 98 |
+
"world_model": 0.0
|
| 99 |
+
},
|
| 100 |
+
"val": {
|
| 101 |
+
"action": 0.012411019764840603,
|
| 102 |
+
"total": 0.012411019764840603,
|
| 103 |
+
"world_model": 0.0
|
| 104 |
+
}
|
| 105 |
+
}
|
| 106 |
+
]
|
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_backbone_only_clip
|
| 2 |
+
output_dir: /workspace/outputs/reveal_runs
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 224
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
|
| 14 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
|
| 15 |
+
rebuild_dataset: true
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
seed: 7
|
| 19 |
+
optim:
|
| 20 |
+
epochs: 4
|
| 21 |
+
batch_size: 2
|
| 22 |
+
num_workers: 0
|
| 23 |
+
lr: 0.0003
|
| 24 |
+
weight_decay: 0.0001
|
| 25 |
+
trainer:
|
| 26 |
+
policy_type: backbone_only
|
| 27 |
+
use_bf16: true
|
| 28 |
+
grad_clip_norm: 1.0
|
| 29 |
+
freeze_backbone: true
|
| 30 |
+
gradient_checkpointing: false
|
| 31 |
+
plan_during_train: false
|
| 32 |
+
plan_during_eval: false
|
| 33 |
+
support_mode_conditioning: true
|
| 34 |
+
policy:
|
| 35 |
+
backbone:
|
| 36 |
+
model_name: openai/clip-vit-base-patch32
|
| 37 |
+
hidden_dim: 512
|
| 38 |
+
max_text_tokens: 32
|
| 39 |
+
freeze_backbone: true
|
| 40 |
+
gradient_checkpointing: false
|
| 41 |
+
use_dummy_backbone: false
|
| 42 |
+
fusion:
|
| 43 |
+
hidden_dim: 512
|
| 44 |
+
num_cameras: 3
|
| 45 |
+
num_layers: 4
|
| 46 |
+
num_heads: 8
|
| 47 |
+
ff_dim: 2048
|
| 48 |
+
dropout: 0.1
|
| 49 |
+
proprio_dim: 32
|
| 50 |
+
proprio_tokens: 1
|
| 51 |
+
decoder:
|
| 52 |
+
hidden_dim: 512
|
| 53 |
+
num_heads: 8
|
| 54 |
+
num_layers: 4
|
| 55 |
+
ff_dim: 2048
|
| 56 |
+
dropout: 0.1
|
| 57 |
+
chunk_size: 8
|
| 58 |
+
action_dim: 14
|
| 59 |
+
num_candidates: 8
|
| 60 |
+
reveal_head:
|
| 61 |
+
hidden_dim: 512
|
| 62 |
+
num_support_modes: 3
|
| 63 |
+
num_approach_templates: 32
|
| 64 |
+
rollout_horizon: 5
|
| 65 |
+
belief_map_size: 32
|
| 66 |
+
predict_belief_map: true
|
| 67 |
+
world_model:
|
| 68 |
+
hidden_dim: 512
|
| 69 |
+
action_dim: 14
|
| 70 |
+
num_support_modes: 3
|
| 71 |
+
num_approach_templates: 32
|
| 72 |
+
rollout_horizon: 5
|
| 73 |
+
planner:
|
| 74 |
+
num_candidates: 8
|
| 75 |
+
corridor_weight: 1.0
|
| 76 |
+
persistence_weight: 0.5
|
| 77 |
+
proposal_weight: 0.5
|
| 78 |
+
disturbance_weight: 0.75
|
| 79 |
+
reocclusion_weight: 0.5
|
| 80 |
+
visibility_weight: 0.25
|
| 81 |
+
loss_weights:
|
| 82 |
+
action: 1.0
|
| 83 |
+
support_mode: 0.1
|
| 84 |
+
corridor: 0.1
|
| 85 |
+
persistence: 0.05
|
| 86 |
+
disturbance: 0.05
|
| 87 |
+
world_model: 0.1
|
| 88 |
+
belief: 0.05
|
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.14342915779711063,
|
| 6 |
+
"total": 0.14342915779711063,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.026520084648851364,
|
| 11 |
+
"total": 0.026520084648851364,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.01376689436079944,
|
| 19 |
+
"total": 0.01376689436079944,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.00792281218390498,
|
| 24 |
+
"total": 0.00792281218390498,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 2,
|
| 30 |
+
"train": {
|
| 31 |
+
"action": 0.009396829446095057,
|
| 32 |
+
"total": 0.009396829446095057,
|
| 33 |
+
"world_model": 0.0
|
| 34 |
+
},
|
| 35 |
+
"val": {
|
| 36 |
+
"action": 0.006728713663058385,
|
| 37 |
+
"total": 0.006728713663058385,
|
| 38 |
+
"world_model": 0.0
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"epoch": 3,
|
| 43 |
+
"train": {
|
| 44 |
+
"action": 0.007774835790102784,
|
| 45 |
+
"total": 0.007774835790102784,
|
| 46 |
+
"world_model": 0.0
|
| 47 |
+
},
|
| 48 |
+
"val": {
|
| 49 |
+
"action": 0.005187951255634073,
|
| 50 |
+
"total": 0.005187951255634073,
|
| 51 |
+
"world_model": 0.0
|
| 52 |
+
}
|
| 53 |
+
}
|
| 54 |
+
]
|
artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_reveal_state
|
| 2 |
+
output_dir: /workspace/outputs/reveal_runs
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 96
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
|
| 14 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
|
| 15 |
+
rebuild_dataset: false
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
seed: 7
|
| 19 |
+
optim:
|
| 20 |
+
epochs: 8
|
| 21 |
+
batch_size: 16
|
| 22 |
+
num_workers: 0
|
| 23 |
+
lr: 0.001
|
| 24 |
+
weight_decay: 0.0001
|
| 25 |
+
trainer:
|
| 26 |
+
policy_type: reveal_state
|
| 27 |
+
use_bf16: true
|
| 28 |
+
grad_clip_norm: 1.0
|
| 29 |
+
freeze_backbone: true
|
| 30 |
+
gradient_checkpointing: false
|
| 31 |
+
policy:
|
| 32 |
+
backbone:
|
| 33 |
+
model_name: openai/clip-vit-base-patch32
|
| 34 |
+
hidden_dim: 128
|
| 35 |
+
max_text_tokens: 32
|
| 36 |
+
freeze_backbone: true
|
| 37 |
+
gradient_checkpointing: false
|
| 38 |
+
use_dummy_backbone: true
|
| 39 |
+
fusion:
|
| 40 |
+
hidden_dim: 128
|
| 41 |
+
num_cameras: 3
|
| 42 |
+
num_layers: 2
|
| 43 |
+
num_heads: 4
|
| 44 |
+
ff_dim: 256
|
| 45 |
+
dropout: 0.1
|
| 46 |
+
proprio_dim: 32
|
| 47 |
+
proprio_tokens: 1
|
| 48 |
+
decoder:
|
| 49 |
+
hidden_dim: 128
|
| 50 |
+
num_heads: 4
|
| 51 |
+
num_layers: 2
|
| 52 |
+
ff_dim: 256
|
| 53 |
+
dropout: 0.1
|
| 54 |
+
chunk_size: 8
|
| 55 |
+
action_dim: 14
|
| 56 |
+
num_candidates: 8
|
| 57 |
+
reveal_head:
|
| 58 |
+
hidden_dim: 128
|
| 59 |
+
num_support_modes: 3
|
| 60 |
+
num_approach_templates: 32
|
| 61 |
+
rollout_horizon: 5
|
| 62 |
+
belief_map_size: 32
|
| 63 |
+
predict_belief_map: true
|
| 64 |
+
world_model:
|
| 65 |
+
hidden_dim: 128
|
| 66 |
+
action_dim: 14
|
| 67 |
+
num_support_modes: 3
|
| 68 |
+
num_approach_templates: 32
|
| 69 |
+
rollout_horizon: 5
|
| 70 |
+
planner:
|
| 71 |
+
num_candidates: 8
|
| 72 |
+
corridor_weight: 1.0
|
| 73 |
+
persistence_weight: 0.65
|
| 74 |
+
proposal_weight: 0.35
|
| 75 |
+
disturbance_weight: 0.8
|
| 76 |
+
reocclusion_weight: 0.6
|
| 77 |
+
visibility_weight: 0.35
|
| 78 |
+
loss_weights:
|
| 79 |
+
action: 1.0
|
| 80 |
+
support_mode: 0.15
|
| 81 |
+
corridor: 0.2
|
| 82 |
+
persistence: 0.1
|
| 83 |
+
disturbance: 0.1
|
| 84 |
+
world_model: 0.2
|
| 85 |
+
belief: 0.05
|
artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.2602546961667637,
|
| 6 |
+
"belief": 0.4802860766649246,
|
| 7 |
+
"corridor": 0.6443073948224386,
|
| 8 |
+
"disturbance": 0.006578955658672688,
|
| 9 |
+
"persistence": 4.514919241269429,
|
| 10 |
+
"support_mode": 0.8015391031901041,
|
| 11 |
+
"total": 2.0875226110219955,
|
| 12 |
+
"world_model": 5.510057131449382
|
| 13 |
+
},
|
| 14 |
+
"val": {
|
| 15 |
+
"action": 0.04658499173820019,
|
| 16 |
+
"belief": 0.280171237885952,
|
| 17 |
+
"corridor": 0.5032978095114231,
|
| 18 |
+
"disturbance": 0.003645064221927896,
|
| 19 |
+
"persistence": 3.8178451359272003,
|
| 20 |
+
"support_mode": 0.6714280992746353,
|
| 21 |
+
"total": 1.012940600514412,
|
| 22 |
+
"world_model": 1.8441212028265
|
| 23 |
+
}
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1,
|
| 27 |
+
"train": {
|
| 28 |
+
"action": 0.03881739747400085,
|
| 29 |
+
"belief": 0.18641860752056041,
|
| 30 |
+
"corridor": 0.3944183625280857,
|
| 31 |
+
"disturbance": 0.030439561344489146,
|
| 32 |
+
"persistence": 3.206294293204943,
|
| 33 |
+
"support_mode": 0.5347911287099123,
|
| 34 |
+
"total": 0.9082020496328672,
|
| 35 |
+
"world_model": 1.8864398151636124
|
| 36 |
+
},
|
| 37 |
+
"val": {
|
| 38 |
+
"action": 0.04213718790560961,
|
| 39 |
+
"belief": 0.15712551027536392,
|
| 40 |
+
"corridor": 0.3507457673549652,
|
| 41 |
+
"disturbance": 0.006276358384639025,
|
| 42 |
+
"persistence": 1.8078171163797379,
|
| 43 |
+
"support_mode": 0.10970124043524265,
|
| 44 |
+
"total": 0.6724201738834381,
|
| 45 |
+
"world_model": 1.772064983844757
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"epoch": 2,
|
| 50 |
+
"train": {
|
| 51 |
+
"action": 0.031200370130439598,
|
| 52 |
+
"belief": 0.13828600694735846,
|
| 53 |
+
"corridor": 0.31750819956262905,
|
| 54 |
+
"disturbance": 0.011857866222271696,
|
| 55 |
+
"persistence": 1.7015922193725903,
|
| 56 |
+
"support_mode": 0.02674841312303518,
|
| 57 |
+
"total": 0.6129550884167353,
|
| 58 |
+
"world_model": 1.6799074759085972
|
| 59 |
+
},
|
| 60 |
+
"val": {
|
| 61 |
+
"action": 0.019523032009601593,
|
| 62 |
+
"belief": 0.09429990872740746,
|
| 63 |
+
"corridor": 0.24884792044758797,
|
| 64 |
+
"disturbance": 0.0043011417728848755,
|
| 65 |
+
"persistence": 1.5114311277866364,
|
| 66 |
+
"support_mode": 0.0060500025865621865,
|
| 67 |
+
"total": 0.5359727554023266,
|
| 68 |
+
"world_model": 1.5474220663309097
|
| 69 |
+
}
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 3,
|
| 73 |
+
"train": {
|
| 74 |
+
"action": 0.022356805779660743,
|
| 75 |
+
"belief": 0.09125891048461199,
|
| 76 |
+
"corridor": 0.23351835707823435,
|
| 77 |
+
"disturbance": 0.006718798467773013,
|
| 78 |
+
"persistence": 1.6300043910741806,
|
| 79 |
+
"support_mode": 0.004253969304651643,
|
| 80 |
+
"total": 0.5548354809482893,
|
| 81 |
+
"world_model": 1.5845081210136414
|
| 82 |
+
},
|
| 83 |
+
"val": {
|
| 84 |
+
"action": 0.01580847823061049,
|
| 85 |
+
"belief": 0.09042494650930166,
|
| 86 |
+
"corridor": 0.22376472875475883,
|
| 87 |
+
"disturbance": 0.018967560958117247,
|
| 88 |
+
"persistence": 1.4363956600427628,
|
| 89 |
+
"support_mode": 0.03418254409916699,
|
| 90 |
+
"total": 0.5279115326702595,
|
| 91 |
+
"world_model": 1.5608257874846458
|
| 92 |
+
}
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"epoch": 4,
|
| 96 |
+
"train": {
|
| 97 |
+
"action": 0.019881066245337326,
|
| 98 |
+
"belief": 0.08954659259567659,
|
| 99 |
+
"corridor": 0.21636931287745634,
|
| 100 |
+
"disturbance": 0.005539724506282558,
|
| 101 |
+
"persistence": 1.592231921851635,
|
| 102 |
+
"support_mode": 0.008331454223177085,
|
| 103 |
+
"total": 0.5372808227936426,
|
| 104 |
+
"world_model": 1.5431083713968594
|
| 105 |
+
},
|
| 106 |
+
"val": {
|
| 107 |
+
"action": 0.015133287757635117,
|
| 108 |
+
"belief": 0.08718204218894243,
|
| 109 |
+
"corridor": 0.20481965504586697,
|
| 110 |
+
"disturbance": 0.0031357303814729676,
|
| 111 |
+
"persistence": 1.3192060887813568,
|
| 112 |
+
"support_mode": 0.0030863596766721457,
|
| 113 |
+
"total": 0.47997843474149704,
|
| 114 |
+
"world_model": 1.4341248571872711
|
| 115 |
+
}
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 5,
|
| 119 |
+
"train": {
|
| 120 |
+
"action": 0.030778280459344387,
|
| 121 |
+
"belief": 0.09159998937199514,
|
| 122 |
+
"corridor": 0.21967005419234434,
|
| 123 |
+
"disturbance": 0.005901901221174437,
|
| 124 |
+
"persistence": 1.651158797244231,
|
| 125 |
+
"support_mode": 0.0024410486221313477,
|
| 126 |
+
"total": 0.5050872204204401,
|
| 127 |
+
"world_model": 1.2986134762565296
|
| 128 |
+
},
|
| 129 |
+
"val": {
|
| 130 |
+
"action": 0.03259791061282158,
|
| 131 |
+
"belief": 0.08867455553263426,
|
| 132 |
+
"corridor": 0.20528649538755417,
|
| 133 |
+
"disturbance": 0.0037689711316488683,
|
| 134 |
+
"persistence": 1.3772646486759186,
|
| 135 |
+
"support_mode": 0.0007588127191411331,
|
| 136 |
+
"total": 0.4101765304803848,
|
| 137 |
+
"world_model": 0.9693519398570061
|
| 138 |
+
}
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 6,
|
| 142 |
+
"train": {
|
| 143 |
+
"action": 0.028416083427146077,
|
| 144 |
+
"belief": 0.09289384291817744,
|
| 145 |
+
"corridor": 0.22298985657592615,
|
| 146 |
+
"disturbance": 0.0031898027373244986,
|
| 147 |
+
"persistence": 1.2752377291520436,
|
| 148 |
+
"support_mode": 0.04850278014297752,
|
| 149 |
+
"total": 0.40898223718007404,
|
| 150 |
+
"world_model": 0.9810265600681305
|
| 151 |
+
},
|
| 152 |
+
"val": {
|
| 153 |
+
"action": 0.02159481483977288,
|
| 154 |
+
"belief": 0.08797950763255358,
|
| 155 |
+
"corridor": 0.20524934865534306,
|
| 156 |
+
"disturbance": 0.0015436648827744648,
|
| 157 |
+
"persistence": 1.286000706255436,
|
| 158 |
+
"support_mode": 0.0010480962373549119,
|
| 159 |
+
"total": 0.3605738691985607,
|
| 160 |
+
"world_model": 0.8230927512049675
|
| 161 |
+
}
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 7,
|
| 165 |
+
"train": {
|
| 166 |
+
"action": 0.021424691736077268,
|
| 167 |
+
"belief": 0.0899931692207853,
|
| 168 |
+
"corridor": 0.21607277914881706,
|
| 169 |
+
"disturbance": 0.0034827212220989168,
|
| 170 |
+
"persistence": 0.9069182885189851,
|
| 171 |
+
"support_mode": 0.00435957100125961,
|
| 172 |
+
"total": 0.3383450036247571,
|
| 173 |
+
"world_model": 0.8875602881113688
|
| 174 |
+
},
|
| 175 |
+
"val": {
|
| 176 |
+
"action": 0.017686392879113555,
|
| 177 |
+
"belief": 0.09035013243556023,
|
| 178 |
+
"corridor": 0.21036655083298683,
|
| 179 |
+
"disturbance": 0.004888073919573799,
|
| 180 |
+
"persistence": 0.5709216743707657,
|
| 181 |
+
"support_mode": 0.001884725206764415,
|
| 182 |
+
"total": 0.31777225248515606,
|
| 183 |
+
"world_model": 0.978156752884388
|
| 184 |
+
}
|
| 185 |
+
}
|
| 186 |
+
]
|
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_reveal_state_clip
|
| 2 |
+
output_dir: /workspace/outputs/reveal_runs
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
data:
|
| 6 |
+
proxies:
|
| 7 |
+
- foliage_proxy
|
| 8 |
+
- bag_proxy
|
| 9 |
+
- cloth_proxy
|
| 10 |
+
resolution: 224
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
|
| 14 |
+
val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
|
| 15 |
+
rebuild_dataset: false
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
seed: 7
|
| 19 |
+
optim:
|
| 20 |
+
epochs: 4
|
| 21 |
+
batch_size: 2
|
| 22 |
+
num_workers: 0
|
| 23 |
+
lr: 0.0003
|
| 24 |
+
weight_decay: 0.0001
|
| 25 |
+
trainer:
|
| 26 |
+
policy_type: reveal_state
|
| 27 |
+
use_bf16: true
|
| 28 |
+
grad_clip_norm: 1.0
|
| 29 |
+
freeze_backbone: true
|
| 30 |
+
gradient_checkpointing: false
|
| 31 |
+
plan_during_train: true
|
| 32 |
+
plan_during_eval: true
|
| 33 |
+
support_mode_conditioning: true
|
| 34 |
+
policy:
|
| 35 |
+
backbone:
|
| 36 |
+
model_name: openai/clip-vit-base-patch32
|
| 37 |
+
hidden_dim: 512
|
| 38 |
+
max_text_tokens: 32
|
| 39 |
+
freeze_backbone: true
|
| 40 |
+
gradient_checkpointing: false
|
| 41 |
+
use_dummy_backbone: false
|
| 42 |
+
fusion:
|
| 43 |
+
hidden_dim: 512
|
| 44 |
+
num_cameras: 3
|
| 45 |
+
num_layers: 4
|
| 46 |
+
num_heads: 8
|
| 47 |
+
ff_dim: 2048
|
| 48 |
+
dropout: 0.1
|
| 49 |
+
proprio_dim: 32
|
| 50 |
+
proprio_tokens: 1
|
| 51 |
+
decoder:
|
| 52 |
+
hidden_dim: 512
|
| 53 |
+
num_heads: 8
|
| 54 |
+
num_layers: 4
|
| 55 |
+
ff_dim: 2048
|
| 56 |
+
dropout: 0.1
|
| 57 |
+
chunk_size: 8
|
| 58 |
+
action_dim: 14
|
| 59 |
+
num_candidates: 8
|
| 60 |
+
reveal_head:
|
| 61 |
+
hidden_dim: 512
|
| 62 |
+
num_support_modes: 3
|
| 63 |
+
num_approach_templates: 32
|
| 64 |
+
rollout_horizon: 5
|
| 65 |
+
belief_map_size: 32
|
| 66 |
+
predict_belief_map: true
|
| 67 |
+
world_model:
|
| 68 |
+
hidden_dim: 512
|
| 69 |
+
action_dim: 14
|
| 70 |
+
num_support_modes: 3
|
| 71 |
+
num_approach_templates: 32
|
| 72 |
+
rollout_horizon: 5
|
| 73 |
+
planner:
|
| 74 |
+
num_candidates: 8
|
| 75 |
+
corridor_weight: 1.0
|
| 76 |
+
persistence_weight: 0.65
|
| 77 |
+
proposal_weight: 0.35
|
| 78 |
+
disturbance_weight: 0.8
|
| 79 |
+
reocclusion_weight: 0.6
|
| 80 |
+
visibility_weight: 0.35
|
| 81 |
+
loss_weights:
|
| 82 |
+
action: 1.0
|
| 83 |
+
support_mode: 0.15
|
| 84 |
+
corridor: 0.2
|
| 85 |
+
persistence: 0.1
|
| 86 |
+
disturbance: 0.1
|
| 87 |
+
world_model: 0.2
|
| 88 |
+
belief: 0.05
|
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.2168051045758562,
|
| 6 |
+
"belief": 0.16835976690444024,
|
| 7 |
+
"corridor": 0.2947022703851705,
|
| 8 |
+
"disturbance": 0.007973204485554213,
|
| 9 |
+
"persistence": 4.26063614482967,
|
| 10 |
+
"support_mode": 0.7333370827879581,
|
| 11 |
+
"total": 1.1824027625990163,
|
| 12 |
+
"world_model": 1.8068884567440493
|
| 13 |
+
},
|
| 14 |
+
"val": {
|
| 15 |
+
"action": 0.06980070081495103,
|
| 16 |
+
"belief": 0.09293079068736425,
|
| 17 |
+
"corridor": 0.23202623426914215,
|
| 18 |
+
"disturbance": 0.006832122442401236,
|
| 19 |
+
"persistence": 3.871745571257576,
|
| 20 |
+
"support_mode": 0.6699983808729384,
|
| 21 |
+
"total": 0.7863351002572074,
|
| 22 |
+
"world_model": 0.8856253113065448
|
| 23 |
+
}
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 1,
|
| 27 |
+
"train": {
|
| 28 |
+
"action": 0.054234529075003104,
|
| 29 |
+
"belief": 0.09439963061382009,
|
| 30 |
+
"corridor": 0.24123663386983396,
|
| 31 |
+
"disturbance": 0.008799185583979581,
|
| 32 |
+
"persistence": 3.9709763473865247,
|
| 33 |
+
"support_mode": 0.674577163776178,
|
| 34 |
+
"total": 0.796180099092853,
|
| 35 |
+
"world_model": 0.9490705705125918
|
| 36 |
+
},
|
| 37 |
+
"val": {
|
| 38 |
+
"action": 0.06558700479448788,
|
| 39 |
+
"belief": 0.1815936780638165,
|
| 40 |
+
"corridor": 0.3361685186151474,
|
| 41 |
+
"disturbance": 0.023940630294086915,
|
| 42 |
+
"persistence": 4.7415515091565865,
|
| 43 |
+
"support_mode": 0.8642671259622725,
|
| 44 |
+
"total": 0.9338183213794042,
|
| 45 |
+
"world_model": 0.9286431225519332
|
| 46 |
+
}
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"epoch": 2,
|
| 50 |
+
"train": {
|
| 51 |
+
"action": 0.03919103866472294,
|
| 52 |
+
"belief": 0.09202757795677759,
|
| 53 |
+
"corridor": 0.21921133667874243,
|
| 54 |
+
"disturbance": 0.04529383548148981,
|
| 55 |
+
"persistence": 1.5436662856260246,
|
| 56 |
+
"support_mode": 0.23989241035820927,
|
| 57 |
+
"total": 0.45590807076212,
|
| 58 |
+
"world_model": 0.8669675243774634
|
| 59 |
+
},
|
| 60 |
+
"val": {
|
| 61 |
+
"action": 0.02496799406787706,
|
| 62 |
+
"belief": 0.08762083173034683,
|
| 63 |
+
"corridor": 0.1930048821996602,
|
| 64 |
+
"disturbance": 0.012308748878745569,
|
| 65 |
+
"persistence": 0.9973389923809066,
|
| 66 |
+
"support_mode": 0.14653402309687363,
|
| 67 |
+
"total": 0.34120540746620726,
|
| 68 |
+
"world_model": 0.7515525425237323
|
| 69 |
+
}
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 3,
|
| 73 |
+
"train": {
|
| 74 |
+
"action": 0.034251564747961094,
|
| 75 |
+
"belief": 0.0881565280882788,
|
| 76 |
+
"corridor": 0.19749194407513784,
|
| 77 |
+
"disturbance": 0.019202744416642326,
|
| 78 |
+
"persistence": 1.0902665860137868,
|
| 79 |
+
"support_mode": 0.07417118861413127,
|
| 80 |
+
"total": 0.3623058025905599,
|
| 81 |
+
"world_model": 0.810377035309507
|
| 82 |
+
},
|
| 83 |
+
"val": {
|
| 84 |
+
"action": 0.020182275937663183,
|
| 85 |
+
"belief": 0.08651774370717624,
|
| 86 |
+
"corridor": 0.18512752960022125,
|
| 87 |
+
"disturbance": 0.02845218790591591,
|
| 88 |
+
"persistence": 1.0011120429706006,
|
| 89 |
+
"support_mode": 0.1388084255080367,
|
| 90 |
+
"total": 0.3356363290832156,
|
| 91 |
+
"world_model": 0.7516248249818408
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
]
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: rlbench_subset3_backbone_only_clip
|
| 2 |
+
output_dir: /workspace/outputs/rlbench_custom
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
dataset_root: /workspace/data/rlbench2
|
| 9 |
+
tasks:
|
| 10 |
+
- bimanual_lift_ball
|
| 11 |
+
- bimanual_push_box
|
| 12 |
+
- bimanual_dual_push_buttons
|
| 13 |
+
train_episodes:
|
| 14 |
+
- 0
|
| 15 |
+
val_episodes:
|
| 16 |
+
- 1
|
| 17 |
+
resolution: 224
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
proprio_dim: 32
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 2
|
| 22 |
+
batch_size: 2
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.0002
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
trainer:
|
| 27 |
+
policy_type: backbone_only
|
| 28 |
+
use_bf16: true
|
| 29 |
+
grad_clip_norm: 1.0
|
| 30 |
+
freeze_backbone: true
|
| 31 |
+
gradient_checkpointing: false
|
| 32 |
+
plan_during_train: false
|
| 33 |
+
plan_during_eval: false
|
| 34 |
+
support_mode_conditioning: true
|
| 35 |
+
policy:
|
| 36 |
+
backbone:
|
| 37 |
+
model_name: openai/clip-vit-base-patch32
|
| 38 |
+
hidden_dim: 512
|
| 39 |
+
max_text_tokens: 32
|
| 40 |
+
freeze_backbone: true
|
| 41 |
+
gradient_checkpointing: false
|
| 42 |
+
use_dummy_backbone: false
|
| 43 |
+
fusion:
|
| 44 |
+
hidden_dim: 512
|
| 45 |
+
num_cameras: 3
|
| 46 |
+
num_layers: 4
|
| 47 |
+
num_heads: 8
|
| 48 |
+
ff_dim: 2048
|
| 49 |
+
dropout: 0.1
|
| 50 |
+
proprio_dim: 32
|
| 51 |
+
proprio_tokens: 1
|
| 52 |
+
decoder:
|
| 53 |
+
hidden_dim: 512
|
| 54 |
+
num_heads: 8
|
| 55 |
+
num_layers: 4
|
| 56 |
+
ff_dim: 2048
|
| 57 |
+
dropout: 0.1
|
| 58 |
+
chunk_size: 8
|
| 59 |
+
action_dim: 14
|
| 60 |
+
num_candidates: 8
|
| 61 |
+
reveal_head:
|
| 62 |
+
hidden_dim: 512
|
| 63 |
+
num_support_modes: 3
|
| 64 |
+
num_approach_templates: 32
|
| 65 |
+
rollout_horizon: 5
|
| 66 |
+
belief_map_size: 32
|
| 67 |
+
predict_belief_map: true
|
| 68 |
+
world_model:
|
| 69 |
+
hidden_dim: 512
|
| 70 |
+
action_dim: 14
|
| 71 |
+
num_support_modes: 3
|
| 72 |
+
num_approach_templates: 32
|
| 73 |
+
rollout_horizon: 5
|
| 74 |
+
planner:
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
corridor_weight: 1.0
|
| 77 |
+
persistence_weight: 0.5
|
| 78 |
+
proposal_weight: 0.5
|
| 79 |
+
disturbance_weight: 0.75
|
| 80 |
+
reocclusion_weight: 0.5
|
| 81 |
+
visibility_weight: 0.25
|
| 82 |
+
loss_weights:
|
| 83 |
+
action: 1.0
|
| 84 |
+
support_mode: 0.1
|
| 85 |
+
corridor: 0.1
|
| 86 |
+
persistence: 0.05
|
| 87 |
+
disturbance: 0.05
|
| 88 |
+
world_model: 0.1
|
| 89 |
+
belief: 0.05
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.010832569689285108,
|
| 6 |
+
"total": 0.010832569689285108,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.00584922067168602,
|
| 11 |
+
"total": 0.00584922067168602,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.007243322389241776,
|
| 19 |
+
"total": 0.007243322389241776,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.004669623740794346,
|
| 24 |
+
"total": 0.004669623740794346,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
]
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "rlbench_subset3_backbone_only_clip",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 0.007243322389241776,
|
| 6 |
+
"final_val_total": 0.004669623740794346,
|
| 7 |
+
"train_dataset": {
|
| 8 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 9 |
+
"tasks": [
|
| 10 |
+
"bimanual_lift_ball",
|
| 11 |
+
"bimanual_push_box",
|
| 12 |
+
"bimanual_dual_push_buttons"
|
| 13 |
+
],
|
| 14 |
+
"episode_indices": [
|
| 15 |
+
0
|
| 16 |
+
],
|
| 17 |
+
"num_episodes": 3,
|
| 18 |
+
"num_samples": 381,
|
| 19 |
+
"resolution": 224,
|
| 20 |
+
"chunk_size": 8,
|
| 21 |
+
"proprio_dim": 32
|
| 22 |
+
},
|
| 23 |
+
"val_dataset": {
|
| 24 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 25 |
+
"tasks": [
|
| 26 |
+
"bimanual_lift_ball",
|
| 27 |
+
"bimanual_push_box",
|
| 28 |
+
"bimanual_dual_push_buttons"
|
| 29 |
+
],
|
| 30 |
+
"episode_indices": [
|
| 31 |
+
1
|
| 32 |
+
],
|
| 33 |
+
"num_episodes": 3,
|
| 34 |
+
"num_samples": 374,
|
| 35 |
+
"resolution": 224,
|
| 36 |
+
"chunk_size": 8,
|
| 37 |
+
"proprio_dim": 32
|
| 38 |
+
},
|
| 39 |
+
"init_info": {
|
| 40 |
+
"path": "/workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
|
| 41 |
+
"missing_keys": [],
|
| 42 |
+
"unexpected_keys": []
|
| 43 |
+
}
|
| 44 |
+
}
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: rlbench_subset3_backbone_only_dummy
|
| 2 |
+
output_dir: /workspace/outputs/rlbench_custom
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
dataset_root: /workspace/data/rlbench2
|
| 9 |
+
tasks:
|
| 10 |
+
- bimanual_lift_ball
|
| 11 |
+
- bimanual_push_box
|
| 12 |
+
- bimanual_dual_push_buttons
|
| 13 |
+
train_episodes:
|
| 14 |
+
- 0
|
| 15 |
+
val_episodes:
|
| 16 |
+
- 1
|
| 17 |
+
resolution: 224
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
proprio_dim: 32
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 2
|
| 22 |
+
batch_size: 4
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.0005
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
trainer:
|
| 27 |
+
policy_type: backbone_only
|
| 28 |
+
use_bf16: true
|
| 29 |
+
grad_clip_norm: 1.0
|
| 30 |
+
freeze_backbone: true
|
| 31 |
+
gradient_checkpointing: false
|
| 32 |
+
plan_during_train: false
|
| 33 |
+
plan_during_eval: false
|
| 34 |
+
support_mode_conditioning: true
|
| 35 |
+
policy:
|
| 36 |
+
backbone:
|
| 37 |
+
model_name: openai/clip-vit-base-patch32
|
| 38 |
+
hidden_dim: 128
|
| 39 |
+
max_text_tokens: 32
|
| 40 |
+
freeze_backbone: true
|
| 41 |
+
gradient_checkpointing: false
|
| 42 |
+
use_dummy_backbone: true
|
| 43 |
+
fusion:
|
| 44 |
+
hidden_dim: 128
|
| 45 |
+
num_cameras: 3
|
| 46 |
+
num_layers: 2
|
| 47 |
+
num_heads: 4
|
| 48 |
+
ff_dim: 256
|
| 49 |
+
dropout: 0.1
|
| 50 |
+
proprio_dim: 32
|
| 51 |
+
proprio_tokens: 1
|
| 52 |
+
decoder:
|
| 53 |
+
hidden_dim: 128
|
| 54 |
+
num_heads: 4
|
| 55 |
+
num_layers: 2
|
| 56 |
+
ff_dim: 256
|
| 57 |
+
dropout: 0.1
|
| 58 |
+
chunk_size: 8
|
| 59 |
+
action_dim: 14
|
| 60 |
+
num_candidates: 8
|
| 61 |
+
reveal_head:
|
| 62 |
+
hidden_dim: 128
|
| 63 |
+
num_support_modes: 3
|
| 64 |
+
num_approach_templates: 32
|
| 65 |
+
rollout_horizon: 5
|
| 66 |
+
belief_map_size: 32
|
| 67 |
+
predict_belief_map: true
|
| 68 |
+
world_model:
|
| 69 |
+
hidden_dim: 128
|
| 70 |
+
action_dim: 14
|
| 71 |
+
num_support_modes: 3
|
| 72 |
+
num_approach_templates: 32
|
| 73 |
+
rollout_horizon: 5
|
| 74 |
+
planner:
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
corridor_weight: 1.0
|
| 77 |
+
persistence_weight: 0.5
|
| 78 |
+
proposal_weight: 0.5
|
| 79 |
+
disturbance_weight: 0.75
|
| 80 |
+
reocclusion_weight: 0.5
|
| 81 |
+
visibility_weight: 0.25
|
| 82 |
+
loss_weights:
|
| 83 |
+
action: 1.0
|
| 84 |
+
support_mode: 0.1
|
| 85 |
+
corridor: 0.1
|
| 86 |
+
persistence: 0.05
|
| 87 |
+
disturbance: 0.05
|
| 88 |
+
world_model: 0.1
|
| 89 |
+
belief: 0.05
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.012133247866586316,
|
| 6 |
+
"total": 0.012133247866586316,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.008180527588070191,
|
| 11 |
+
"total": 0.008180527588070191,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.00792471425726641,
|
| 19 |
+
"total": 0.00792471425726641,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.005605970580716608,
|
| 24 |
+
"total": 0.005605970580716608,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
]
|
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "rlbench_subset3_backbone_only_dummy",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 0.00792471425726641,
|
| 6 |
+
"final_val_total": 0.005605970580716608,
|
| 7 |
+
"train_dataset": {
|
| 8 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 9 |
+
"tasks": [
|
| 10 |
+
"bimanual_lift_ball",
|
| 11 |
+
"bimanual_push_box",
|
| 12 |
+
"bimanual_dual_push_buttons"
|
| 13 |
+
],
|
| 14 |
+
"episode_indices": [
|
| 15 |
+
0
|
| 16 |
+
],
|
| 17 |
+
"num_episodes": 3,
|
| 18 |
+
"num_samples": 381,
|
| 19 |
+
"resolution": 224,
|
| 20 |
+
"chunk_size": 8,
|
| 21 |
+
"proprio_dim": 32
|
| 22 |
+
},
|
| 23 |
+
"val_dataset": {
|
| 24 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 25 |
+
"tasks": [
|
| 26 |
+
"bimanual_lift_ball",
|
| 27 |
+
"bimanual_push_box",
|
| 28 |
+
"bimanual_dual_push_buttons"
|
| 29 |
+
],
|
| 30 |
+
"episode_indices": [
|
| 31 |
+
1
|
| 32 |
+
],
|
| 33 |
+
"num_episodes": 3,
|
| 34 |
+
"num_samples": 374,
|
| 35 |
+
"resolution": 224,
|
| 36 |
+
"chunk_size": 8,
|
| 37 |
+
"proprio_dim": 32
|
| 38 |
+
},
|
| 39 |
+
"init_info": {
|
| 40 |
+
"path": "/workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt",
|
| 41 |
+
"missing_keys": [],
|
| 42 |
+
"unexpected_keys": []
|
| 43 |
+
}
|
| 44 |
+
}
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: rlbench_subset3_reveal_state_clip
|
| 2 |
+
output_dir: /workspace/outputs/rlbench_custom
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
dataset_root: /workspace/data/rlbench2
|
| 9 |
+
tasks:
|
| 10 |
+
- bimanual_lift_ball
|
| 11 |
+
- bimanual_push_box
|
| 12 |
+
- bimanual_dual_push_buttons
|
| 13 |
+
train_episodes:
|
| 14 |
+
- 0
|
| 15 |
+
val_episodes:
|
| 16 |
+
- 1
|
| 17 |
+
resolution: 224
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
proprio_dim: 32
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 2
|
| 22 |
+
batch_size: 2
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.0002
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
trainer:
|
| 27 |
+
policy_type: reveal_state
|
| 28 |
+
use_bf16: true
|
| 29 |
+
grad_clip_norm: 1.0
|
| 30 |
+
freeze_backbone: true
|
| 31 |
+
gradient_checkpointing: false
|
| 32 |
+
plan_during_train: false
|
| 33 |
+
plan_during_eval: false
|
| 34 |
+
support_mode_conditioning: true
|
| 35 |
+
policy:
|
| 36 |
+
backbone:
|
| 37 |
+
model_name: openai/clip-vit-base-patch32
|
| 38 |
+
hidden_dim: 512
|
| 39 |
+
max_text_tokens: 32
|
| 40 |
+
freeze_backbone: true
|
| 41 |
+
gradient_checkpointing: false
|
| 42 |
+
use_dummy_backbone: false
|
| 43 |
+
fusion:
|
| 44 |
+
hidden_dim: 512
|
| 45 |
+
num_cameras: 3
|
| 46 |
+
num_layers: 4
|
| 47 |
+
num_heads: 8
|
| 48 |
+
ff_dim: 2048
|
| 49 |
+
dropout: 0.1
|
| 50 |
+
proprio_dim: 32
|
| 51 |
+
proprio_tokens: 1
|
| 52 |
+
decoder:
|
| 53 |
+
hidden_dim: 512
|
| 54 |
+
num_heads: 8
|
| 55 |
+
num_layers: 4
|
| 56 |
+
ff_dim: 2048
|
| 57 |
+
dropout: 0.1
|
| 58 |
+
chunk_size: 8
|
| 59 |
+
action_dim: 14
|
| 60 |
+
num_candidates: 8
|
| 61 |
+
reveal_head:
|
| 62 |
+
hidden_dim: 512
|
| 63 |
+
num_support_modes: 3
|
| 64 |
+
num_approach_templates: 32
|
| 65 |
+
rollout_horizon: 5
|
| 66 |
+
belief_map_size: 32
|
| 67 |
+
predict_belief_map: true
|
| 68 |
+
world_model:
|
| 69 |
+
hidden_dim: 512
|
| 70 |
+
action_dim: 14
|
| 71 |
+
num_support_modes: 3
|
| 72 |
+
num_approach_templates: 32
|
| 73 |
+
rollout_horizon: 5
|
| 74 |
+
planner:
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
corridor_weight: 1.0
|
| 77 |
+
persistence_weight: 0.65
|
| 78 |
+
proposal_weight: 0.35
|
| 79 |
+
disturbance_weight: 0.8
|
| 80 |
+
reocclusion_weight: 0.6
|
| 81 |
+
visibility_weight: 0.35
|
| 82 |
+
loss_weights:
|
| 83 |
+
action: 1.0
|
| 84 |
+
support_mode: 0.15
|
| 85 |
+
corridor: 0.2
|
| 86 |
+
persistence: 0.1
|
| 87 |
+
disturbance: 0.1
|
| 88 |
+
world_model: 0.2
|
| 89 |
+
belief: 0.05
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.012311161635931172,
|
| 6 |
+
"total": 0.012311161635931172,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.00556847607881269,
|
| 11 |
+
"total": 0.00556847607881269,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.0070935887447924045,
|
| 19 |
+
"total": 0.0070935887447924045,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.004233352240750238,
|
| 24 |
+
"total": 0.004233352240750238,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
]
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "rlbench_subset3_reveal_state_clip",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 0.0070935887447924045,
|
| 6 |
+
"final_val_total": 0.004233352240750238,
|
| 7 |
+
"train_dataset": {
|
| 8 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 9 |
+
"tasks": [
|
| 10 |
+
"bimanual_lift_ball",
|
| 11 |
+
"bimanual_push_box",
|
| 12 |
+
"bimanual_dual_push_buttons"
|
| 13 |
+
],
|
| 14 |
+
"episode_indices": [
|
| 15 |
+
0
|
| 16 |
+
],
|
| 17 |
+
"num_episodes": 3,
|
| 18 |
+
"num_samples": 381,
|
| 19 |
+
"resolution": 224,
|
| 20 |
+
"chunk_size": 8,
|
| 21 |
+
"proprio_dim": 32
|
| 22 |
+
},
|
| 23 |
+
"val_dataset": {
|
| 24 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 25 |
+
"tasks": [
|
| 26 |
+
"bimanual_lift_ball",
|
| 27 |
+
"bimanual_push_box",
|
| 28 |
+
"bimanual_dual_push_buttons"
|
| 29 |
+
],
|
| 30 |
+
"episode_indices": [
|
| 31 |
+
1
|
| 32 |
+
],
|
| 33 |
+
"num_episodes": 3,
|
| 34 |
+
"num_samples": 374,
|
| 35 |
+
"resolution": 224,
|
| 36 |
+
"chunk_size": 8,
|
| 37 |
+
"proprio_dim": 32
|
| 38 |
+
},
|
| 39 |
+
"init_info": {
|
| 40 |
+
"path": "/workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt",
|
| 41 |
+
"missing_keys": [],
|
| 42 |
+
"unexpected_keys": []
|
| 43 |
+
}
|
| 44 |
+
}
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: rlbench_subset3_reveal_state_dummy
|
| 2 |
+
output_dir: /workspace/outputs/rlbench_custom
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
dataset_root: /workspace/data/rlbench2
|
| 9 |
+
tasks:
|
| 10 |
+
- bimanual_lift_ball
|
| 11 |
+
- bimanual_push_box
|
| 12 |
+
- bimanual_dual_push_buttons
|
| 13 |
+
train_episodes:
|
| 14 |
+
- 0
|
| 15 |
+
val_episodes:
|
| 16 |
+
- 1
|
| 17 |
+
resolution: 224
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
proprio_dim: 32
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 2
|
| 22 |
+
batch_size: 4
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.0005
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
trainer:
|
| 27 |
+
policy_type: reveal_state
|
| 28 |
+
use_bf16: true
|
| 29 |
+
grad_clip_norm: 1.0
|
| 30 |
+
freeze_backbone: true
|
| 31 |
+
gradient_checkpointing: false
|
| 32 |
+
plan_during_train: false
|
| 33 |
+
plan_during_eval: false
|
| 34 |
+
support_mode_conditioning: true
|
| 35 |
+
policy:
|
| 36 |
+
backbone:
|
| 37 |
+
model_name: openai/clip-vit-base-patch32
|
| 38 |
+
hidden_dim: 128
|
| 39 |
+
max_text_tokens: 32
|
| 40 |
+
freeze_backbone: true
|
| 41 |
+
gradient_checkpointing: false
|
| 42 |
+
use_dummy_backbone: true
|
| 43 |
+
fusion:
|
| 44 |
+
hidden_dim: 128
|
| 45 |
+
num_cameras: 3
|
| 46 |
+
num_layers: 2
|
| 47 |
+
num_heads: 4
|
| 48 |
+
ff_dim: 256
|
| 49 |
+
dropout: 0.1
|
| 50 |
+
proprio_dim: 32
|
| 51 |
+
proprio_tokens: 1
|
| 52 |
+
decoder:
|
| 53 |
+
hidden_dim: 128
|
| 54 |
+
num_heads: 4
|
| 55 |
+
num_layers: 2
|
| 56 |
+
ff_dim: 256
|
| 57 |
+
dropout: 0.1
|
| 58 |
+
chunk_size: 8
|
| 59 |
+
action_dim: 14
|
| 60 |
+
num_candidates: 8
|
| 61 |
+
reveal_head:
|
| 62 |
+
hidden_dim: 128
|
| 63 |
+
num_support_modes: 3
|
| 64 |
+
num_approach_templates: 32
|
| 65 |
+
rollout_horizon: 5
|
| 66 |
+
belief_map_size: 32
|
| 67 |
+
predict_belief_map: true
|
| 68 |
+
world_model:
|
| 69 |
+
hidden_dim: 128
|
| 70 |
+
action_dim: 14
|
| 71 |
+
num_support_modes: 3
|
| 72 |
+
num_approach_templates: 32
|
| 73 |
+
rollout_horizon: 5
|
| 74 |
+
planner:
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
corridor_weight: 1.0
|
| 77 |
+
persistence_weight: 0.65
|
| 78 |
+
proposal_weight: 0.35
|
| 79 |
+
disturbance_weight: 0.8
|
| 80 |
+
reocclusion_weight: 0.6
|
| 81 |
+
visibility_weight: 0.35
|
| 82 |
+
loss_weights:
|
| 83 |
+
action: 1.0
|
| 84 |
+
support_mode: 0.15
|
| 85 |
+
corridor: 0.2
|
| 86 |
+
persistence: 0.1
|
| 87 |
+
disturbance: 0.1
|
| 88 |
+
world_model: 0.2
|
| 89 |
+
belief: 0.05
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.015062082646181807,
|
| 6 |
+
"total": 0.015062082646181807,
|
| 7 |
+
"world_model": 0.0
|
| 8 |
+
},
|
| 9 |
+
"val": {
|
| 10 |
+
"action": 0.008003641142846738,
|
| 11 |
+
"total": 0.008003641142846738,
|
| 12 |
+
"world_model": 0.0
|
| 13 |
+
}
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"epoch": 1,
|
| 17 |
+
"train": {
|
| 18 |
+
"action": 0.007828686845944807,
|
| 19 |
+
"total": 0.007828686845944807,
|
| 20 |
+
"world_model": 0.0
|
| 21 |
+
},
|
| 22 |
+
"val": {
|
| 23 |
+
"action": 0.0091639062995958,
|
| 24 |
+
"total": 0.0091639062995958,
|
| 25 |
+
"world_model": 0.0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
]
|
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "rlbench_subset3_reveal_state_dummy",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 0.007828686845944807,
|
| 6 |
+
"final_val_total": 0.0091639062995958,
|
| 7 |
+
"train_dataset": {
|
| 8 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 9 |
+
"tasks": [
|
| 10 |
+
"bimanual_lift_ball",
|
| 11 |
+
"bimanual_push_box",
|
| 12 |
+
"bimanual_dual_push_buttons"
|
| 13 |
+
],
|
| 14 |
+
"episode_indices": [
|
| 15 |
+
0
|
| 16 |
+
],
|
| 17 |
+
"num_episodes": 3,
|
| 18 |
+
"num_samples": 381,
|
| 19 |
+
"resolution": 224,
|
| 20 |
+
"chunk_size": 8,
|
| 21 |
+
"proprio_dim": 32
|
| 22 |
+
},
|
| 23 |
+
"val_dataset": {
|
| 24 |
+
"dataset_root": "/workspace/data/rlbench2",
|
| 25 |
+
"tasks": [
|
| 26 |
+
"bimanual_lift_ball",
|
| 27 |
+
"bimanual_push_box",
|
| 28 |
+
"bimanual_dual_push_buttons"
|
| 29 |
+
],
|
| 30 |
+
"episode_indices": [
|
| 31 |
+
1
|
| 32 |
+
],
|
| 33 |
+
"num_episodes": 3,
|
| 34 |
+
"num_samples": 374,
|
| 35 |
+
"resolution": 224,
|
| 36 |
+
"chunk_size": 8,
|
| 37 |
+
"proprio_dim": 32
|
| 38 |
+
},
|
| 39 |
+
"init_info": {
|
| 40 |
+
"path": "/workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt",
|
| 41 |
+
"missing_keys": [],
|
| 42 |
+
"unexpected_keys": []
|
| 43 |
+
}
|
| 44 |
+
}
|
artifacts/reports/reveal_eval/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backbone": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 1.0,
|
| 5 |
+
"bag_proxy": 1.0,
|
| 6 |
+
"cloth_proxy": 1.0
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 1.0,
|
| 9 |
+
"visibility_integral": 1.7894555413060718,
|
| 10 |
+
"corridor_availability": 0.7018518588609166,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 0.0,
|
| 13 |
+
"disturbance_cost": 0.1193024102701909
|
| 14 |
+
},
|
| 15 |
+
"reveal": {
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"foliage_proxy": 0.9583333333333334,
|
| 18 |
+
"bag_proxy": 0.9166666666666666,
|
| 19 |
+
"cloth_proxy": 1.0
|
| 20 |
+
},
|
| 21 |
+
"mean_success": 0.9583333333333334,
|
| 22 |
+
"visibility_integral": 6.966822463605139,
|
| 23 |
+
"corridor_availability": 0.7799575842089124,
|
| 24 |
+
"reocclusion_rate": 0.005997474747474748,
|
| 25 |
+
"persistence_horizon_mae": 1.2541997782345518,
|
| 26 |
+
"disturbance_cost": 0.2107134228054848
|
| 27 |
+
}
|
| 28 |
+
}
|
artifacts/reports/reveal_eval/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## backbone
|
| 4 |
+
- checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
|
| 5 |
+
- mean_success: 1.000
|
| 6 |
+
- visibility_integral: 1.789
|
| 7 |
+
- corridor_availability: 0.702
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 0.000
|
| 10 |
+
- disturbance_cost: 0.119
|
| 11 |
+
- foliage_proxy_success: 1.000
|
| 12 |
+
- bag_proxy_success: 1.000
|
| 13 |
+
- cloth_proxy_success: 1.000
|
| 14 |
+
|
| 15 |
+
## reveal
|
| 16 |
+
- checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
|
| 17 |
+
- mean_success: 0.958
|
| 18 |
+
- visibility_integral: 6.967
|
| 19 |
+
- corridor_availability: 0.780
|
| 20 |
+
- reocclusion_rate: 0.006
|
| 21 |
+
- persistence_horizon_mae: 1.254
|
| 22 |
+
- disturbance_cost: 0.211
|
| 23 |
+
- foliage_proxy_success: 0.958
|
| 24 |
+
- bag_proxy_success: 0.917
|
| 25 |
+
- cloth_proxy_success: 1.000
|
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
|
| 3 |
+
"plan": false,
|
| 4 |
+
"support_mode_conditioning": true,
|
| 5 |
+
"episodes_per_task": 1,
|
| 6 |
+
"episode_length": 5,
|
| 7 |
+
"resolution": 224,
|
| 8 |
+
"tasks": {
|
| 9 |
+
"bimanual_lift_ball": {
|
| 10 |
+
"successes": [
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"returns": [
|
| 14 |
+
0.0
|
| 15 |
+
],
|
| 16 |
+
"mean_success": 0.0,
|
| 17 |
+
"mean_return": 0.0
|
| 18 |
+
},
|
| 19 |
+
"bimanual_push_box": {
|
| 20 |
+
"successes": [
|
| 21 |
+
0.0
|
| 22 |
+
],
|
| 23 |
+
"returns": [
|
| 24 |
+
0.0
|
| 25 |
+
],
|
| 26 |
+
"mean_success": 0.0,
|
| 27 |
+
"mean_return": 0.0
|
| 28 |
+
},
|
| 29 |
+
"bimanual_dual_push_buttons": {
|
| 30 |
+
"successes": [
|
| 31 |
+
0.0
|
| 32 |
+
],
|
| 33 |
+
"returns": [
|
| 34 |
+
0.0
|
| 35 |
+
],
|
| 36 |
+
"mean_success": 0.0,
|
| 37 |
+
"mean_return": 0.0
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"mean_success": 0.0
|
| 41 |
+
}
|
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RLBench Rollout Eval
|
| 2 |
+
|
| 3 |
+
- Checkpoint: `/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt`
|
| 4 |
+
- Plan enabled: `False`
|
| 5 |
+
- Support-mode conditioning: `True`
|
| 6 |
+
- Mean success: `0.000`
|
| 7 |
+
|
| 8 |
+
## Per-task
|
| 9 |
+
|
| 10 |
+
- `bimanual_lift_ball`: mean_success=0.000, returns=[0.0]
|
| 11 |
+
- `bimanual_push_box`: mean_success=0.000, returns=[0.0]
|
| 12 |
+
- `bimanual_dual_push_buttons`: mean_success=0.000, returns=[0.0]
|
artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
|
| 3 |
+
"plan": false,
|
| 4 |
+
"support_mode_conditioning": true,
|
| 5 |
+
"episodes_per_task": 1,
|
| 6 |
+
"episode_length": 5,
|
| 7 |
+
"resolution": 224,
|
| 8 |
+
"tasks": {
|
| 9 |
+
"bimanual_lift_ball": {
|
| 10 |
+
"successes": [
|
| 11 |
+
0.0
|
| 12 |
+
],
|
| 13 |
+
"returns": [
|
| 14 |
+
0.0
|
| 15 |
+
],
|
| 16 |
+
"mean_success": 0.0,
|
| 17 |
+
"mean_return": 0.0
|
| 18 |
+
},
|
| 19 |
+
"bimanual_push_box": {
|
| 20 |
+
"successes": [
|
| 21 |
+
0.0
|
| 22 |
+
],
|
| 23 |
+
"returns": [
|
| 24 |
+
0.0
|
| 25 |
+
],
|
| 26 |
+
"mean_success": 0.0,
|
| 27 |
+
"mean_return": 0.0
|
| 28 |
+
},
|
| 29 |
+
"bimanual_dual_push_buttons": {
|
| 30 |
+
"successes": [
|
| 31 |
+
0.0
|
| 32 |
+
],
|
| 33 |
+
"returns": [
|
| 34 |
+
0.0
|
| 35 |
+
],
|
| 36 |
+
"mean_success": 0.0,
|
| 37 |
+
"mean_return": 0.0
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
"mean_success": 0.0
|
| 41 |
+
}
|
code/reveal_vla_bimanual/.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.DS_Store
|
| 4 |
+
.mypy_cache/
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
.ruff_cache/
|
| 7 |
+
.venv/
|
| 8 |
+
artifacts/
|
| 9 |
+
outputs/
|
| 10 |
+
logs/
|
| 11 |
+
wandb/
|
| 12 |
+
reports/
|
code/reveal_vla_bimanual/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# reveal_vla_bimanual
|
| 2 |
+
|
| 3 |
+
Simulation-first prototype for bimanual reveal-and-retrieve under elastic occlusion.
|
| 4 |
+
|
| 5 |
+
This repo is structured around five top-level modules:
|
| 6 |
+
|
| 7 |
+
- `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
|
| 8 |
+
- `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
|
| 9 |
+
- `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
|
| 10 |
+
- `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
|
| 11 |
+
- `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
|
| 12 |
+
|
| 13 |
+
Current bootstrap priorities:
|
| 14 |
+
|
| 15 |
+
1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
|
| 16 |
+
2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
|
| 17 |
+
3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
|
| 18 |
+
|
| 19 |
+
Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
|
| 20 |
+
|
| 21 |
+
## RLBench env A
|
| 22 |
+
|
| 23 |
+
The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
|
| 24 |
+
|
| 25 |
+
Bring it up with:
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
/workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
|
| 29 |
+
/workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
|
| 30 |
+
/workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
Verify GPU GL on the headless display:
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
DISPLAY=:99 glxinfo -B
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
Run the RLBench launch/reset/step smoke test:
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
env \
|
| 43 |
+
DISPLAY=:99 \
|
| 44 |
+
XDG_RUNTIME_DIR=/tmp/runtime-root \
|
| 45 |
+
COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
|
| 46 |
+
LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
|
| 47 |
+
QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
|
| 48 |
+
/workspace/.tools/micromamba/bin/micromamba run \
|
| 49 |
+
-r /workspace/.micromamba \
|
| 50 |
+
-p /workspace/envs/rlbench \
|
| 51 |
+
python -m sim_rlbench.launch_smoke --headless
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
The working benchmark interface is fixed to three cameras only:
|
| 55 |
+
|
| 56 |
+
- `front`
|
| 57 |
+
- `wrist_left`
|
| 58 |
+
- `wrist_right`
|
| 59 |
+
|
| 60 |
+
The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
|
| 61 |
+
|
| 62 |
+
Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 66 |
+
python -m sim_rlbench.smoke_test --print-train-command
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
|
| 70 |
+
|
| 71 |
+
```bash
|
| 72 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 73 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
|
| 77 |
+
|
| 78 |
+
```bash
|
| 79 |
+
apt-get install -y squashfs-tools
|
| 80 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 81 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
|
| 82 |
+
```
|
code/reveal_vla_bimanual/docs/upstream_pins.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Upstream Pins
|
| 2 |
+
|
| 3 |
+
Pinned on 2026-03-22 in `/workspace/third_party`.
|
| 4 |
+
|
| 5 |
+
Mandatory benchmark stack:
|
| 6 |
+
|
| 7 |
+
- `peract_bimanual`: `bb0232a6ba3fe116566e9568f0c7af980ed6703d`
|
| 8 |
+
- `RLBench`: `8af748c51287989294e00c9c670e3330a0e35ed5`
|
| 9 |
+
- `PyRep`: `b8bd1d7a3182adcd570d001649c0849047ebf197`
|
| 10 |
+
- `YARR`: `6822ff78602c77878b27d4cfe759ce029c67bffb`
|
| 11 |
+
|
| 12 |
+
Optional published baseline:
|
| 13 |
+
|
| 14 |
+
- `AnyBimanual`: `76024e48b0e9489101459e85bc909c126ec581b4`
|
| 15 |
+
|
| 16 |
+
Reveal-proxy stack candidate:
|
| 17 |
+
|
| 18 |
+
- `IsaacLab`: `v2.3.1` was cloned for inspection, but it targets Python 3.11 and Isaac Sim 5.x.
|
| 19 |
+
- For the frozen project scope of Python 3.10 on Ubuntu 22.04, env B should stay on an Isaac Sim 4.5-compatible Isaac Lab release instead of the latest branch.
|
| 20 |
+
|
| 21 |
+
Notes:
|
| 22 |
+
|
| 23 |
+
- `peract_bimanual` defaults to 6 cameras and older Python/Torch pins. This repo overrides camera selection and environment creation rather than running the upstream install scripts unchanged.
|
| 24 |
+
- RLBench headless execution on this RunPod host will require an X server setup because the base image does not currently ship `X`, `xvfb`, or `nvidia-xconfig`.
|
code/reveal_vla_bimanual/docs/xorg.rtx6000.conf
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Section "ServerLayout"
|
| 2 |
+
Identifier "Layout0"
|
| 3 |
+
Screen 0 "Screen0"
|
| 4 |
+
EndSection
|
| 5 |
+
|
| 6 |
+
Section "Monitor"
|
| 7 |
+
Identifier "Monitor0"
|
| 8 |
+
VendorName "Unknown"
|
| 9 |
+
ModelName "Unknown"
|
| 10 |
+
Option "DPMS"
|
| 11 |
+
EndSection
|
| 12 |
+
|
| 13 |
+
Section "Device"
|
| 14 |
+
Identifier "Device0"
|
| 15 |
+
Driver "nvidia"
|
| 16 |
+
VendorName "NVIDIA Corporation"
|
| 17 |
+
BusID "PCI:65:0:0"
|
| 18 |
+
Option "AllowEmptyInitialConfiguration" "True"
|
| 19 |
+
Option "UseDisplayDevice" "None"
|
| 20 |
+
Option "ProbeAllGpus" "False"
|
| 21 |
+
EndSection
|
| 22 |
+
|
| 23 |
+
Section "Screen"
|
| 24 |
+
Identifier "Screen0"
|
| 25 |
+
Device "Device0"
|
| 26 |
+
Monitor "Monitor0"
|
| 27 |
+
DefaultDepth 24
|
| 28 |
+
Option "AllowEmptyInitialConfiguration" "True"
|
| 29 |
+
SubSection "Display"
|
| 30 |
+
Depth 24
|
| 31 |
+
Virtual 1280 1024
|
| 32 |
+
EndSubSection
|
| 33 |
+
EndSection
|
code/reveal_vla_bimanual/envs/reveal310.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: reveal310
|
| 2 |
+
channels:
|
| 3 |
+
- pytorch
|
| 4 |
+
- nvidia
|
| 5 |
+
- conda-forge
|
| 6 |
+
dependencies:
|
| 7 |
+
- python=3.10
|
| 8 |
+
- pip
|
| 9 |
+
- git
|
| 10 |
+
- cmake
|
| 11 |
+
- ninja
|
| 12 |
+
- make
|
| 13 |
+
- gxx_linux-64
|
| 14 |
+
- pkg-config
|
| 15 |
+
- numpy=1.26.*
|
| 16 |
+
- pandas=2.2.*
|
| 17 |
+
- scipy=1.13.*
|
| 18 |
+
- matplotlib=3.8.*
|
| 19 |
+
- pyyaml=6.*
|
| 20 |
+
- imageio
|
| 21 |
+
- trimesh
|
| 22 |
+
- networkx
|
| 23 |
+
- psutil
|
| 24 |
+
- tqdm
|
| 25 |
+
- pytorch=2.3.1
|
| 26 |
+
- torchvision=0.18.1
|
| 27 |
+
- torchaudio=2.3.1
|
| 28 |
+
- pytorch-cuda=12.1
|
| 29 |
+
- pip:
|
| 30 |
+
- accelerate==0.31.0
|
| 31 |
+
- einops==0.8.0
|
| 32 |
+
- hydra-core==1.3.2
|
| 33 |
+
- omegaconf==2.3.0
|
| 34 |
+
- safetensors==0.4.3
|
| 35 |
+
- tensorboard==2.16.2
|
| 36 |
+
- timm==1.0.7
|
| 37 |
+
- transformers==4.41.2
|
| 38 |
+
- wandb==0.18.0
|
code/reveal_vla_bimanual/envs/rlbench310.yaml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: rlbench310
|
| 2 |
+
channels:
|
| 3 |
+
- pytorch
|
| 4 |
+
- nvidia
|
| 5 |
+
- conda-forge
|
| 6 |
+
dependencies:
|
| 7 |
+
- python=3.10
|
| 8 |
+
- pip
|
| 9 |
+
- git
|
| 10 |
+
- cmake
|
| 11 |
+
- cffi
|
| 12 |
+
- ninja
|
| 13 |
+
- make
|
| 14 |
+
- gxx_linux-64
|
| 15 |
+
- pkg-config
|
| 16 |
+
- numpy=1.26.*
|
| 17 |
+
- pandas=2.2.*
|
| 18 |
+
- scipy=1.13.*
|
| 19 |
+
- matplotlib=3.8.*
|
| 20 |
+
- pyyaml=6.*
|
| 21 |
+
- h5py
|
| 22 |
+
- imageio
|
| 23 |
+
- pillow
|
| 24 |
+
- psutil
|
| 25 |
+
- tqdm
|
| 26 |
+
- trimesh
|
| 27 |
+
- pytorch=2.3.1
|
| 28 |
+
- torchvision=0.18.1
|
| 29 |
+
- torchaudio=2.3.1
|
| 30 |
+
- pytorch-cuda=12.1
|
| 31 |
+
- pip:
|
| 32 |
+
- accelerate==0.31.0
|
| 33 |
+
- absl-py==2.1.0
|
| 34 |
+
- clip @ git+https://github.com/openai/CLIP.git
|
| 35 |
+
- einops==0.8.0
|
| 36 |
+
- ftfy==6.2.0
|
| 37 |
+
- gym==0.26.2
|
| 38 |
+
- hydra-core==1.3.2
|
| 39 |
+
- natsort==8.4.0
|
| 40 |
+
- omegaconf==2.3.0
|
| 41 |
+
- perceiver-pytorch==0.8.8
|
| 42 |
+
- pyrender==0.1.45
|
| 43 |
+
- pytorch-lamb==1.0.0
|
| 44 |
+
- regex==2024.5.15
|
| 45 |
+
- rich==13.9.4
|
| 46 |
+
- rich-click==1.8.9
|
| 47 |
+
- safetensors==0.4.3
|
| 48 |
+
- tensorboard==2.16.2
|
| 49 |
+
- transformers==4.41.2
|
| 50 |
+
- wandb==0.18.0
|
code/reveal_vla_bimanual/eval/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from eval.metrics import BenchmarkMetrics
|
| 2 |
+
|
| 3 |
+
__all__ = ["BenchmarkMetrics"]
|
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (215 Bytes). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (249 Bytes). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc
ADDED
|
Binary file (344 Bytes). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc
ADDED
|
Binary file (408 Bytes). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc
ADDED
|
Binary file (2.03 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc
ADDED
|
Binary file (1.71 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc
ADDED
|
Binary file (3.29 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc
ADDED
|
Binary file (2.12 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc
ADDED
|
Binary file (3.77 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc
ADDED
|
Binary file (7.33 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc
ADDED
|
Binary file (14.2 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc
ADDED
|
Binary file (5.96 kB). View file
|
|
|
code/reveal_vla_bimanual/eval/ablations.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ablation labels that every evaluation run must cover; consumed by
# eval.run_ablations, which reports them in this order after "full_model".
MANDATORY_ABLATIONS: tuple[str, ...] = (
    "no_reveal_state_head",
    "no_world_model",
    "no_planner_reranking",
    "no_support_mode_conditioning",
    "no_wrist_cameras",
    "no_global_camera",
)
|
code/reveal_vla_bimanual/eval/metrics.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass
class BenchmarkMetrics:
    """Aggregate results of one benchmark evaluation run.

    The success fields are always populated.  The reveal-specific
    diagnostics stay ``None`` when the evaluated episodes produced no
    corresponding measurements (e.g. the model has no reveal head).
    """

    # Success rate keyed by proxy task name.
    per_task_success: dict[str, float]
    # Unweighted mean over the per_task_success values.
    mean_success: float
    visibility_integral: float | None = None
    corridor_availability: float | None = None
    reocclusion_rate: float | None = None
    persistence_horizon_mae: float | None = None
    disturbance_cost: float | None = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def mean_success(per_task_success: dict[str, float]) -> float:
    """Return the unweighted mean of the per-task success rates (0.0 if empty)."""
    scores = list(per_task_success.values())
    if not scores:
        return 0.0
    return float(np.mean(scores))
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def visibility_integral(curve: np.ndarray) -> float:
    """Sum a per-step visibility curve into a single scalar score."""
    return float(np.asarray(curve, dtype=np.float32).sum())
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def corridor_availability(corridor_open: np.ndarray) -> float:
    """Return the fraction of steps during which the corridor was open."""
    return float(np.asarray(corridor_open, dtype=np.float32).mean())
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def reocclusion_rate(corridor_open: np.ndarray) -> float:
    """Return the fraction of consecutive step pairs where an open corridor closed.

    A transition counts when a step is open (> 0.5) and the next one is
    closed (<= 0.5).  Fewer than two steps means no transitions: 0.0.
    """
    signal = np.asarray(corridor_open, dtype=np.float32)
    if signal.size < 2:
        return 0.0
    was_open = signal[:-1] > 0.5
    now_closed = signal[1:] <= 0.5
    return float((was_open & now_closed).mean())
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def persistence_horizon_mae(prediction: np.ndarray, target: np.ndarray) -> float:
    """Return the mean absolute error between predicted and true persistence horizons."""
    diff = np.asarray(prediction, dtype=np.float32) - np.asarray(target, dtype=np.float32)
    return float(np.abs(diff).mean())
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def mean_disturbance_cost(values: np.ndarray) -> float:
    """Return the mean disturbance cost over an episode (0.0 for empty input)."""
    costs = np.asarray(values, dtype=np.float32)
    return 0.0 if costs.size == 0 else float(costs.mean())
|
code/reveal_vla_bimanual/eval/report.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from eval.metrics import BenchmarkMetrics
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def write_markdown_report(
    output_path: Path,
    title: str,
    metrics: BenchmarkMetrics,
    hardware: str,
    training_settings: dict[str, str],
    published_reference: dict[str, float] | None = None,
) -> None:
    """Render a single-run benchmark report as markdown and write it to disk.

    The report lists the hardware line, all training settings, per-task
    success rates, the mean success, and (optionally) published reference
    numbers.  Parent directories of *output_path* are created as needed.
    """
    body: list[str] = [f"# {title}", "", f"- Hardware: {hardware}"]
    body += [f"- {key}: {value}" for key, value in training_settings.items()]

    body += ["", "## Success"]
    body += [f"- {task}: {score:.3f}" for task, score in metrics.per_task_success.items()]
    body.append(f"- mean_success: {metrics.mean_success:.3f}")

    if published_reference:
        body += ["", "## Published Reference"]
        body += [f"- {task}: {score:.3f}" for task, score in published_reference.items()]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(body), encoding="utf-8")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def write_comparison_report(
    output_path: Path,
    title: str,
    sections: dict[str, dict[str, float | str]],
) -> None:
    """Render per-model metric sections as markdown and write them to disk.

    Float values are formatted to three decimals; everything else is
    stringified verbatim.  The file always ends with exactly one newline.
    """
    body = [f"# {title}", ""]
    for section_name, values in sections.items():
        body.append(f"## {section_name}")
        for key, value in values.items():
            rendered = f"{value:.3f}" if isinstance(value, float) else f"{value}"
            body.append(f"- {key}: {rendered}")
        # Blank separator between sections; trailing one is stripped below.
        body.append("")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(body).rstrip() + "\n", encoding="utf-8")
|
code/reveal_vla_bimanual/eval/run_ablations.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from eval.ablations import MANDATORY_ABLATIONS
|
| 8 |
+
from eval.report import write_comparison_report
|
| 9 |
+
from eval.run_reveal_benchmark import evaluate_model, load_model
|
| 10 |
+
from sim_reveal import available_proxy_names
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main() -> None:
    """Evaluate a checkpoint with the full model and every mandatory ablation.

    Writes ablations.json (raw metrics, None preserved) and ablations.md
    (formatted comparison) under --output-root, then prints a JSON summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint", required=True)
    parser.add_argument("--episodes", type=int, default=24)
    # None: fall back to the resolution recorded in the checkpoint.
    parser.add_argument("--resolution", type=int, default=None)
    parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
    # None: evaluate every available proxy task.
    parser.add_argument("--proxies", nargs="*", default=None)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, checkpoint = load_model(args.checkpoint, device=device)
    # CLI flag wins over the checkpoint's stored resolution; 96 is the last resort.
    resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
    proxies = list(args.proxies or available_proxy_names())
    output_root = Path(args.output_root)
    output_root.mkdir(parents=True, exist_ok=True)

    sections: dict[str, dict[str, float]] = {}
    raw: dict[str, dict[str, object]] = {}
    # None stands for the un-ablated model and is labelled "full_model".
    for ablation in (None, *MANDATORY_ABLATIONS):
        label = "full_model" if ablation is None else ablation
        metrics = evaluate_model(
            model=model,
            device=device,
            proxies=proxies,
            episodes=args.episodes,
            resolution=resolution,
            ablation=ablation,
        )
        # Raw JSON keeps None for diagnostics that were not measured.
        raw[label] = {
            "per_task_success": metrics.per_task_success,
            "mean_success": metrics.mean_success,
            "visibility_integral": metrics.visibility_integral,
            "corridor_availability": metrics.corridor_availability,
            "reocclusion_rate": metrics.reocclusion_rate,
            "persistence_horizon_mae": metrics.persistence_horizon_mae,
            "disturbance_cost": metrics.disturbance_cost,
        }
        # Markdown sections coerce None to 0.0 so every row formats as a float.
        sections[label] = {
            "mean_success": metrics.mean_success,
            "visibility_integral": metrics.visibility_integral or 0.0,
            "corridor_availability": metrics.corridor_availability or 0.0,
            "reocclusion_rate": metrics.reocclusion_rate or 0.0,
            "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
            "disturbance_cost": metrics.disturbance_cost or 0.0,
        }

    json_path = output_root / "ablations.json"
    json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
    write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
    print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))


if __name__ == "__main__":
    main()
|
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from dataclasses import asdict
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
from torch import Tensor
|
| 12 |
+
|
| 13 |
+
from eval.metrics import (
|
| 14 |
+
BenchmarkMetrics,
|
| 15 |
+
corridor_availability,
|
| 16 |
+
mean_disturbance_cost,
|
| 17 |
+
mean_success,
|
| 18 |
+
persistence_horizon_mae,
|
| 19 |
+
reocclusion_rate,
|
| 20 |
+
visibility_integral,
|
| 21 |
+
)
|
| 22 |
+
from eval.report import write_comparison_report
|
| 23 |
+
from models.action_decoder import ChunkDecoderConfig
|
| 24 |
+
from models.backbones import FrozenVLBackboneConfig
|
| 25 |
+
from models.multiview_fusion import MultiViewFusionConfig
|
| 26 |
+
from models.planner import PlannerConfig
|
| 27 |
+
from models.policy import PolicyConfig
|
| 28 |
+
from models.reveal_head import RevealHeadConfig
|
| 29 |
+
from models.world_model import RevealWMConfig
|
| 30 |
+
from sim_reveal import available_proxy_names, make_proxy_env
|
| 31 |
+
from train.trainer import TrainerConfig, build_policy
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _policy_config_from_dict(cfg: dict[str, Any]) -> PolicyConfig:
    """Rebuild a PolicyConfig from the nested plain-dict form stored in a checkpoint."""
    component_types = {
        "backbone": FrozenVLBackboneConfig,
        "fusion": MultiViewFusionConfig,
        "decoder": ChunkDecoderConfig,
        "reveal_head": RevealHeadConfig,
        "world_model": RevealWMConfig,
        "planner": PlannerConfig,
    }
    components = {name: ctor(**cfg[name]) for name, ctor in component_types.items()}
    return PolicyConfig(**components)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _trainer_config_from_dict(cfg: dict[str, Any]) -> TrainerConfig:
    """Rebuild a TrainerConfig from its flat checkpoint dict."""
    return TrainerConfig(**cfg)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch.nn.Module, dict[str, Any]]:
    """Rebuild the policy recorded in a checkpoint and load its weights.

    Returns the model (moved to *device*, switched to eval mode) together
    with the raw checkpoint dict so callers can read metadata such as
    "data_resolution".
    """
    # NOTE(review): torch.load unpickles arbitrary objects (weights_only is not
    # set, presumably because the config dicts are stored alongside the
    # state_dict) — only load trusted checkpoints.
    checkpoint = torch.load(Path(checkpoint_path), map_location="cpu")
    policy_config = _policy_config_from_dict(checkpoint["policy_config"])
    trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
    model = build_policy(policy_config, trainer_config).to(device)
    model.load_state_dict(checkpoint["state_dict"])
    model.eval()
    return model, checkpoint
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _prepare_batch(observation: dict[str, Any], device: torch.device) -> dict[str, Any]:
|
| 60 |
+
images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
|
| 61 |
+
proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
|
| 62 |
+
return {
|
| 63 |
+
"images": images.to(device),
|
| 64 |
+
"proprio": proprio.to(device),
|
| 65 |
+
"texts": [observation["text"]],
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _apply_camera_ablation(images: Tensor, ablation: str | None) -> Tensor:
|
| 70 |
+
images = images.clone()
|
| 71 |
+
if ablation == "no_wrist_cameras":
|
| 72 |
+
images[:, 1:] = 0.0
|
| 73 |
+
if ablation == "no_global_camera":
|
| 74 |
+
images[:, 0] = 0.0
|
| 75 |
+
return images
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def select_chunk(
    model: torch.nn.Module,
    batch: dict[str, Any],
    ablation: str | None = None,
) -> tuple[Tensor, dict[str, Tensor]]:
    """Run one forward pass and pick the action chunk under the given ablation.

    Returns the selected chunk plus the raw model outputs so callers can
    inspect auxiliary predictions (e.g. outputs["reveal_state"]).
    """
    # Camera ablations are applied to the inputs, not inside the model.
    images = _apply_camera_ablation(batch["images"], ablation)
    forward_kwargs = {
        "images": images,
        "proprio": batch["proprio"],
        "texts": batch["texts"],
    }
    # Models with a reveal head support planning; plain backbones fall through
    # to the bare forward call at the bottom.
    if hasattr(model, "reveal_head"):
        if ablation == "no_world_model":
            # Skip planning entirely and use the policy head's mean action.
            outputs = model(**forward_kwargs, plan=False)
            return outputs["action_mean"], outputs
        outputs = model(
            **forward_kwargs,
            plan=True,
            support_mode_conditioning=(ablation != "no_support_mode_conditioning"),
        )
        if ablation == "no_planner_reranking":
            # Take the first candidate instead of the planner's re-ranked choice.
            return outputs["candidate_chunks"][:, 0], outputs
        # The reveal-state ablation refuses the planned chunk even when present.
        if "planned_chunk" in outputs and ablation != "no_reveal_state_head":
            return outputs["planned_chunk"], outputs
        return outputs["action_mean"], outputs
    outputs = model(**forward_kwargs)
    return outputs["action_mean"], outputs
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def evaluate_model(
    model: torch.nn.Module,
    device: torch.device,
    proxies: list[str],
    episodes: int,
    resolution: int,
    ablation: str | None = None,
) -> BenchmarkMetrics:
    """Roll out *model* for *episodes* episodes on each proxy task and aggregate metrics.

    Args:
        model: Policy to evaluate; forward passes run under torch.no_grad().
        device: Device the observation batches are moved to.
        proxies: Proxy task names accepted by make_proxy_env.
        episodes: Number of episodes per proxy task.
        resolution: Image resolution forwarded to the proxy env.
        ablation: Optional ablation label forwarded to select_chunk; also
            suppresses the reveal-state MAE when set to "no_reveal_state_head".

    Returns:
        BenchmarkMetrics with per-task success plus reveal diagnostics
        (diagnostics are None when no episode produced a measurement).
    """
    per_task_success: dict[str, float] = {}
    visibility_scores = []
    corridor_scores = []
    reocclusion_scores = []
    persistence_errors = []
    disturbance_scores = []

    for proxy_offset, proxy_name in enumerate(proxies):
        successes = []
        for episode_idx in range(episodes):
            # Deterministic, non-overlapping seeds: one 10_000-wide band per proxy.
            env = make_proxy_env(
                proxy_name=proxy_name,
                resolution=resolution,
                seed=proxy_offset * 10_000 + episode_idx,
            )
            observation, privileged_state = env.reset(seed=proxy_offset * 10_000 + episode_idx)
            episode_visibility = [float(privileged_state["visibility"])]
            # Corridor counts as open when any cell is feasible for the current support mode.
            episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
            episode_disturbance = [float(privileged_state["disturbance_cost"])]
            done = False
            while not done:
                batch = _prepare_batch(observation, device=device)
                with torch.no_grad():
                    chunk, outputs = select_chunk(model, batch, ablation=ablation)
                # Receding horizon: execute only the first action of the chunk.
                action = chunk[0, 0].detach().cpu().numpy()
                # NOTE(review): gym-style 5-tuple, but the last slot appears to carry
                # the privileged state rather than an info dict — confirm in sim_reveal.
                observation, _, terminated, truncated, privileged_state = env.step(action)
                episode_visibility.append(float(privileged_state["visibility"]))
                episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
                episode_disturbance.append(float(privileged_state["disturbance_cost"]))
                if "reveal_state" in outputs and ablation != "no_reveal_state_head":
                    persistence_errors.append(
                        persistence_horizon_mae(
                            outputs["reveal_state"]["persistence_horizon"][0].detach().cpu().numpy(),
                            privileged_state["persistence_horizon"],
                        )
                    )
                done = bool(terminated or truncated)
            # Success is judged from the final privileged state of the episode.
            successes.append(float(privileged_state["retrieval_success"]))
            visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
            corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
            reocclusion_scores.append(reocclusion_rate(np.asarray(episode_corridor)))
            disturbance_scores.append(mean_disturbance_cost(np.asarray(episode_disturbance)))
        per_task_success[proxy_name] = float(np.mean(successes))

    return BenchmarkMetrics(
        per_task_success=per_task_success,
        mean_success=mean_success(per_task_success),
        visibility_integral=float(np.mean(visibility_scores)) if visibility_scores else None,
        corridor_availability=float(np.mean(corridor_scores)) if corridor_scores else None,
        reocclusion_rate=float(np.mean(reocclusion_scores)) if reocclusion_scores else None,
        persistence_horizon_mae=float(np.mean(persistence_errors)) if persistence_errors else None,
        disturbance_cost=float(np.mean(disturbance_scores)) if disturbance_scores else None,
    )
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _metrics_to_dict(metrics: BenchmarkMetrics) -> dict[str, float | dict[str, float]]:
|
| 171 |
+
return {
|
| 172 |
+
"per_task_success": metrics.per_task_success,
|
| 173 |
+
"mean_success": metrics.mean_success,
|
| 174 |
+
"visibility_integral": metrics.visibility_integral or 0.0,
|
| 175 |
+
"corridor_availability": metrics.corridor_availability or 0.0,
|
| 176 |
+
"reocclusion_rate": metrics.reocclusion_rate or 0.0,
|
| 177 |
+
"persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
|
| 178 |
+
"disturbance_cost": metrics.disturbance_cost or 0.0,
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def main() -> None:
    """Evaluate one or more checkpoints on the reveal proxy benchmark.

    Writes reveal_benchmark.json (raw metrics) and reveal_benchmark.md
    (formatted comparison) under --output-root, then prints a JSON summary.
    """
    parser = argparse.ArgumentParser()
    # Repeatable flag: each occurrence is "label=/abs/path/checkpoint.pt".
    parser.add_argument("--model", action="append", required=True, help="label=/abs/path/checkpoint.pt")
    parser.add_argument("--episodes", type=int, default=24)
    # None: use the resolution stored in each checkpoint (fallback 96).
    parser.add_argument("--resolution", type=int, default=None)
    parser.add_argument("--ablation", default=None)
    parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
    # None: evaluate every available proxy task.
    parser.add_argument("--proxies", nargs="*", default=None)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    proxies = list(args.proxies or available_proxy_names())
    output_root = Path(args.output_root)
    output_root.mkdir(parents=True, exist_ok=True)

    sections: dict[str, dict[str, float | str]] = {}
    raw_metrics: dict[str, dict[str, float | dict[str, float]]] = {}
    for item in args.model:
        # Split only on the first "=" so checkpoint paths may contain "=".
        label, checkpoint_path = item.split("=", maxsplit=1)
        model, checkpoint = load_model(checkpoint_path, device=device)
        resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
        metrics = evaluate_model(
            model=model,
            device=device,
            proxies=proxies,
            episodes=args.episodes,
            resolution=resolution,
            ablation=args.ablation,
        )
        raw_metrics[label] = _metrics_to_dict(metrics)
        # Markdown sections coerce None diagnostics to 0.0 so rows format as floats.
        sections[label] = {
            "checkpoint": checkpoint_path,
            "mean_success": metrics.mean_success,
            "visibility_integral": metrics.visibility_integral or 0.0,
            "corridor_availability": metrics.corridor_availability or 0.0,
            "reocclusion_rate": metrics.reocclusion_rate or 0.0,
            "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
            "disturbance_cost": metrics.disturbance_cost or 0.0,
        }
        for task_name, score in metrics.per_task_success.items():
            sections[label][f"{task_name}_success"] = score

    json_path = output_root / "reveal_benchmark.json"
    json_path.write_text(json.dumps(raw_metrics, indent=2), encoding="utf-8")
    write_comparison_report(output_root / "reveal_benchmark.md", "Reveal Proxy Benchmark", sections)
    print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))


if __name__ == "__main__":
    main()
|