Safetensors
English
jie530 commited on
Commit
427303d
·
verified ·
1 Parent(s): eb608f5

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_10000_pytorch_model.pt +3 -0
  2. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_15000_pytorch_model.pt +3 -0
  3. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_20000_pytorch_model.pt +3 -0
  4. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_25000_pytorch_model.pt +3 -0
  5. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_30000_pytorch_model.pt +3 -0
  6. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_35000_pytorch_model.pt +3 -0
  7. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_40000_pytorch_model.pt +3 -0
  8. intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_5000_pytorch_model.pt +3 -0
  9. intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.json +151 -0
  10. intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.yaml +130 -0
  11. intervla-m1/simple/G1WholebodyHandover/20260409_053020/dataset_statistics.json +480 -0
  12. intervla-m1/simple/G1WholebodyHandover/20260409_053020/final_model/pytorch_model.pt +3 -0
  13. intervla-m1/simple/G1WholebodyHandover/20260409_053020/summary.jsonl +8 -0
  14. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_10000_pytorch_model.pt +3 -0
  15. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_15000_pytorch_model.pt +3 -0
  16. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_20000_pytorch_model.pt +3 -0
  17. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_25000_pytorch_model.pt +3 -0
  18. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_30000_pytorch_model.pt +3 -0
  19. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_35000_pytorch_model.pt +3 -0
  20. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_40000_pytorch_model.pt +3 -0
  21. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_5000_pytorch_model.pt +3 -0
  22. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.json +151 -0
  23. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.yaml +130 -0
  24. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/dataset_statistics.json +480 -0
  25. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/final_model/pytorch_model.pt +3 -0
  26. intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/summary.jsonl +8 -0
  27. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_10000_pytorch_model.pt +3 -0
  28. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_15000_pytorch_model.pt +3 -0
  29. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_20000_pytorch_model.pt +3 -0
  30. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_25000_pytorch_model.pt +3 -0
  31. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_30000_pytorch_model.pt +3 -0
  32. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_35000_pytorch_model.pt +3 -0
  33. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_40000_pytorch_model.pt +3 -0
  34. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_5000_pytorch_model.pt +3 -0
  35. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.json +151 -0
  36. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.yaml +130 -0
  37. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/dataset_statistics.json +480 -0
  38. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/final_model/pytorch_model.pt +3 -0
  39. intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/summary.jsonl +8 -0
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3496810b7e088ebbf3a9499d5f908de154e5c5535615fd2b971644839bc07910
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_15000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c46a21ba90ad0a1d42830640f817396440e519ad1749f84e5dd41fc0f93773
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef9b1b1d89d55bb21bdefce2d312c5891f93c110a692ab37dd52c1137c95492
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_25000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e61b9432e2bd7d371b20b4e8ef23aa88e55c06723b68278d91fa4bfb868f7fe
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d032d86a01e3d7a34aca50eee8d353700cfaa9d8e3e39627adfca1da6170c90e
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_35000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61194d366f59d033c52d9d45df3a164d48dc2de03f71ac21256fed9b2fb15ef
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de077c53050ef68f06eed06696ded8300cdb24f41b7bd639d6b52905e2eb1e6
3
+ size 8604575530
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b210d0f52e719aabadd09df6a20ffe8dc5c031d331a7f417f30f83bd2b9918ed
3
+ size 8604574397
intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyHandover",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/data/jliu/data/G1WholebodyHandover-v0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 5000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyHandover/20260409_053020"
151
+ }
intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyHandover
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /data/jliu/data/G1WholebodyHandover-v0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 5000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyHandover/20260409_053020
intervla-m1/simple/G1WholebodyHandover/20260409_053020/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ -0.217879056930542,
6
+ 0.33495163917541504,
7
+ 0.33495163917541504,
8
+ -0.30543649196624756,
9
+ -0.7176172733306885,
10
+ -0.6976513862609863,
11
+ -0.7176172733306885,
12
+ -0.16062557697296143,
13
+ -0.22490878403186798,
14
+ -0.22490878403186798,
15
+ 0.48187702894210815,
16
+ 0.48187702894210815,
17
+ 0.19276303052902222,
18
+ 0.48187702894210815,
19
+ -0.06166350468993187,
20
+ 0.2401788830757141,
21
+ 0.0873611643910408,
22
+ -0.09128011763095856,
23
+ -0.06374146789312363,
24
+ -0.03256600350141525,
25
+ -0.012831903994083405,
26
+ -0.09480268508195877,
27
+ -0.2054116129875183,
28
+ 0.14932410418987274,
29
+ -0.1460893154144287,
30
+ -0.02532443404197693,
31
+ 0.13397766649723053,
32
+ 0.06257354468107224,
33
+ -0.000577143335249275,
34
+ 0.02542431280016899,
35
+ -0.01923290081322193,
36
+ 0.7401506304740906,
37
+ 0.009205126203596592,
38
+ 0.023015793412923813,
39
+ 0.00010224639845546335,
40
+ 0.0025475628208369017
41
+ ],
42
+ "std": [
43
+ 0.2612758278846741,
44
+ 0.349688321352005,
45
+ 0.349688321352005,
46
+ 0.3383631706237793,
47
+ 0.7491674423217773,
48
+ 0.7378979921340942,
49
+ 0.7491674423217773,
50
+ 0.23291678726673126,
51
+ 0.3260721266269684,
52
+ 0.3260721266269684,
53
+ 0.6987663507461548,
54
+ 0.6987663507461548,
55
+ 0.279427170753479,
56
+ 0.6987663507461548,
57
+ 0.09250971674919128,
58
+ 0.08452950417995453,
59
+ 0.408634215593338,
60
+ 0.1649845391511917,
61
+ 0.19536836445331573,
62
+ 0.14846064150333405,
63
+ 0.26794546842575073,
64
+ 0.08737793564796448,
65
+ 0.024206371977925228,
66
+ 0.14024904370307922,
67
+ 0.24467714130878448,
68
+ 0.19793805480003357,
69
+ 0.18220646679401398,
70
+ 0.17332760989665985,
71
+ 0.025831403210759163,
72
+ 0.04043002799153328,
73
+ 0.07628294080495834,
74
+ 0.00015065705521256548,
75
+ 0.09775390475988388,
76
+ 0.09573811292648315,
77
+ 0.04227162525057793,
78
+ 0.026238275691866875
79
+ ],
80
+ "max": [
81
+ 0.5,
82
+ 0.699999988079071,
83
+ 0.699999988079071,
84
+ 0.0,
85
+ 2.2146225653890418e-16,
86
+ 2.2146225653890418e-16,
87
+ 2.2146225653890418e-16,
88
+ 1.2266071310501902e-19,
89
+ 1.1078670818917075e-16,
90
+ 1.1078670818917075e-16,
91
+ 1.5,
92
+ 1.5,
93
+ 0.6000000238418579,
94
+ 1.5,
95
+ 0.2472410947084427,
96
+ 0.7092280983924866,
97
+ 1.2571598291397095,
98
+ 0.42311304807662964,
99
+ 0.8564174771308899,
100
+ 0.5002086162567139,
101
+ 0.5172277092933655,
102
+ 0.16140148043632507,
103
+ -0.1900009959936142,
104
+ 0.5362864136695862,
105
+ 0.5715147256851196,
106
+ 0.5002322196960449,
107
+ 0.566592276096344,
108
+ 0.6392397880554199,
109
+ 0.1580466777086258,
110
+ 0.2233395129442215,
111
+ 0.2582152187824249,
112
+ 0.7400000095367432,
113
+ 0.5,
114
+ 0.5,
115
+ 0.3454970121383667,
116
+ 0.2899305522441864
117
+ ],
118
+ "min": [
119
+ -0.5,
120
+ -1.1095792134107943e-16,
121
+ -1.1095792134107943e-16,
122
+ -1.5,
123
+ -1.5,
124
+ -1.5,
125
+ -1.5,
126
+ -0.5,
127
+ -0.699999988079071,
128
+ -0.699999988079071,
129
+ -2.2166350627321588e-16,
130
+ -2.2166350627321588e-16,
131
+ 0.0,
132
+ -2.2166350627321588e-16,
133
+ -0.47567468881607056,
134
+ 0.1900009959936142,
135
+ -0.512170135974884,
136
+ -0.6265152096748352,
137
+ -0.5008617043495178,
138
+ -0.8220608830451965,
139
+ -0.9223371148109436,
140
+ -0.49507391452789307,
141
+ -0.3437551259994507,
142
+ -0.6871383190155029,
143
+ -0.7637607455253601,
144
+ -0.7568023204803467,
145
+ -0.576077401638031,
146
+ -0.4588268995285034,
147
+ -0.13876836001873016,
148
+ -0.10360867530107498,
149
+ -0.47856518626213074,
150
+ 0.7400000095367432,
151
+ -0.5,
152
+ -0.5,
153
+ -0.26161932945251465,
154
+ -0.06718750298023224
155
+ ],
156
+ "q01": [
157
+ -0.5,
158
+ 0.0,
159
+ 0.0,
160
+ -1.5,
161
+ -1.5,
162
+ -1.5,
163
+ -1.5,
164
+ -0.5,
165
+ -0.699999988079071,
166
+ -0.699999988079071,
167
+ 0.0,
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ -0.3209294053912163,
172
+ 0.1900009959936142,
173
+ -0.38803558617830275,
174
+ -0.5046620488166809,
175
+ -0.38927449703216555,
176
+ -0.47090124636888503,
177
+ -0.7163057714700699,
178
+ -0.3420322224497795,
179
+ -0.29934623271226884,
180
+ -0.26393272846937177,
181
+ -0.6705281788110733,
182
+ -0.5924341869354248,
183
+ -0.40590299278497693,
184
+ -0.32045080602169035,
185
+ -0.07038286864757538,
186
+ -0.07046280093491078,
187
+ -0.2561952766776085,
188
+ 0.7400000095367432,
189
+ -0.3524305522441864,
190
+ 0.0,
191
+ -0.1027187518030405,
192
+ 0.0
193
+ ],
194
+ "q99": [
195
+ 0.5,
196
+ 0.699999988079071,
197
+ 0.699999988079071,
198
+ 0.0,
199
+ 0.0,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0,
203
+ 0.0,
204
+ 0.0,
205
+ 1.5,
206
+ 1.5,
207
+ 0.6000000238418579,
208
+ 1.5,
209
+ 0.14089947253465648,
210
+ 0.580883502960205,
211
+ 1.1797874009609222,
212
+ 0.25774784147739405,
213
+ 0.5666770941019057,
214
+ 0.34745706409215893,
215
+ 0.339419822692871,
216
+ 0.08011209599673746,
217
+ -0.1900009959936142,
218
+ 0.40124923735857004,
219
+ 0.3766537192463873,
220
+ 0.41124969720840454,
221
+ 0.48994380980730057,
222
+ 0.4618227949738502,
223
+ 0.09533960297703735,
224
+ 0.12665506854653355,
225
+ 0.11710006609559051,
226
+ 0.7400000095367432,
227
+ 0.4913194477558136,
228
+ 0.5,
229
+ 0.10739764258265483,
230
+ 0.12076389044523239
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ -0.2155175358057022,
274
+ -0.02219489961862564,
275
+ 0.28812506794929504,
276
+ -0.1511061191558838,
277
+ -0.5701737403869629,
278
+ -0.21330925822257996,
279
+ -0.6113156676292419,
280
+ -0.17896264791488647,
281
+ -0.017788594588637352,
282
+ -0.20973540842533112,
283
+ 0.1504911184310913,
284
+ 0.4522649943828583,
285
+ 0.16520608961582184,
286
+ 0.4278402328491211,
287
+ -0.025824211537837982,
288
+ 0.2251066416501999,
289
+ 0.06837588548660278,
290
+ -0.006100596394389868,
291
+ -0.057470113039016724,
292
+ 0.03059970773756504,
293
+ -0.014116800390183926,
294
+ -0.05716487765312195,
295
+ -0.1999409943819046,
296
+ 0.16329504549503326,
297
+ -0.06425096839666367,
298
+ -0.031190501525998116,
299
+ 0.18948617577552795,
300
+ 0.06857924908399582,
301
+ -0.005298840347677469,
302
+ 0.06115104258060455,
303
+ -0.01866256073117256,
304
+ 0.7401506304740906
305
+ ],
306
+ "std": [
307
+ 0.25221753120422363,
308
+ 0.0899457111954689,
309
+ 0.2992278039455414,
310
+ 0.17489215731620789,
311
+ 0.6001524329185486,
312
+ 0.2285156548023224,
313
+ 0.6428972482681274,
314
+ 0.2302001267671585,
315
+ 0.07764989882707596,
316
+ 0.2707969546318054,
317
+ 0.16321305930614471,
318
+ 0.5522690415382385,
319
+ 0.2336428165435791,
320
+ 0.5221257209777832,
321
+ 0.08843120187520981,
322
+ 0.07218267768621445,
323
+ 0.4016489088535309,
324
+ 0.16209223866462708,
325
+ 0.192921981215477,
326
+ 0.14427520334720612,
327
+ 0.26766741275787354,
328
+ 0.08319389075040817,
329
+ 0.02173873409628868,
330
+ 0.13788215816020966,
331
+ 0.24289271235466003,
332
+ 0.1938791275024414,
333
+ 0.17991188168525696,
334
+ 0.174061119556427,
335
+ 0.025640971958637238,
336
+ 0.041976913809776306,
337
+ 0.0752870962023735,
338
+ 0.00015065705521256548
339
+ ],
340
+ "max": [
341
+ 0.43566983938217163,
342
+ 0.3739710748195648,
343
+ 0.6575677990913391,
344
+ 0.004060761071741581,
345
+ 0.0005700877518393099,
346
+ 0.0004725759499706328,
347
+ 0.00010080631182063371,
348
+ 1.310737025050912e-05,
349
+ 0.21882089972496033,
350
+ 0.0005271440604701638,
351
+ 0.530737042427063,
352
+ 1.4406861066818237,
353
+ 1.4605127573013306,
354
+ 1.4595911502838135,
355
+ 0.2663630545139313,
356
+ 0.657910943031311,
357
+ 1.2515853643417358,
358
+ 0.502498209476471,
359
+ 0.8292973637580872,
360
+ 0.5248894095420837,
361
+ 0.4653257131576538,
362
+ 0.18638382852077484,
363
+ -0.16696421802043915,
364
+ 0.49318820238113403,
365
+ 0.6363148093223572,
366
+ 0.45773962140083313,
367
+ 0.6238265037536621,
368
+ 0.653800904750824,
369
+ 0.1436084657907486,
370
+ 0.25937986373901367,
371
+ 0.26422709226608276,
372
+ 0.7400000095367432
373
+ ],
374
+ "min": [
375
+ -0.5564982891082764,
376
+ -0.48307520151138306,
377
+ -0.0005447770818136632,
378
+ -0.8388738632202148,
379
+ -1.3970016241073608,
380
+ -0.8296014666557312,
381
+ -1.4599460363388062,
382
+ -0.5806806683540344,
383
+ -0.5149835348129272,
384
+ -0.6775947213172913,
385
+ -0.001480442238971591,
386
+ -0.0002713006397243589,
387
+ -0.000914653530344367,
388
+ -0.00019419840828049928,
389
+ -0.4206617772579193,
390
+ 0.13972464203834534,
391
+ -0.546251654624939,
392
+ -0.5596316456794739,
393
+ -0.4764360189437866,
394
+ -0.7253566384315491,
395
+ -0.9443663954734802,
396
+ -0.4381798803806305,
397
+ -0.3338131606578827,
398
+ -0.667724072933197,
399
+ -0.6881827116012573,
400
+ -0.7544379830360413,
401
+ -0.5189417600631714,
402
+ -0.4484957158565521,
403
+ -0.13709338009357452,
404
+ -0.07360810041427612,
405
+ -0.4748336970806122,
406
+ 0.7400000095367432
407
+ ],
408
+ "q01": [
409
+ -0.545208849310875,
410
+ -0.42749745190143584,
411
+ -0.0005246381351025775,
412
+ -0.6480066239833832,
413
+ -1.3562620949745179,
414
+ -0.7743040478229523,
415
+ -1.4221707606315612,
416
+ -0.5719072341918945,
417
+ -0.324733624458313,
418
+ -0.6709954166412353,
419
+ -1.057923989264964e-06,
420
+ -1.457349050326684e-07,
421
+ -1.5106486750937617e-06,
422
+ -2.423548727392699e-07,
423
+ -0.27424134463071825,
424
+ 0.15975838720798494,
425
+ -0.40353597432374955,
426
+ -0.4082282695174217,
427
+ -0.3712728089094162,
428
+ -0.40305238008499145,
429
+ -0.7383889842033386,
430
+ -0.2908404359221458,
431
+ -0.2874874520301819,
432
+ -0.24262819081544876,
433
+ -0.6050335317850113,
434
+ -0.5883933693170548,
435
+ -0.33852073848247527,
436
+ -0.31927637457847596,
437
+ -0.07735681585967541,
438
+ -0.042391608729958535,
439
+ -0.25428820788860323,
440
+ 0.7400000095367432
441
+ ],
442
+ "q99": [
443
+ 0.40327705115079837,
444
+ 0.2348814429342737,
445
+ 0.6518104630708694,
446
+ 0.00030119536590063946,
447
+ 0.0005192926508607343,
448
+ 0.0001505175937199965,
449
+ 2.3595025995746256e-05,
450
+ 5.185912332308361e-06,
451
+ 0.08334404386579981,
452
+ 0.00022139013104606418,
453
+ 0.44970364242792127,
454
+ 1.3850609183311462,
455
+ 1.4514530301094055,
456
+ 1.4533516800403594,
457
+ 0.16941204354166983,
458
+ 0.5226882100105286,
459
+ 1.1606279826164243,
460
+ 0.33347084760665896,
461
+ 0.5583706372976303,
462
+ 0.3959252551198003,
463
+ 0.3333164182305336,
464
+ 0.1102284654974937,
465
+ -0.1776231697201729,
466
+ 0.40905793011188507,
467
+ 0.44925396174192406,
468
+ 0.3893393576145172,
469
+ 0.5468983370065689,
470
+ 0.474879567325115,
471
+ 0.08610126286745068,
472
+ 0.1693275338411331,
473
+ 0.11660626158118245,
474
+ 0.7400000095367432
475
+ ]
476
+ },
477
+ "num_transitions": 45530,
478
+ "num_trajectories": 100
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyHandover/20260409_053020/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e208035472e5474bac095beed97fb98fabc9423207afb4ce872a77f68efb0418
3
+ size 8604557774
intervla-m1/simple/G1WholebodyHandover/20260409_053020/summary.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"steps": 5000}
2
+ {"steps": 10000}
3
+ {"steps": 15000}
4
+ {"steps": 20000}
5
+ {"steps": 25000}
6
+ {"steps": 30000}
7
+ {"steps": 35000}
8
+ {"steps": 40000}
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b22aeeb726c35af16a4b2648560d30867903090efb032011d21b73597620d0b
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_15000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff18dfa42f23cffbe8bf20767cb45455f8e4ebaee7e2983f665081809ba5054
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8b75f78f58f3529a74a8da5d91c29e7ab83a9e54f59bccc26407dfd649e58f
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_25000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:593e5f1f120302800b15e759aa5d3fcdcdd715adb08a38b9b8dc5b7c328f756b
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb2ca0788a6ffcc07c0c6f6b4a4212127f349b161cffc15aa3d26c7e5164555
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_35000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7251b81bf2de426cd479000063b168734004441353b65c638703039ea295df
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed48be8d9fdae3547ac2f4b9bedc3dc822e1bf72d72d65c3f62976f44fcaf8d
3
+ size 8604575530
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f1d99ec43b59b95843747d77328a8d4d2abd51115f47e3daa4cfdfda1e07b03
3
+ size 8604574397
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyLocomotionPickBetweenTablesTeleop",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/data/jliu/data/G1WholebodyLocomotionPickBetweenTablesTeleop-v0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 5000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110"
151
+ }
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyLocomotionPickBetweenTablesTeleop
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /data/jliu/data/G1WholebodyLocomotionPickBetweenTablesTeleop-v0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 5000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.0007652958738617599,
6
+ 0.0010714149102568626,
7
+ 0.0010714149102568626,
8
+ -0.0022958877962082624,
9
+ -0.0022958877962082624,
10
+ -0.0009183543152175844,
11
+ -0.0022958877962082624,
12
+ -0.4180760681629181,
13
+ -0.5853561758995056,
14
+ -0.5853561758995056,
15
+ 1.254742980003357,
16
+ 1.2550007104873657,
17
+ 0.5019800662994385,
18
+ 1.2550007104873657,
19
+ -0.06184714287519455,
20
+ 0.19450722634792328,
21
+ -0.12890596687793732,
22
+ -0.0004327417991589755,
23
+ -0.11649200320243835,
24
+ -0.022241855040192604,
25
+ 0.14975687861442566,
26
+ -0.2271433174610138,
27
+ -0.28912046551704407,
28
+ -0.12231507897377014,
29
+ -0.23417018353939056,
30
+ 0.2349066436290741,
31
+ 0.10561700165271759,
32
+ -0.03762618452310562,
33
+ -0.007439092267304659,
34
+ 0.04567578062415123,
35
+ 0.0297609381377697,
36
+ 0.7396373152732849,
37
+ 0.12372121214866638,
38
+ 0.013200674206018448,
39
+ -0.2235538214445114,
40
+ 0.4495043158531189
41
+ ],
42
+ "std": [
43
+ 0.01948833279311657,
44
+ 0.027283955365419388,
45
+ 0.027283955365419388,
46
+ 0.05846178159117699,
47
+ 0.05846178159117699,
48
+ 0.02337435446679592,
49
+ 0.05846178159117699,
50
+ 0.1841326802968979,
51
+ 0.2574934661388397,
52
+ 0.2574934661388397,
53
+ 0.5518325567245483,
54
+ 0.5516064763069153,
55
+ 0.22092333436012268,
56
+ 0.5516064763069153,
57
+ 0.08286559581756592,
58
+ 0.013414013199508157,
59
+ 0.14786243438720703,
60
+ 0.08076202124357224,
61
+ 0.06461654603481293,
62
+ 0.08045286685228348,
63
+ 0.10112713277339935,
64
+ 0.11457119882106781,
65
+ 0.1058330088853836,
66
+ 0.2477482408285141,
67
+ 0.16770882904529572,
68
+ 0.21804748475551605,
69
+ 0.15766489505767822,
70
+ 0.1760520040988922,
71
+ 0.052497588098049164,
72
+ 0.024612687528133392,
73
+ 0.057495709508657455,
74
+ 0.0003625280806534051,
75
+ 0.2089398205280304,
76
+ 0.08656619489192963,
77
+ 0.3711760640144348,
78
+ 2.2139852046966553
79
+ ],
80
+ "max": [
81
+ 0.5,
82
+ 0.699999988079071,
83
+ 0.699999988079071,
84
+ 0.0,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 6.930528109384597e-19,
89
+ 1.108467615016421e-16,
90
+ 1.108467615016421e-16,
91
+ 1.5,
92
+ 1.5,
93
+ 1.0,
94
+ 1.5,
95
+ 0.2557959258556366,
96
+ 0.35884979367256165,
97
+ 0.5090755820274353,
98
+ 0.19132143259048462,
99
+ 0.21249642968177795,
100
+ 0.3992660641670227,
101
+ 0.4283020794391632,
102
+ 0.1457289606332779,
103
+ -0.1900009959936142,
104
+ 0.6150448322296143,
105
+ 0.35468167066574097,
106
+ 0.8703295588493347,
107
+ 0.7531875371932983,
108
+ 0.971237301826477,
109
+ 0.13985762000083923,
110
+ 0.15686897933483124,
111
+ 0.4661160111427307,
112
+ 0.7400000095367432,
113
+ 0.5,
114
+ 0.5,
115
+ 1.0,
116
+ 3.1414895057678223
117
+ ],
118
+ "min": [
119
+ 0.0,
120
+ 0.0,
121
+ 0.0,
122
+ -1.5,
123
+ -1.5,
124
+ -0.6000000238418579,
125
+ -1.5,
126
+ -0.5,
127
+ -0.699999988079071,
128
+ -0.699999988079071,
129
+ -2.216935230032842e-16,
130
+ -2.216935230032842e-16,
131
+ -4.0845591349633594e-18,
132
+ -2.216935230032842e-16,
133
+ -0.4883034825325012,
134
+ 0.1900009959936142,
135
+ -0.5470856428146362,
136
+ -0.34318920969963074,
137
+ -0.35952919721603394,
138
+ -0.35302427411079407,
139
+ -0.4469815790653229,
140
+ -0.6371198296546936,
141
+ -0.7683824300765991,
142
+ -1.0653810501098633,
143
+ -0.8479154706001282,
144
+ -1.0297260284423828,
145
+ -0.42936205863952637,
146
+ -0.5147944092750549,
147
+ -0.16820405423641205,
148
+ -0.045328833162784576,
149
+ -0.13282617926597595,
150
+ 0.7400000095367432,
151
+ -0.5,
152
+ -0.5,
153
+ -1.0,
154
+ -3.138223648071289
155
+ ],
156
+ "q01": [
157
+ 0.0,
158
+ 0.0,
159
+ 0.0,
160
+ 0.0,
161
+ 0.0,
162
+ 0.0,
163
+ 0.0,
164
+ -0.5,
165
+ -0.699999988079071,
166
+ -0.699999988079071,
167
+ 0.0,
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ -0.30536221772432326,
172
+ 0.1900009959936142,
173
+ -0.4495888948440552,
174
+ -0.23494456708431244,
175
+ -0.27246662437915803,
176
+ -0.2315385288000107,
177
+ -0.18485171496868133,
178
+ -0.4921060320734978,
179
+ -0.6122316139936447,
180
+ -0.7731428289413452,
181
+ -0.5696775823831558,
182
+ -0.4394104504585266,
183
+ -0.28442258715629576,
184
+ -0.3538160628080368,
185
+ -0.11734950572252273,
186
+ -0.015320314802229404,
187
+ -0.09260479986667633,
188
+ 0.7400000095367432,
189
+ 0.0,
190
+ -0.3567708432674408,
191
+ -1.0,
192
+ -3.124027729034424
193
+ ],
194
+ "q99": [
195
+ 0.0,
196
+ 0.0,
197
+ 0.0,
198
+ 0.0,
199
+ 0.0,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0,
203
+ 0.0,
204
+ 0.0,
205
+ 1.5,
206
+ 1.5,
207
+ 0.6000000238418579,
208
+ 1.5,
209
+ 0.12895929232239725,
210
+ 0.26329000800848007,
211
+ 0.2936864292621614,
212
+ 0.14898110926151276,
213
+ 0.06174419380724448,
214
+ 0.2529342502355577,
215
+ 0.3428380289673807,
216
+ -0.0013759000797290315,
217
+ -0.1900009959936142,
218
+ 0.34379526853561404,
219
+ 0.19408822178840665,
220
+ 0.6508016681671143,
221
+ 0.5123037415742882,
222
+ 0.49184119641780855,
223
+ 0.11030469514429586,
224
+ 0.11221159264445316,
225
+ 0.21934302642941514,
226
+ 0.7400000095367432,
227
+ 0.5,
228
+ 0.35676584899425506,
229
+ 0.19804689854383473,
230
+ 3.1301991939544678
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ 0.0007934353780001402,
274
+ 0.0010695882374420762,
275
+ 0.0012370680924504995,
276
+ -0.0009144614450633526,
277
+ -0.002343796193599701,
278
+ -0.002245287410914898,
279
+ -0.0023474614135921,
280
+ -0.43587779998779297,
281
+ 0.009345951490104198,
282
+ -0.5239402651786804,
283
+ 0.347458153963089,
284
+ 1.1157641410827637,
285
+ 0.3141929805278778,
286
+ 1.061142086982727,
287
+ -0.02466614544391632,
288
+ 0.18699687719345093,
289
+ -0.14352792501449585,
290
+ 0.0824379175901413,
291
+ -0.1188783347606659,
292
+ 0.038459114730358124,
293
+ 0.14665654301643372,
294
+ -0.18040531873703003,
295
+ -0.2714536190032959,
296
+ -0.09729836881160736,
297
+ -0.149928018450737,
298
+ 0.2267490029335022,
299
+ 0.16860823333263397,
300
+ -0.03799568489193916,
301
+ -0.00529489666223526,
302
+ 0.08189272880554199,
303
+ 0.029461175203323364,
304
+ 0.7396373152732849
305
+ ],
306
+ "std": [
307
+ 0.01693989522755146,
308
+ 0.02404647134244442,
309
+ 0.02378660999238491,
310
+ 0.017665348947048187,
311
+ 0.04505283385515213,
312
+ 0.045055001974105835,
313
+ 0.04504767060279846,
314
+ 0.19141361117362976,
315
+ 0.08607088029384613,
316
+ 0.22821539640426636,
317
+ 0.15562357008457184,
318
+ 0.497448593378067,
319
+ 0.16223013401031494,
320
+ 0.47353243827819824,
321
+ 0.07905217260122299,
322
+ 0.013432762585580316,
323
+ 0.1468047797679901,
324
+ 0.07942581176757812,
325
+ 0.06510384380817413,
326
+ 0.0791940838098526,
327
+ 0.10252271592617035,
328
+ 0.11002297699451447,
329
+ 0.09499681740999222,
330
+ 0.24013079702854156,
331
+ 0.1692967414855957,
332
+ 0.21809111535549164,
333
+ 0.158147931098938,
334
+ 0.17850598692893982,
335
+ 0.050502315163612366,
336
+ 0.023258700966835022,
337
+ 0.058882467448711395,
338
+ 0.0003625280806534051
339
+ ],
340
+ "max": [
341
+ 0.47981399297714233,
342
+ 0.6772664189338684,
343
+ 0.6746510863304138,
344
+ 0.0010172375477850437,
345
+ 0.0007091082516126335,
346
+ 0.001881288131698966,
347
+ 0.0011398319620639086,
348
+ 6.141255539660051e-07,
349
+ 0.3043450713157654,
350
+ 6.343479981296696e-07,
351
+ 0.6933000087738037,
352
+ 1.4612544775009155,
353
+ 1.4651201963424683,
354
+ 1.4609057903289795,
355
+ 0.2809508740901947,
356
+ 0.34028318524360657,
357
+ 0.47627460956573486,
358
+ 0.26476219296455383,
359
+ 0.20825636386871338,
360
+ 0.4566418528556824,
361
+ 0.42864030599594116,
362
+ 0.1656116098165512,
363
+ -0.1549365073442459,
364
+ 0.5154499411582947,
365
+ 0.4242899715900421,
366
+ 0.8548054695129395,
367
+ 0.8040095567703247,
368
+ 0.9811649322509766,
369
+ 0.136736661195755,
370
+ 0.195722296833992,
371
+ 0.45781663060188293,
372
+ 0.7400000095367432
373
+ ],
374
+ "min": [
375
+ -0.02442001923918724,
376
+ -0.0517612099647522,
377
+ -0.0006534014828503132,
378
+ -0.5095356106758118,
379
+ -1.323034405708313,
380
+ -1.3221508264541626,
381
+ -1.3230019807815552,
382
+ -0.5770347714424133,
383
+ -0.4338151812553406,
384
+ -0.6721642017364502,
385
+ -0.0017213862156495452,
386
+ -7.534810038123396e-07,
387
+ -0.001927333534695208,
388
+ -1.075333216249419e-06,
389
+ -0.43650975823402405,
390
+ 0.15721464157104492,
391
+ -0.5489339232444763,
392
+ -0.2632291913032532,
393
+ -0.3508843183517456,
394
+ -0.23784859478473663,
395
+ -0.4281824827194214,
396
+ -0.5803383588790894,
397
+ -0.7118590474128723,
398
+ -1.0344431400299072,
399
+ -0.7932196259498596,
400
+ -1.0205217599868774,
401
+ -0.3445618450641632,
402
+ -0.5986371040344238,
403
+ -0.13537253439426422,
404
+ -0.0017330688424408436,
405
+ -0.1421850621700287,
406
+ 0.7400000095367432
407
+ ],
408
+ "q01": [
409
+ -2.9736981127825855e-06,
410
+ -0.002589050980750471,
411
+ -2.1223586691121456e-06,
412
+ -0.00027220559131819756,
413
+ -5.2740163209819e-06,
414
+ -4.3062968397862275e-05,
415
+ 6.949997242600148e-08,
416
+ -0.5698864543437958,
417
+ -0.3627366861701012,
418
+ -0.6697060906887055,
419
+ -1.2418152664395165e-06,
420
+ -1.287923055315332e-07,
421
+ -7.915375590528129e-07,
422
+ -1.582540755862283e-07,
423
+ -0.2560530769824982,
424
+ 0.16664464086294173,
425
+ -0.4414859291911125,
426
+ -0.14972542390227317,
427
+ -0.2697399368882179,
428
+ -0.16884329065680503,
429
+ -0.18771703973412515,
430
+ -0.4346155697107315,
431
+ -0.5672112548351288,
432
+ -0.739670946598053,
433
+ -0.49567418187856677,
434
+ -0.4431849017739296,
435
+ -0.22463233456015586,
436
+ -0.3607582712173462,
437
+ -0.10682432219386101,
438
+ 0.02814220966771245,
439
+ -0.0969948647916317,
440
+ 0.7400000095367432
441
+ ],
442
+ "q99": [
443
+ 3.4415612958582677e-06,
444
+ 0.019452356398105622,
445
+ 0.01667371392250061,
446
+ 0.0005855054722633213,
447
+ 0.0003600666584679857,
448
+ 0.0006024087872356176,
449
+ 0.0003732459741877392,
450
+ 2.2463960860363857e-07,
451
+ 0.15193727359175682,
452
+ 5.1716865669959736e-08,
453
+ 0.5587792527675629,
454
+ 1.4585348367691042,
455
+ 0.5443805891275406,
456
+ 1.370901610851288,
457
+ 0.1576733058691028,
458
+ 0.24011337146163012,
459
+ 0.27520660489797616,
460
+ 0.2312955512106419,
461
+ 0.05981415051967266,
462
+ 0.3088961178064349,
463
+ 0.3407434976100922,
464
+ 0.030774814467877155,
465
+ -0.17435445189476012,
466
+ 0.3570790392160416,
467
+ 0.2687951233983051,
468
+ 0.6410461419820785,
469
+ 0.5765821474790573,
470
+ 0.49433160990476627,
471
+ 0.09976441204547885,
472
+ 0.14455557838082314,
473
+ 0.21845388084650053,
474
+ 0.7400000095367432
475
+ ]
476
+ },
477
+ "num_transitions": 62764,
478
+ "num_trajectories": 99
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1109463a97d234bf908d94e5f4aefc67cc4733b29a65514b4db1ca4e43ff26a6
3
+ size 8604557774
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/summary.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"steps": 5000}
2
+ {"steps": 10000}
3
+ {"steps": 15000}
4
+ {"steps": 20000}
5
+ {"steps": 25000}
6
+ {"steps": 30000}
7
+ {"steps": 35000}
8
+ {"steps": 40000}
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a4c1423afa3642772d54f313d495edc8b36c26cb7369aa8a28a6efe77388975
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_15000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e3a356b4abbfa51d31a369075f8634f7136034f1bf6f3364846ec40f39d663
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc6bb69676e3b2321d66dbc1c2318bb31e9d32036e6d7c1bf2d53862a7ddf355
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_25000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaf44ddba127eb83c151bb7e347080765d7b6ed3b548ed664471f7b8919fad6
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b802f0d6833b5a293bf1784129a017652841862c3189cd4860d55bc9e198f52
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_35000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95b62139bca7bb8cdc48c5ce7530154d737292045eeb416d0b9f442a6288e51b
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c78ff6e3c141e32bb727c439373da5992fd5524c804611f5b934c6e5f42d5d
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01afd6ef0afc4a55e55b0f021b6a9f27b609ab0c526eaab0aff52488d38ba394
3
+ size 8604574397
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyXMoveBendPickTeleop",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/data/jliu/data/G1WholebodyXMoveBendPickTeleop-v0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 5000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyXMoveBendPickTeleop/20260403_151218"
151
+ }
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyXMoveBendPickTeleop
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /data/jliu/data/G1WholebodyXMoveBendPickTeleop-v0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 5000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyXMoveBendPickTeleop/20260403_151218
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.0,
6
+ 0.0,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0,
12
+ -0.06578648090362549,
13
+ -0.0920981913805008,
14
+ -0.0920981913805008,
15
+ 0.19735944271087646,
16
+ 0.19735944271087646,
17
+ 0.07894159853458405,
18
+ 0.19735944271087646,
19
+ -0.08300793915987015,
20
+ 0.19217251241207123,
21
+ -0.07867422699928284,
22
+ 0.051928430795669556,
23
+ -0.20575666427612305,
24
+ 0.026639869436621666,
25
+ 0.2688467502593994,
26
+ -0.07158240675926208,
27
+ -0.1959005445241928,
28
+ -0.044735364615917206,
29
+ 0.09095965325832367,
30
+ 0.14965403079986572,
31
+ -0.03834051638841629,
32
+ -0.059758421033620834,
33
+ -0.0003535658761393279,
34
+ 0.02151256427168846,
35
+ 0.008055430836975574,
36
+ 0.6770350337028503,
37
+ 0.09653493016958237,
38
+ -0.0004846698429901153,
39
+ 0.014787894673645496,
40
+ 0.0
41
+ ],
42
+ "std": [
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0,
48
+ 0.0,
49
+ 0.0,
50
+ 0.16896027326583862,
51
+ 0.2365109622478485,
52
+ 0.2365109622478485,
53
+ 0.5068987011909485,
54
+ 0.5068987011909485,
55
+ 0.2027282416820526,
56
+ 0.5068987011909485,
57
+ 0.04981342703104019,
58
+ 0.009523554705083325,
59
+ 0.17949867248535156,
60
+ 0.055632513016462326,
61
+ 0.057273078709840775,
62
+ 0.058521512895822525,
63
+ 0.11143220216035843,
64
+ 0.07724548876285553,
65
+ 0.023301944136619568,
66
+ 0.10750990360975266,
67
+ 0.06850353628396988,
68
+ 0.07305468618869781,
69
+ 0.06640303879976273,
70
+ 0.16941164433956146,
71
+ 0.041646938771009445,
72
+ 0.0699257031083107,
73
+ 0.027388552203774452,
74
+ 0.08091399818658829,
75
+ 0.11833422631025314,
76
+ 0.017973335459828377,
77
+ 0.03434111177921295,
78
+ 0.0
79
+ ],
80
+ "max": [
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 1.5,
92
+ 1.5,
93
+ 0.6000000238418579,
94
+ 1.5,
95
+ 0.06430592387914658,
96
+ 0.2996276319026947,
97
+ 0.5128592252731323,
98
+ 0.3017215132713318,
99
+ -0.038698144257068634,
100
+ 0.21968720853328705,
101
+ 0.827497124671936,
102
+ 0.044904597103595734,
103
+ -0.1900009959936142,
104
+ 0.413065105676651,
105
+ 0.41873428225517273,
106
+ 0.6618388891220093,
107
+ 0.4026392698287964,
108
+ 0.8194853663444519,
109
+ 0.12383800745010376,
110
+ 0.16346246004104614,
111
+ 0.15494900941848755,
112
+ 0.7400000095367432,
113
+ 0.5,
114
+ 0.21786384284496307,
115
+ 0.1749052256345749,
116
+ 0.0
117
+ ],
118
+ "min": [
119
+ 0.0,
120
+ 0.0,
121
+ 0.0,
122
+ 0.0,
123
+ 0.0,
124
+ 0.0,
125
+ 0.0,
126
+ -0.5,
127
+ -0.699999988079071,
128
+ -0.699999988079071,
129
+ 0.0,
130
+ 0.0,
131
+ 0.0,
132
+ 0.0,
133
+ -0.3314070701599121,
134
+ 0.1900009959936142,
135
+ -0.8766500353813171,
136
+ -0.12303244322538376,
137
+ -0.4908517599105835,
138
+ -0.2786784768104553,
139
+ -0.022629141807556152,
140
+ -0.6784858703613281,
141
+ -0.5865002870559692,
142
+ -0.645729660987854,
143
+ -0.3608185946941376,
144
+ -0.15172408521175385,
145
+ -0.4648345112800598,
146
+ -0.2964947521686554,
147
+ -0.10700750350952148,
148
+ -0.21067920327186584,
149
+ -0.08102670311927795,
150
+ 0.44999998807907104,
151
+ -0.5,
152
+ -0.26561295986175537,
153
+ -0.11697302013635635,
154
+ 0.0
155
+ ],
156
+ "q01": [
157
+ 0.0,
158
+ 0.0,
159
+ 0.0,
160
+ 0.0,
161
+ 0.0,
162
+ 0.0,
163
+ 0.0,
164
+ -0.5,
165
+ -0.699999988079071,
166
+ -0.699999988079071,
167
+ 0.0,
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ -0.2336725726723671,
172
+ 0.1900009959936142,
173
+ -0.628720715045929,
174
+ -0.08062581032514572,
175
+ -0.3772744107246399,
176
+ -0.14941381871700288,
177
+ 0.023908816780894994,
178
+ -0.3680631712079048,
179
+ -0.3140790224075317,
180
+ -0.33936198383569716,
181
+ -0.1444373431801796,
182
+ -0.024716479536145926,
183
+ -0.25120449274778367,
184
+ -0.24746618106961252,
185
+ -0.0921607768535614,
186
+ -0.1331048083305359,
187
+ -0.055714426785707476,
188
+ 0.5099999904632568,
189
+ 0.0,
190
+ -0.08334636241197586,
191
+ -0.058656642064452175,
192
+ 0.0
193
+ ],
194
+ "q99": [
195
+ 0.0,
196
+ 0.0,
197
+ 0.0,
198
+ 0.0,
199
+ 0.0,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0,
203
+ 0.0,
204
+ 0.0,
205
+ 1.5,
206
+ 1.5,
207
+ 0.6000000238418579,
208
+ 1.5,
209
+ 0.005833799573592792,
210
+ 0.24212055698037147,
211
+ 0.31104624718427587,
212
+ 0.2240664350986478,
213
+ -0.07993344962596893,
214
+ 0.1509539039433002,
215
+ 0.6122507166862484,
216
+ 0.020442928690463276,
217
+ -0.1900009959936142,
218
+ 0.20298720359802205,
219
+ 0.26972131878137506,
220
+ 0.33771990299224813,
221
+ 0.14230648443102825,
222
+ 0.5476555949449537,
223
+ 0.10829514846205711,
224
+ 0.10879010632634158,
225
+ 0.09789865501224988,
226
+ 0.7400000095367432,
227
+ 0.4088541567325592,
228
+ 0.057279629707336424,
229
+ 0.11845016352832313,
230
+ 0.0
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ 1.6478608813486062e-05,
274
+ -4.823089329875074e-05,
275
+ -1.5274658835551236e-06,
276
+ 2.0739900719490834e-05,
277
+ -4.8923579015536234e-05,
278
+ 1.184017673949711e-05,
279
+ 1.8281939446751494e-06,
280
+ -0.04951467365026474,
281
+ -0.03387488052248955,
282
+ -0.05985373631119728,
283
+ 0.034982677549123764,
284
+ 0.09988676756620407,
285
+ 0.05889609828591347,
286
+ 0.09540130198001862,
287
+ -0.04761885479092598,
288
+ 0.18126359581947327,
289
+ -0.08981631696224213,
290
+ 0.1303543597459793,
291
+ -0.20870409905910492,
292
+ 0.08360962569713593,
293
+ 0.26360899209976196,
294
+ -0.033612482249736786,
295
+ -0.1811068058013916,
296
+ -0.027028528973460197,
297
+ 0.1747075915336609,
298
+ 0.1506245732307434,
299
+ 0.02000368759036064,
300
+ -0.07187763601541519,
301
+ 0.0014201127924025059,
302
+ 0.06093015894293785,
303
+ 0.007754191290587187,
304
+ 0.6778029799461365
305
+ ],
306
+ "std": [
307
+ 0.0003944706986658275,
308
+ 0.0007630966720171273,
309
+ 2.2961552531342022e-05,
310
+ 0.000179155234945938,
311
+ 0.0010678438702598214,
312
+ 4.7558256483171135e-05,
313
+ 1.1183346941834316e-05,
314
+ 0.12474565207958221,
315
+ 0.07307292520999908,
316
+ 0.14443156123161316,
317
+ 0.09493549168109894,
318
+ 0.2512502074241638,
319
+ 0.1241452693939209,
320
+ 0.23682793974876404,
321
+ 0.049122974276542664,
322
+ 0.0104843201115727,
323
+ 0.1711176037788391,
324
+ 0.05156445503234863,
325
+ 0.05493027716875076,
326
+ 0.05776740238070488,
327
+ 0.10714928060770035,
328
+ 0.06816332787275314,
329
+ 0.01825851947069168,
330
+ 0.10530710965394974,
331
+ 0.059736523777246475,
332
+ 0.06796342134475708,
333
+ 0.05937612056732178,
334
+ 0.1552959680557251,
335
+ 0.03811460733413696,
336
+ 0.06448719650506973,
337
+ 0.028375638648867607,
338
+ 0.08062339574098587
339
+ ],
340
+ "max": [
341
+ 0.013749510049819946,
342
+ 0.0003444451722316444,
343
+ 5.732499630539678e-06,
344
+ 0.0019246992887929082,
345
+ 0.0014607172925025225,
346
+ 0.0007710650679655373,
347
+ 0.0006001993897370994,
348
+ 4.888642592959513e-07,
349
+ 0.06670719385147095,
350
+ 1.4086220971876173e-06,
351
+ 0.43387407064437866,
352
+ 1.2414171695709229,
353
+ 0.6964682936668396,
354
+ 1.2072811126708984,
355
+ 0.0866343304514885,
356
+ 0.2651435434818268,
357
+ 0.49075624346733093,
358
+ 0.34916067123413086,
359
+ -0.06531530618667603,
360
+ 0.2507650554180145,
361
+ 0.9099032282829285,
362
+ 0.07794909924268723,
363
+ -0.15903376042842865,
364
+ 0.29115578532218933,
365
+ 0.48632845282554626,
366
+ 0.4680853486061096,
367
+ 0.40000519156455994,
368
+ 0.7901750206947327,
369
+ 0.11165501922369003,
370
+ 0.1871986985206604,
371
+ 0.15685616433620453,
372
+ 0.7400000095367432
373
+ ],
374
+ "min": [
375
+ -0.00044060105574317276,
376
+ -0.029227260500192642,
377
+ -0.0007062808726914227,
378
+ -0.006396367214620113,
379
+ -0.034731876105070114,
380
+ -0.00020073111227247864,
381
+ -8.215621392082539e-07,
382
+ -0.5499086976051331,
383
+ -0.5100165009498596,
384
+ -0.613179087638855,
385
+ -0.0030598489101976156,
386
+ -0.0002515389060135931,
387
+ -0.00361030176281929,
388
+ -0.003131122561171651,
389
+ -0.30267173051834106,
390
+ 0.162300705909729,
391
+ -0.8084174394607544,
392
+ -0.053157128393650055,
393
+ -0.48188674449920654,
394
+ -0.28324440121650696,
395
+ -0.02153456024825573,
396
+ -0.559512734413147,
397
+ -0.4063037037849426,
398
+ -0.625334620475769,
399
+ -0.17857033014297485,
400
+ -0.14080968499183655,
401
+ -0.3861367404460907,
402
+ -0.2920348048210144,
403
+ -0.0902835875749588,
404
+ -0.1666938215494156,
405
+ -0.07615894079208374,
406
+ 0.44999998807907104
407
+ ],
408
+ "q01": [
409
+ 7.74661926357112e-07,
410
+ -6.3755543715160465e-06,
411
+ -7.83351255222442e-07,
412
+ 3.4136806561946284e-07,
413
+ -0.0008449232077691706,
414
+ 4.5431972239384775e-06,
415
+ 7.721260197968149e-07,
416
+ -0.5028422969579697,
417
+ -0.3325865414738655,
418
+ -0.5741579407453536,
419
+ -1.5824165325284411e-06,
420
+ -1.323924946916577e-07,
421
+ -9.707011122372932e-07,
422
+ -1.9067205457190538e-07,
423
+ -0.1996450574696064,
424
+ 0.1655060650408268,
425
+ -0.6192439311742782,
426
+ 0.0036195464059710497,
427
+ -0.37595251262187956,
428
+ -0.10034843616187572,
429
+ 0.029302983712404963,
430
+ -0.29443797826766965,
431
+ -0.26330254584550855,
432
+ -0.3154676526784897,
433
+ 0.021579700019210574,
434
+ -0.02289357639849186,
435
+ -0.1849268364906311,
436
+ -0.24821986511349678,
437
+ -0.07978948682546616,
438
+ -0.08723165072500706,
439
+ -0.05445469941943884,
440
+ 0.5099999904632568
441
+ ],
442
+ "q99": [
443
+ 2.1163743895158388e-06,
444
+ 3.5946895195593186e-06,
445
+ 9.480705858777559e-07,
446
+ 0.0007039297890150918,
447
+ 0.0005248256213963012,
448
+ 2.909682405515922e-05,
449
+ 3.943643955608417e-06,
450
+ 2.646454402110975e-07,
451
+ 2.4528015819669183e-06,
452
+ 3.882593460957627e-07,
453
+ 0.39650109171867354,
454
+ 1.1101934683322905,
455
+ 0.5173005294799803,
456
+ 1.034569376707077,
457
+ 0.03528875216841695,
458
+ 0.22509524688124644,
459
+ 0.29769810587167667,
460
+ 0.28808553427457806,
461
+ -0.08342873558402061,
462
+ 0.20056841030716893,
463
+ 0.6333562320470806,
464
+ 0.05092002365738146,
465
+ -0.1633606669306755,
466
+ 0.21988036155700677,
467
+ 0.343479991853237,
468
+ 0.2987542548775673,
469
+ 0.12391192510724047,
470
+ 0.48378621041774733,
471
+ 0.09747317329049104,
472
+ 0.14307723090052604,
473
+ 0.09777400560677017,
474
+ 0.7400000095367432
475
+ ]
476
+ },
477
+ "num_transitions": 23664,
478
+ "num_trajectories": 100
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58eaff18dd7c1a3d468256151321af5e980b6fe219ac8291f98484134d166afd
3
+ size 8604557774
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/summary.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"steps": 5000}
2
+ {"steps": 10000}
3
+ {"steps": 15000}
4
+ {"steps": 20000}
5
+ {"steps": 25000}
6
+ {"steps": 30000}
7
+ {"steps": 35000}
8
+ {"steps": 40000}