Upload folder using huggingface_hub
Browse files- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_15000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_25000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_35000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_5000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.json +151 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyHandover/20260409_053020/summary.jsonl +8 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_15000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_25000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_35000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_5000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.json +151 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/summary.jsonl +8 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_15000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_25000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_35000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_5000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.json +151 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/summary.jsonl +8 -0
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3496810b7e088ebbf3a9499d5f908de154e5c5535615fd2b971644839bc07910
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_15000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1c46a21ba90ad0a1d42830640f817396440e519ad1749f84e5dd41fc0f93773
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ef9b1b1d89d55bb21bdefce2d312c5891f93c110a692ab37dd52c1137c95492
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_25000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e61b9432e2bd7d371b20b4e8ef23aa88e55c06723b68278d91fa4bfb868f7fe
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d032d86a01e3d7a34aca50eee8d353700cfaa9d8e3e39627adfca1da6170c90e
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_35000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61194d366f59d033c52d9d45df3a164d48dc2de03f71ac21256fed9b2fb15ef
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5de077c53050ef68f06eed06696ded8300cdb24f41b7bd639d6b52905e2eb1e6
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/checkpoints/steps_5000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b210d0f52e719aabadd09df6a20ffe8dc5c031d331a7f417f30f83bd2b9918ed
|
| 3 |
+
size 8604574397
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyHandover",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/data/jliu/data/G1WholebodyHandover-v0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 5000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyHandover/20260409_053020"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyHandover
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /data/jliu/data/G1WholebodyHandover-v0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 5000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyHandover/20260409_053020
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.217879056930542,
|
| 6 |
+
0.33495163917541504,
|
| 7 |
+
0.33495163917541504,
|
| 8 |
+
-0.30543649196624756,
|
| 9 |
+
-0.7176172733306885,
|
| 10 |
+
-0.6976513862609863,
|
| 11 |
+
-0.7176172733306885,
|
| 12 |
+
-0.16062557697296143,
|
| 13 |
+
-0.22490878403186798,
|
| 14 |
+
-0.22490878403186798,
|
| 15 |
+
0.48187702894210815,
|
| 16 |
+
0.48187702894210815,
|
| 17 |
+
0.19276303052902222,
|
| 18 |
+
0.48187702894210815,
|
| 19 |
+
-0.06166350468993187,
|
| 20 |
+
0.2401788830757141,
|
| 21 |
+
0.0873611643910408,
|
| 22 |
+
-0.09128011763095856,
|
| 23 |
+
-0.06374146789312363,
|
| 24 |
+
-0.03256600350141525,
|
| 25 |
+
-0.012831903994083405,
|
| 26 |
+
-0.09480268508195877,
|
| 27 |
+
-0.2054116129875183,
|
| 28 |
+
0.14932410418987274,
|
| 29 |
+
-0.1460893154144287,
|
| 30 |
+
-0.02532443404197693,
|
| 31 |
+
0.13397766649723053,
|
| 32 |
+
0.06257354468107224,
|
| 33 |
+
-0.000577143335249275,
|
| 34 |
+
0.02542431280016899,
|
| 35 |
+
-0.01923290081322193,
|
| 36 |
+
0.7401506304740906,
|
| 37 |
+
0.009205126203596592,
|
| 38 |
+
0.023015793412923813,
|
| 39 |
+
0.00010224639845546335,
|
| 40 |
+
0.0025475628208369017
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.2612758278846741,
|
| 44 |
+
0.349688321352005,
|
| 45 |
+
0.349688321352005,
|
| 46 |
+
0.3383631706237793,
|
| 47 |
+
0.7491674423217773,
|
| 48 |
+
0.7378979921340942,
|
| 49 |
+
0.7491674423217773,
|
| 50 |
+
0.23291678726673126,
|
| 51 |
+
0.3260721266269684,
|
| 52 |
+
0.3260721266269684,
|
| 53 |
+
0.6987663507461548,
|
| 54 |
+
0.6987663507461548,
|
| 55 |
+
0.279427170753479,
|
| 56 |
+
0.6987663507461548,
|
| 57 |
+
0.09250971674919128,
|
| 58 |
+
0.08452950417995453,
|
| 59 |
+
0.408634215593338,
|
| 60 |
+
0.1649845391511917,
|
| 61 |
+
0.19536836445331573,
|
| 62 |
+
0.14846064150333405,
|
| 63 |
+
0.26794546842575073,
|
| 64 |
+
0.08737793564796448,
|
| 65 |
+
0.024206371977925228,
|
| 66 |
+
0.14024904370307922,
|
| 67 |
+
0.24467714130878448,
|
| 68 |
+
0.19793805480003357,
|
| 69 |
+
0.18220646679401398,
|
| 70 |
+
0.17332760989665985,
|
| 71 |
+
0.025831403210759163,
|
| 72 |
+
0.04043002799153328,
|
| 73 |
+
0.07628294080495834,
|
| 74 |
+
0.00015065705521256548,
|
| 75 |
+
0.09775390475988388,
|
| 76 |
+
0.09573811292648315,
|
| 77 |
+
0.04227162525057793,
|
| 78 |
+
0.026238275691866875
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.5,
|
| 82 |
+
0.699999988079071,
|
| 83 |
+
0.699999988079071,
|
| 84 |
+
0.0,
|
| 85 |
+
2.2146225653890418e-16,
|
| 86 |
+
2.2146225653890418e-16,
|
| 87 |
+
2.2146225653890418e-16,
|
| 88 |
+
1.2266071310501902e-19,
|
| 89 |
+
1.1078670818917075e-16,
|
| 90 |
+
1.1078670818917075e-16,
|
| 91 |
+
1.5,
|
| 92 |
+
1.5,
|
| 93 |
+
0.6000000238418579,
|
| 94 |
+
1.5,
|
| 95 |
+
0.2472410947084427,
|
| 96 |
+
0.7092280983924866,
|
| 97 |
+
1.2571598291397095,
|
| 98 |
+
0.42311304807662964,
|
| 99 |
+
0.8564174771308899,
|
| 100 |
+
0.5002086162567139,
|
| 101 |
+
0.5172277092933655,
|
| 102 |
+
0.16140148043632507,
|
| 103 |
+
-0.1900009959936142,
|
| 104 |
+
0.5362864136695862,
|
| 105 |
+
0.5715147256851196,
|
| 106 |
+
0.5002322196960449,
|
| 107 |
+
0.566592276096344,
|
| 108 |
+
0.6392397880554199,
|
| 109 |
+
0.1580466777086258,
|
| 110 |
+
0.2233395129442215,
|
| 111 |
+
0.2582152187824249,
|
| 112 |
+
0.7400000095367432,
|
| 113 |
+
0.5,
|
| 114 |
+
0.5,
|
| 115 |
+
0.3454970121383667,
|
| 116 |
+
0.2899305522441864
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
-0.5,
|
| 120 |
+
-1.1095792134107943e-16,
|
| 121 |
+
-1.1095792134107943e-16,
|
| 122 |
+
-1.5,
|
| 123 |
+
-1.5,
|
| 124 |
+
-1.5,
|
| 125 |
+
-1.5,
|
| 126 |
+
-0.5,
|
| 127 |
+
-0.699999988079071,
|
| 128 |
+
-0.699999988079071,
|
| 129 |
+
-2.2166350627321588e-16,
|
| 130 |
+
-2.2166350627321588e-16,
|
| 131 |
+
0.0,
|
| 132 |
+
-2.2166350627321588e-16,
|
| 133 |
+
-0.47567468881607056,
|
| 134 |
+
0.1900009959936142,
|
| 135 |
+
-0.512170135974884,
|
| 136 |
+
-0.6265152096748352,
|
| 137 |
+
-0.5008617043495178,
|
| 138 |
+
-0.8220608830451965,
|
| 139 |
+
-0.9223371148109436,
|
| 140 |
+
-0.49507391452789307,
|
| 141 |
+
-0.3437551259994507,
|
| 142 |
+
-0.6871383190155029,
|
| 143 |
+
-0.7637607455253601,
|
| 144 |
+
-0.7568023204803467,
|
| 145 |
+
-0.576077401638031,
|
| 146 |
+
-0.4588268995285034,
|
| 147 |
+
-0.13876836001873016,
|
| 148 |
+
-0.10360867530107498,
|
| 149 |
+
-0.47856518626213074,
|
| 150 |
+
0.7400000095367432,
|
| 151 |
+
-0.5,
|
| 152 |
+
-0.5,
|
| 153 |
+
-0.26161932945251465,
|
| 154 |
+
-0.06718750298023224
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
-0.5,
|
| 158 |
+
0.0,
|
| 159 |
+
0.0,
|
| 160 |
+
-1.5,
|
| 161 |
+
-1.5,
|
| 162 |
+
-1.5,
|
| 163 |
+
-1.5,
|
| 164 |
+
-0.5,
|
| 165 |
+
-0.699999988079071,
|
| 166 |
+
-0.699999988079071,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
-0.3209294053912163,
|
| 172 |
+
0.1900009959936142,
|
| 173 |
+
-0.38803558617830275,
|
| 174 |
+
-0.5046620488166809,
|
| 175 |
+
-0.38927449703216555,
|
| 176 |
+
-0.47090124636888503,
|
| 177 |
+
-0.7163057714700699,
|
| 178 |
+
-0.3420322224497795,
|
| 179 |
+
-0.29934623271226884,
|
| 180 |
+
-0.26393272846937177,
|
| 181 |
+
-0.6705281788110733,
|
| 182 |
+
-0.5924341869354248,
|
| 183 |
+
-0.40590299278497693,
|
| 184 |
+
-0.32045080602169035,
|
| 185 |
+
-0.07038286864757538,
|
| 186 |
+
-0.07046280093491078,
|
| 187 |
+
-0.2561952766776085,
|
| 188 |
+
0.7400000095367432,
|
| 189 |
+
-0.3524305522441864,
|
| 190 |
+
0.0,
|
| 191 |
+
-0.1027187518030405,
|
| 192 |
+
0.0
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.5,
|
| 196 |
+
0.699999988079071,
|
| 197 |
+
0.699999988079071,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0,
|
| 205 |
+
1.5,
|
| 206 |
+
1.5,
|
| 207 |
+
0.6000000238418579,
|
| 208 |
+
1.5,
|
| 209 |
+
0.14089947253465648,
|
| 210 |
+
0.580883502960205,
|
| 211 |
+
1.1797874009609222,
|
| 212 |
+
0.25774784147739405,
|
| 213 |
+
0.5666770941019057,
|
| 214 |
+
0.34745706409215893,
|
| 215 |
+
0.339419822692871,
|
| 216 |
+
0.08011209599673746,
|
| 217 |
+
-0.1900009959936142,
|
| 218 |
+
0.40124923735857004,
|
| 219 |
+
0.3766537192463873,
|
| 220 |
+
0.41124969720840454,
|
| 221 |
+
0.48994380980730057,
|
| 222 |
+
0.4618227949738502,
|
| 223 |
+
0.09533960297703735,
|
| 224 |
+
0.12665506854653355,
|
| 225 |
+
0.11710006609559051,
|
| 226 |
+
0.7400000095367432,
|
| 227 |
+
0.4913194477558136,
|
| 228 |
+
0.5,
|
| 229 |
+
0.10739764258265483,
|
| 230 |
+
0.12076389044523239
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
-0.2155175358057022,
|
| 274 |
+
-0.02219489961862564,
|
| 275 |
+
0.28812506794929504,
|
| 276 |
+
-0.1511061191558838,
|
| 277 |
+
-0.5701737403869629,
|
| 278 |
+
-0.21330925822257996,
|
| 279 |
+
-0.6113156676292419,
|
| 280 |
+
-0.17896264791488647,
|
| 281 |
+
-0.017788594588637352,
|
| 282 |
+
-0.20973540842533112,
|
| 283 |
+
0.1504911184310913,
|
| 284 |
+
0.4522649943828583,
|
| 285 |
+
0.16520608961582184,
|
| 286 |
+
0.4278402328491211,
|
| 287 |
+
-0.025824211537837982,
|
| 288 |
+
0.2251066416501999,
|
| 289 |
+
0.06837588548660278,
|
| 290 |
+
-0.006100596394389868,
|
| 291 |
+
-0.057470113039016724,
|
| 292 |
+
0.03059970773756504,
|
| 293 |
+
-0.014116800390183926,
|
| 294 |
+
-0.05716487765312195,
|
| 295 |
+
-0.1999409943819046,
|
| 296 |
+
0.16329504549503326,
|
| 297 |
+
-0.06425096839666367,
|
| 298 |
+
-0.031190501525998116,
|
| 299 |
+
0.18948617577552795,
|
| 300 |
+
0.06857924908399582,
|
| 301 |
+
-0.005298840347677469,
|
| 302 |
+
0.06115104258060455,
|
| 303 |
+
-0.01866256073117256,
|
| 304 |
+
0.7401506304740906
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
0.25221753120422363,
|
| 308 |
+
0.0899457111954689,
|
| 309 |
+
0.2992278039455414,
|
| 310 |
+
0.17489215731620789,
|
| 311 |
+
0.6001524329185486,
|
| 312 |
+
0.2285156548023224,
|
| 313 |
+
0.6428972482681274,
|
| 314 |
+
0.2302001267671585,
|
| 315 |
+
0.07764989882707596,
|
| 316 |
+
0.2707969546318054,
|
| 317 |
+
0.16321305930614471,
|
| 318 |
+
0.5522690415382385,
|
| 319 |
+
0.2336428165435791,
|
| 320 |
+
0.5221257209777832,
|
| 321 |
+
0.08843120187520981,
|
| 322 |
+
0.07218267768621445,
|
| 323 |
+
0.4016489088535309,
|
| 324 |
+
0.16209223866462708,
|
| 325 |
+
0.192921981215477,
|
| 326 |
+
0.14427520334720612,
|
| 327 |
+
0.26766741275787354,
|
| 328 |
+
0.08319389075040817,
|
| 329 |
+
0.02173873409628868,
|
| 330 |
+
0.13788215816020966,
|
| 331 |
+
0.24289271235466003,
|
| 332 |
+
0.1938791275024414,
|
| 333 |
+
0.17991188168525696,
|
| 334 |
+
0.174061119556427,
|
| 335 |
+
0.025640971958637238,
|
| 336 |
+
0.041976913809776306,
|
| 337 |
+
0.0752870962023735,
|
| 338 |
+
0.00015065705521256548
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
0.43566983938217163,
|
| 342 |
+
0.3739710748195648,
|
| 343 |
+
0.6575677990913391,
|
| 344 |
+
0.004060761071741581,
|
| 345 |
+
0.0005700877518393099,
|
| 346 |
+
0.0004725759499706328,
|
| 347 |
+
0.00010080631182063371,
|
| 348 |
+
1.310737025050912e-05,
|
| 349 |
+
0.21882089972496033,
|
| 350 |
+
0.0005271440604701638,
|
| 351 |
+
0.530737042427063,
|
| 352 |
+
1.4406861066818237,
|
| 353 |
+
1.4605127573013306,
|
| 354 |
+
1.4595911502838135,
|
| 355 |
+
0.2663630545139313,
|
| 356 |
+
0.657910943031311,
|
| 357 |
+
1.2515853643417358,
|
| 358 |
+
0.502498209476471,
|
| 359 |
+
0.8292973637580872,
|
| 360 |
+
0.5248894095420837,
|
| 361 |
+
0.4653257131576538,
|
| 362 |
+
0.18638382852077484,
|
| 363 |
+
-0.16696421802043915,
|
| 364 |
+
0.49318820238113403,
|
| 365 |
+
0.6363148093223572,
|
| 366 |
+
0.45773962140083313,
|
| 367 |
+
0.6238265037536621,
|
| 368 |
+
0.653800904750824,
|
| 369 |
+
0.1436084657907486,
|
| 370 |
+
0.25937986373901367,
|
| 371 |
+
0.26422709226608276,
|
| 372 |
+
0.7400000095367432
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
-0.5564982891082764,
|
| 376 |
+
-0.48307520151138306,
|
| 377 |
+
-0.0005447770818136632,
|
| 378 |
+
-0.8388738632202148,
|
| 379 |
+
-1.3970016241073608,
|
| 380 |
+
-0.8296014666557312,
|
| 381 |
+
-1.4599460363388062,
|
| 382 |
+
-0.5806806683540344,
|
| 383 |
+
-0.5149835348129272,
|
| 384 |
+
-0.6775947213172913,
|
| 385 |
+
-0.001480442238971591,
|
| 386 |
+
-0.0002713006397243589,
|
| 387 |
+
-0.000914653530344367,
|
| 388 |
+
-0.00019419840828049928,
|
| 389 |
+
-0.4206617772579193,
|
| 390 |
+
0.13972464203834534,
|
| 391 |
+
-0.546251654624939,
|
| 392 |
+
-0.5596316456794739,
|
| 393 |
+
-0.4764360189437866,
|
| 394 |
+
-0.7253566384315491,
|
| 395 |
+
-0.9443663954734802,
|
| 396 |
+
-0.4381798803806305,
|
| 397 |
+
-0.3338131606578827,
|
| 398 |
+
-0.667724072933197,
|
| 399 |
+
-0.6881827116012573,
|
| 400 |
+
-0.7544379830360413,
|
| 401 |
+
-0.5189417600631714,
|
| 402 |
+
-0.4484957158565521,
|
| 403 |
+
-0.13709338009357452,
|
| 404 |
+
-0.07360810041427612,
|
| 405 |
+
-0.4748336970806122,
|
| 406 |
+
0.7400000095367432
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
-0.545208849310875,
|
| 410 |
+
-0.42749745190143584,
|
| 411 |
+
-0.0005246381351025775,
|
| 412 |
+
-0.6480066239833832,
|
| 413 |
+
-1.3562620949745179,
|
| 414 |
+
-0.7743040478229523,
|
| 415 |
+
-1.4221707606315612,
|
| 416 |
+
-0.5719072341918945,
|
| 417 |
+
-0.324733624458313,
|
| 418 |
+
-0.6709954166412353,
|
| 419 |
+
-1.057923989264964e-06,
|
| 420 |
+
-1.457349050326684e-07,
|
| 421 |
+
-1.5106486750937617e-06,
|
| 422 |
+
-2.423548727392699e-07,
|
| 423 |
+
-0.27424134463071825,
|
| 424 |
+
0.15975838720798494,
|
| 425 |
+
-0.40353597432374955,
|
| 426 |
+
-0.4082282695174217,
|
| 427 |
+
-0.3712728089094162,
|
| 428 |
+
-0.40305238008499145,
|
| 429 |
+
-0.7383889842033386,
|
| 430 |
+
-0.2908404359221458,
|
| 431 |
+
-0.2874874520301819,
|
| 432 |
+
-0.24262819081544876,
|
| 433 |
+
-0.6050335317850113,
|
| 434 |
+
-0.5883933693170548,
|
| 435 |
+
-0.33852073848247527,
|
| 436 |
+
-0.31927637457847596,
|
| 437 |
+
-0.07735681585967541,
|
| 438 |
+
-0.042391608729958535,
|
| 439 |
+
-0.25428820788860323,
|
| 440 |
+
0.7400000095367432
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
0.40327705115079837,
|
| 444 |
+
0.2348814429342737,
|
| 445 |
+
0.6518104630708694,
|
| 446 |
+
0.00030119536590063946,
|
| 447 |
+
0.0005192926508607343,
|
| 448 |
+
0.0001505175937199965,
|
| 449 |
+
2.3595025995746256e-05,
|
| 450 |
+
5.185912332308361e-06,
|
| 451 |
+
0.08334404386579981,
|
| 452 |
+
0.00022139013104606418,
|
| 453 |
+
0.44970364242792127,
|
| 454 |
+
1.3850609183311462,
|
| 455 |
+
1.4514530301094055,
|
| 456 |
+
1.4533516800403594,
|
| 457 |
+
0.16941204354166983,
|
| 458 |
+
0.5226882100105286,
|
| 459 |
+
1.1606279826164243,
|
| 460 |
+
0.33347084760665896,
|
| 461 |
+
0.5583706372976303,
|
| 462 |
+
0.3959252551198003,
|
| 463 |
+
0.3333164182305336,
|
| 464 |
+
0.1102284654974937,
|
| 465 |
+
-0.1776231697201729,
|
| 466 |
+
0.40905793011188507,
|
| 467 |
+
0.44925396174192406,
|
| 468 |
+
0.3893393576145172,
|
| 469 |
+
0.5468983370065689,
|
| 470 |
+
0.474879567325115,
|
| 471 |
+
0.08610126286745068,
|
| 472 |
+
0.1693275338411331,
|
| 473 |
+
0.11660626158118245,
|
| 474 |
+
0.7400000095367432
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 45530,
|
| 478 |
+
"num_trajectories": 100
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e208035472e5474bac095beed97fb98fabc9423207afb4ce872a77f68efb0418
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyHandover/20260409_053020/summary.jsonl
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 5000}
|
| 2 |
+
{"steps": 10000}
|
| 3 |
+
{"steps": 15000}
|
| 4 |
+
{"steps": 20000}
|
| 5 |
+
{"steps": 25000}
|
| 6 |
+
{"steps": 30000}
|
| 7 |
+
{"steps": 35000}
|
| 8 |
+
{"steps": 40000}
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b22aeeb726c35af16a4b2648560d30867903090efb032011d21b73597620d0b
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_15000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ff18dfa42f23cffbe8bf20767cb45455f8e4ebaee7e2983f665081809ba5054
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d8b75f78f58f3529a74a8da5d91c29e7ab83a9e54f59bccc26407dfd649e58f
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_25000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:593e5f1f120302800b15e759aa5d3fcdcdd715adb08a38b9b8dc5b7c328f756b
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bb2ca0788a6ffcc07c0c6f6b4a4212127f349b161cffc15aa3d26c7e5164555
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_35000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df7251b81bf2de426cd479000063b168734004441353b65c638703039ea295df
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ed48be8d9fdae3547ac2f4b9bedc3dc822e1bf72d72d65c3f62976f44fcaf8d
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/checkpoints/steps_5000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f1d99ec43b59b95843747d77328a8d4d2abd51115f47e3daa4cfdfda1e07b03
|
| 3 |
+
size 8604574397
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyLocomotionPickBetweenTablesTeleop",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/data/jliu/data/G1WholebodyLocomotionPickBetweenTablesTeleop-v0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 5000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyLocomotionPickBetweenTablesTeleop
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /data/jliu/data/G1WholebodyLocomotionPickBetweenTablesTeleop-v0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 5000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.0007652958738617599,
|
| 6 |
+
0.0010714149102568626,
|
| 7 |
+
0.0010714149102568626,
|
| 8 |
+
-0.0022958877962082624,
|
| 9 |
+
-0.0022958877962082624,
|
| 10 |
+
-0.0009183543152175844,
|
| 11 |
+
-0.0022958877962082624,
|
| 12 |
+
-0.4180760681629181,
|
| 13 |
+
-0.5853561758995056,
|
| 14 |
+
-0.5853561758995056,
|
| 15 |
+
1.254742980003357,
|
| 16 |
+
1.2550007104873657,
|
| 17 |
+
0.5019800662994385,
|
| 18 |
+
1.2550007104873657,
|
| 19 |
+
-0.06184714287519455,
|
| 20 |
+
0.19450722634792328,
|
| 21 |
+
-0.12890596687793732,
|
| 22 |
+
-0.0004327417991589755,
|
| 23 |
+
-0.11649200320243835,
|
| 24 |
+
-0.022241855040192604,
|
| 25 |
+
0.14975687861442566,
|
| 26 |
+
-0.2271433174610138,
|
| 27 |
+
-0.28912046551704407,
|
| 28 |
+
-0.12231507897377014,
|
| 29 |
+
-0.23417018353939056,
|
| 30 |
+
0.2349066436290741,
|
| 31 |
+
0.10561700165271759,
|
| 32 |
+
-0.03762618452310562,
|
| 33 |
+
-0.007439092267304659,
|
| 34 |
+
0.04567578062415123,
|
| 35 |
+
0.0297609381377697,
|
| 36 |
+
0.7396373152732849,
|
| 37 |
+
0.12372121214866638,
|
| 38 |
+
0.013200674206018448,
|
| 39 |
+
-0.2235538214445114,
|
| 40 |
+
0.4495043158531189
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.01948833279311657,
|
| 44 |
+
0.027283955365419388,
|
| 45 |
+
0.027283955365419388,
|
| 46 |
+
0.05846178159117699,
|
| 47 |
+
0.05846178159117699,
|
| 48 |
+
0.02337435446679592,
|
| 49 |
+
0.05846178159117699,
|
| 50 |
+
0.1841326802968979,
|
| 51 |
+
0.2574934661388397,
|
| 52 |
+
0.2574934661388397,
|
| 53 |
+
0.5518325567245483,
|
| 54 |
+
0.5516064763069153,
|
| 55 |
+
0.22092333436012268,
|
| 56 |
+
0.5516064763069153,
|
| 57 |
+
0.08286559581756592,
|
| 58 |
+
0.013414013199508157,
|
| 59 |
+
0.14786243438720703,
|
| 60 |
+
0.08076202124357224,
|
| 61 |
+
0.06461654603481293,
|
| 62 |
+
0.08045286685228348,
|
| 63 |
+
0.10112713277339935,
|
| 64 |
+
0.11457119882106781,
|
| 65 |
+
0.1058330088853836,
|
| 66 |
+
0.2477482408285141,
|
| 67 |
+
0.16770882904529572,
|
| 68 |
+
0.21804748475551605,
|
| 69 |
+
0.15766489505767822,
|
| 70 |
+
0.1760520040988922,
|
| 71 |
+
0.052497588098049164,
|
| 72 |
+
0.024612687528133392,
|
| 73 |
+
0.057495709508657455,
|
| 74 |
+
0.0003625280806534051,
|
| 75 |
+
0.2089398205280304,
|
| 76 |
+
0.08656619489192963,
|
| 77 |
+
0.3711760640144348,
|
| 78 |
+
2.2139852046966553
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.5,
|
| 82 |
+
0.699999988079071,
|
| 83 |
+
0.699999988079071,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0,
|
| 86 |
+
0.0,
|
| 87 |
+
0.0,
|
| 88 |
+
6.930528109384597e-19,
|
| 89 |
+
1.108467615016421e-16,
|
| 90 |
+
1.108467615016421e-16,
|
| 91 |
+
1.5,
|
| 92 |
+
1.5,
|
| 93 |
+
1.0,
|
| 94 |
+
1.5,
|
| 95 |
+
0.2557959258556366,
|
| 96 |
+
0.35884979367256165,
|
| 97 |
+
0.5090755820274353,
|
| 98 |
+
0.19132143259048462,
|
| 99 |
+
0.21249642968177795,
|
| 100 |
+
0.3992660641670227,
|
| 101 |
+
0.4283020794391632,
|
| 102 |
+
0.1457289606332779,
|
| 103 |
+
-0.1900009959936142,
|
| 104 |
+
0.6150448322296143,
|
| 105 |
+
0.35468167066574097,
|
| 106 |
+
0.8703295588493347,
|
| 107 |
+
0.7531875371932983,
|
| 108 |
+
0.971237301826477,
|
| 109 |
+
0.13985762000083923,
|
| 110 |
+
0.15686897933483124,
|
| 111 |
+
0.4661160111427307,
|
| 112 |
+
0.7400000095367432,
|
| 113 |
+
0.5,
|
| 114 |
+
0.5,
|
| 115 |
+
1.0,
|
| 116 |
+
3.1414895057678223
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0,
|
| 122 |
+
-1.5,
|
| 123 |
+
-1.5,
|
| 124 |
+
-0.6000000238418579,
|
| 125 |
+
-1.5,
|
| 126 |
+
-0.5,
|
| 127 |
+
-0.699999988079071,
|
| 128 |
+
-0.699999988079071,
|
| 129 |
+
-2.216935230032842e-16,
|
| 130 |
+
-2.216935230032842e-16,
|
| 131 |
+
-4.0845591349633594e-18,
|
| 132 |
+
-2.216935230032842e-16,
|
| 133 |
+
-0.4883034825325012,
|
| 134 |
+
0.1900009959936142,
|
| 135 |
+
-0.5470856428146362,
|
| 136 |
+
-0.34318920969963074,
|
| 137 |
+
-0.35952919721603394,
|
| 138 |
+
-0.35302427411079407,
|
| 139 |
+
-0.4469815790653229,
|
| 140 |
+
-0.6371198296546936,
|
| 141 |
+
-0.7683824300765991,
|
| 142 |
+
-1.0653810501098633,
|
| 143 |
+
-0.8479154706001282,
|
| 144 |
+
-1.0297260284423828,
|
| 145 |
+
-0.42936205863952637,
|
| 146 |
+
-0.5147944092750549,
|
| 147 |
+
-0.16820405423641205,
|
| 148 |
+
-0.045328833162784576,
|
| 149 |
+
-0.13282617926597595,
|
| 150 |
+
0.7400000095367432,
|
| 151 |
+
-0.5,
|
| 152 |
+
-0.5,
|
| 153 |
+
-1.0,
|
| 154 |
+
-3.138223648071289
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
0.0,
|
| 158 |
+
0.0,
|
| 159 |
+
0.0,
|
| 160 |
+
0.0,
|
| 161 |
+
0.0,
|
| 162 |
+
0.0,
|
| 163 |
+
0.0,
|
| 164 |
+
-0.5,
|
| 165 |
+
-0.699999988079071,
|
| 166 |
+
-0.699999988079071,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
-0.30536221772432326,
|
| 172 |
+
0.1900009959936142,
|
| 173 |
+
-0.4495888948440552,
|
| 174 |
+
-0.23494456708431244,
|
| 175 |
+
-0.27246662437915803,
|
| 176 |
+
-0.2315385288000107,
|
| 177 |
+
-0.18485171496868133,
|
| 178 |
+
-0.4921060320734978,
|
| 179 |
+
-0.6122316139936447,
|
| 180 |
+
-0.7731428289413452,
|
| 181 |
+
-0.5696775823831558,
|
| 182 |
+
-0.4394104504585266,
|
| 183 |
+
-0.28442258715629576,
|
| 184 |
+
-0.3538160628080368,
|
| 185 |
+
-0.11734950572252273,
|
| 186 |
+
-0.015320314802229404,
|
| 187 |
+
-0.09260479986667633,
|
| 188 |
+
0.7400000095367432,
|
| 189 |
+
0.0,
|
| 190 |
+
-0.3567708432674408,
|
| 191 |
+
-1.0,
|
| 192 |
+
-3.124027729034424
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0,
|
| 205 |
+
1.5,
|
| 206 |
+
1.5,
|
| 207 |
+
0.6000000238418579,
|
| 208 |
+
1.5,
|
| 209 |
+
0.12895929232239725,
|
| 210 |
+
0.26329000800848007,
|
| 211 |
+
0.2936864292621614,
|
| 212 |
+
0.14898110926151276,
|
| 213 |
+
0.06174419380724448,
|
| 214 |
+
0.2529342502355577,
|
| 215 |
+
0.3428380289673807,
|
| 216 |
+
-0.0013759000797290315,
|
| 217 |
+
-0.1900009959936142,
|
| 218 |
+
0.34379526853561404,
|
| 219 |
+
0.19408822178840665,
|
| 220 |
+
0.6508016681671143,
|
| 221 |
+
0.5123037415742882,
|
| 222 |
+
0.49184119641780855,
|
| 223 |
+
0.11030469514429586,
|
| 224 |
+
0.11221159264445316,
|
| 225 |
+
0.21934302642941514,
|
| 226 |
+
0.7400000095367432,
|
| 227 |
+
0.5,
|
| 228 |
+
0.35676584899425506,
|
| 229 |
+
0.19804689854383473,
|
| 230 |
+
3.1301991939544678
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
0.0007934353780001402,
|
| 274 |
+
0.0010695882374420762,
|
| 275 |
+
0.0012370680924504995,
|
| 276 |
+
-0.0009144614450633526,
|
| 277 |
+
-0.002343796193599701,
|
| 278 |
+
-0.002245287410914898,
|
| 279 |
+
-0.0023474614135921,
|
| 280 |
+
-0.43587779998779297,
|
| 281 |
+
0.009345951490104198,
|
| 282 |
+
-0.5239402651786804,
|
| 283 |
+
0.347458153963089,
|
| 284 |
+
1.1157641410827637,
|
| 285 |
+
0.3141929805278778,
|
| 286 |
+
1.061142086982727,
|
| 287 |
+
-0.02466614544391632,
|
| 288 |
+
0.18699687719345093,
|
| 289 |
+
-0.14352792501449585,
|
| 290 |
+
0.0824379175901413,
|
| 291 |
+
-0.1188783347606659,
|
| 292 |
+
0.038459114730358124,
|
| 293 |
+
0.14665654301643372,
|
| 294 |
+
-0.18040531873703003,
|
| 295 |
+
-0.2714536190032959,
|
| 296 |
+
-0.09729836881160736,
|
| 297 |
+
-0.149928018450737,
|
| 298 |
+
0.2267490029335022,
|
| 299 |
+
0.16860823333263397,
|
| 300 |
+
-0.03799568489193916,
|
| 301 |
+
-0.00529489666223526,
|
| 302 |
+
0.08189272880554199,
|
| 303 |
+
0.029461175203323364,
|
| 304 |
+
0.7396373152732849
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
0.01693989522755146,
|
| 308 |
+
0.02404647134244442,
|
| 309 |
+
0.02378660999238491,
|
| 310 |
+
0.017665348947048187,
|
| 311 |
+
0.04505283385515213,
|
| 312 |
+
0.045055001974105835,
|
| 313 |
+
0.04504767060279846,
|
| 314 |
+
0.19141361117362976,
|
| 315 |
+
0.08607088029384613,
|
| 316 |
+
0.22821539640426636,
|
| 317 |
+
0.15562357008457184,
|
| 318 |
+
0.497448593378067,
|
| 319 |
+
0.16223013401031494,
|
| 320 |
+
0.47353243827819824,
|
| 321 |
+
0.07905217260122299,
|
| 322 |
+
0.013432762585580316,
|
| 323 |
+
0.1468047797679901,
|
| 324 |
+
0.07942581176757812,
|
| 325 |
+
0.06510384380817413,
|
| 326 |
+
0.0791940838098526,
|
| 327 |
+
0.10252271592617035,
|
| 328 |
+
0.11002297699451447,
|
| 329 |
+
0.09499681740999222,
|
| 330 |
+
0.24013079702854156,
|
| 331 |
+
0.1692967414855957,
|
| 332 |
+
0.21809111535549164,
|
| 333 |
+
0.158147931098938,
|
| 334 |
+
0.17850598692893982,
|
| 335 |
+
0.050502315163612366,
|
| 336 |
+
0.023258700966835022,
|
| 337 |
+
0.058882467448711395,
|
| 338 |
+
0.0003625280806534051
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
0.47981399297714233,
|
| 342 |
+
0.6772664189338684,
|
| 343 |
+
0.6746510863304138,
|
| 344 |
+
0.0010172375477850437,
|
| 345 |
+
0.0007091082516126335,
|
| 346 |
+
0.001881288131698966,
|
| 347 |
+
0.0011398319620639086,
|
| 348 |
+
6.141255539660051e-07,
|
| 349 |
+
0.3043450713157654,
|
| 350 |
+
6.343479981296696e-07,
|
| 351 |
+
0.6933000087738037,
|
| 352 |
+
1.4612544775009155,
|
| 353 |
+
1.4651201963424683,
|
| 354 |
+
1.4609057903289795,
|
| 355 |
+
0.2809508740901947,
|
| 356 |
+
0.34028318524360657,
|
| 357 |
+
0.47627460956573486,
|
| 358 |
+
0.26476219296455383,
|
| 359 |
+
0.20825636386871338,
|
| 360 |
+
0.4566418528556824,
|
| 361 |
+
0.42864030599594116,
|
| 362 |
+
0.1656116098165512,
|
| 363 |
+
-0.1549365073442459,
|
| 364 |
+
0.5154499411582947,
|
| 365 |
+
0.4242899715900421,
|
| 366 |
+
0.8548054695129395,
|
| 367 |
+
0.8040095567703247,
|
| 368 |
+
0.9811649322509766,
|
| 369 |
+
0.136736661195755,
|
| 370 |
+
0.195722296833992,
|
| 371 |
+
0.45781663060188293,
|
| 372 |
+
0.7400000095367432
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
-0.02442001923918724,
|
| 376 |
+
-0.0517612099647522,
|
| 377 |
+
-0.0006534014828503132,
|
| 378 |
+
-0.5095356106758118,
|
| 379 |
+
-1.323034405708313,
|
| 380 |
+
-1.3221508264541626,
|
| 381 |
+
-1.3230019807815552,
|
| 382 |
+
-0.5770347714424133,
|
| 383 |
+
-0.4338151812553406,
|
| 384 |
+
-0.6721642017364502,
|
| 385 |
+
-0.0017213862156495452,
|
| 386 |
+
-7.534810038123396e-07,
|
| 387 |
+
-0.001927333534695208,
|
| 388 |
+
-1.075333216249419e-06,
|
| 389 |
+
-0.43650975823402405,
|
| 390 |
+
0.15721464157104492,
|
| 391 |
+
-0.5489339232444763,
|
| 392 |
+
-0.2632291913032532,
|
| 393 |
+
-0.3508843183517456,
|
| 394 |
+
-0.23784859478473663,
|
| 395 |
+
-0.4281824827194214,
|
| 396 |
+
-0.5803383588790894,
|
| 397 |
+
-0.7118590474128723,
|
| 398 |
+
-1.0344431400299072,
|
| 399 |
+
-0.7932196259498596,
|
| 400 |
+
-1.0205217599868774,
|
| 401 |
+
-0.3445618450641632,
|
| 402 |
+
-0.5986371040344238,
|
| 403 |
+
-0.13537253439426422,
|
| 404 |
+
-0.0017330688424408436,
|
| 405 |
+
-0.1421850621700287,
|
| 406 |
+
0.7400000095367432
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
-2.9736981127825855e-06,
|
| 410 |
+
-0.002589050980750471,
|
| 411 |
+
-2.1223586691121456e-06,
|
| 412 |
+
-0.00027220559131819756,
|
| 413 |
+
-5.2740163209819e-06,
|
| 414 |
+
-4.3062968397862275e-05,
|
| 415 |
+
6.949997242600148e-08,
|
| 416 |
+
-0.5698864543437958,
|
| 417 |
+
-0.3627366861701012,
|
| 418 |
+
-0.6697060906887055,
|
| 419 |
+
-1.2418152664395165e-06,
|
| 420 |
+
-1.287923055315332e-07,
|
| 421 |
+
-7.915375590528129e-07,
|
| 422 |
+
-1.582540755862283e-07,
|
| 423 |
+
-0.2560530769824982,
|
| 424 |
+
0.16664464086294173,
|
| 425 |
+
-0.4414859291911125,
|
| 426 |
+
-0.14972542390227317,
|
| 427 |
+
-0.2697399368882179,
|
| 428 |
+
-0.16884329065680503,
|
| 429 |
+
-0.18771703973412515,
|
| 430 |
+
-0.4346155697107315,
|
| 431 |
+
-0.5672112548351288,
|
| 432 |
+
-0.739670946598053,
|
| 433 |
+
-0.49567418187856677,
|
| 434 |
+
-0.4431849017739296,
|
| 435 |
+
-0.22463233456015586,
|
| 436 |
+
-0.3607582712173462,
|
| 437 |
+
-0.10682432219386101,
|
| 438 |
+
0.02814220966771245,
|
| 439 |
+
-0.0969948647916317,
|
| 440 |
+
0.7400000095367432
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
3.4415612958582677e-06,
|
| 444 |
+
0.019452356398105622,
|
| 445 |
+
0.01667371392250061,
|
| 446 |
+
0.0005855054722633213,
|
| 447 |
+
0.0003600666584679857,
|
| 448 |
+
0.0006024087872356176,
|
| 449 |
+
0.0003732459741877392,
|
| 450 |
+
2.2463960860363857e-07,
|
| 451 |
+
0.15193727359175682,
|
| 452 |
+
5.1716865669959736e-08,
|
| 453 |
+
0.5587792527675629,
|
| 454 |
+
1.4585348367691042,
|
| 455 |
+
0.5443805891275406,
|
| 456 |
+
1.370901610851288,
|
| 457 |
+
0.1576733058691028,
|
| 458 |
+
0.24011337146163012,
|
| 459 |
+
0.27520660489797616,
|
| 460 |
+
0.2312955512106419,
|
| 461 |
+
0.05981415051967266,
|
| 462 |
+
0.3088961178064349,
|
| 463 |
+
0.3407434976100922,
|
| 464 |
+
0.030774814467877155,
|
| 465 |
+
-0.17435445189476012,
|
| 466 |
+
0.3570790392160416,
|
| 467 |
+
0.2687951233983051,
|
| 468 |
+
0.6410461419820785,
|
| 469 |
+
0.5765821474790573,
|
| 470 |
+
0.49433160990476627,
|
| 471 |
+
0.09976441204547885,
|
| 472 |
+
0.14455557838082314,
|
| 473 |
+
0.21845388084650053,
|
| 474 |
+
0.7400000095367432
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 62764,
|
| 478 |
+
"num_trajectories": 99
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1109463a97d234bf908d94e5f4aefc67cc4733b29a65514b4db1ca4e43ff26a6
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyLocomotionPickBetweenTablesTeleop/20260409_052110/summary.jsonl
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 5000}
|
| 2 |
+
{"steps": 10000}
|
| 3 |
+
{"steps": 15000}
|
| 4 |
+
{"steps": 20000}
|
| 5 |
+
{"steps": 25000}
|
| 6 |
+
{"steps": 30000}
|
| 7 |
+
{"steps": 35000}
|
| 8 |
+
{"steps": 40000}
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a4c1423afa3642772d54f313d495edc8b36c26cb7369aa8a28a6efe77388975
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_15000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1e3a356b4abbfa51d31a369075f8634f7136034f1bf6f3364846ec40f39d663
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc6bb69676e3b2321d66dbc1c2318bb31e9d32036e6d7c1bf2d53862a7ddf355
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_25000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3eaf44ddba127eb83c151bb7e347080765d7b6ed3b548ed664471f7b8919fad6
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b802f0d6833b5a293bf1784129a017652841862c3189cd4860d55bc9e198f52
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_35000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95b62139bca7bb8cdc48c5ce7530154d737292045eeb416d0b9f442a6288e51b
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56c78ff6e3c141e32bb727c439373da5992fd5524c804611f5b934c6e5f42d5d
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/checkpoints/steps_5000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01afd6ef0afc4a55e55b0f021b6a9f27b609ab0c526eaab0aff52488d38ba394
|
| 3 |
+
size 8604574397
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyXMoveBendPickTeleop",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/data/jliu/data/G1WholebodyXMoveBendPickTeleop-v0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 5000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyXMoveBendPickTeleop/20260403_151218"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyXMoveBendPickTeleop
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /data/jliu/data/G1WholebodyXMoveBendPickTeleop-v0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 5000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyXMoveBendPickTeleop/20260403_151218
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.0,
|
| 6 |
+
0.0,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0,
|
| 12 |
+
-0.06578648090362549,
|
| 13 |
+
-0.0920981913805008,
|
| 14 |
+
-0.0920981913805008,
|
| 15 |
+
0.19735944271087646,
|
| 16 |
+
0.19735944271087646,
|
| 17 |
+
0.07894159853458405,
|
| 18 |
+
0.19735944271087646,
|
| 19 |
+
-0.08300793915987015,
|
| 20 |
+
0.19217251241207123,
|
| 21 |
+
-0.07867422699928284,
|
| 22 |
+
0.051928430795669556,
|
| 23 |
+
-0.20575666427612305,
|
| 24 |
+
0.026639869436621666,
|
| 25 |
+
0.2688467502593994,
|
| 26 |
+
-0.07158240675926208,
|
| 27 |
+
-0.1959005445241928,
|
| 28 |
+
-0.044735364615917206,
|
| 29 |
+
0.09095965325832367,
|
| 30 |
+
0.14965403079986572,
|
| 31 |
+
-0.03834051638841629,
|
| 32 |
+
-0.059758421033620834,
|
| 33 |
+
-0.0003535658761393279,
|
| 34 |
+
0.02151256427168846,
|
| 35 |
+
0.008055430836975574,
|
| 36 |
+
0.6770350337028503,
|
| 37 |
+
0.09653493016958237,
|
| 38 |
+
-0.0004846698429901153,
|
| 39 |
+
0.014787894673645496,
|
| 40 |
+
0.0
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0,
|
| 48 |
+
0.0,
|
| 49 |
+
0.0,
|
| 50 |
+
0.16896027326583862,
|
| 51 |
+
0.2365109622478485,
|
| 52 |
+
0.2365109622478485,
|
| 53 |
+
0.5068987011909485,
|
| 54 |
+
0.5068987011909485,
|
| 55 |
+
0.2027282416820526,
|
| 56 |
+
0.5068987011909485,
|
| 57 |
+
0.04981342703104019,
|
| 58 |
+
0.009523554705083325,
|
| 59 |
+
0.17949867248535156,
|
| 60 |
+
0.055632513016462326,
|
| 61 |
+
0.057273078709840775,
|
| 62 |
+
0.058521512895822525,
|
| 63 |
+
0.11143220216035843,
|
| 64 |
+
0.07724548876285553,
|
| 65 |
+
0.023301944136619568,
|
| 66 |
+
0.10750990360975266,
|
| 67 |
+
0.06850353628396988,
|
| 68 |
+
0.07305468618869781,
|
| 69 |
+
0.06640303879976273,
|
| 70 |
+
0.16941164433956146,
|
| 71 |
+
0.041646938771009445,
|
| 72 |
+
0.0699257031083107,
|
| 73 |
+
0.027388552203774452,
|
| 74 |
+
0.08091399818658829,
|
| 75 |
+
0.11833422631025314,
|
| 76 |
+
0.017973335459828377,
|
| 77 |
+
0.03434111177921295,
|
| 78 |
+
0.0
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0,
|
| 86 |
+
0.0,
|
| 87 |
+
0.0,
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
1.5,
|
| 92 |
+
1.5,
|
| 93 |
+
0.6000000238418579,
|
| 94 |
+
1.5,
|
| 95 |
+
0.06430592387914658,
|
| 96 |
+
0.2996276319026947,
|
| 97 |
+
0.5128592252731323,
|
| 98 |
+
0.3017215132713318,
|
| 99 |
+
-0.038698144257068634,
|
| 100 |
+
0.21968720853328705,
|
| 101 |
+
0.827497124671936,
|
| 102 |
+
0.044904597103595734,
|
| 103 |
+
-0.1900009959936142,
|
| 104 |
+
0.413065105676651,
|
| 105 |
+
0.41873428225517273,
|
| 106 |
+
0.6618388891220093,
|
| 107 |
+
0.4026392698287964,
|
| 108 |
+
0.8194853663444519,
|
| 109 |
+
0.12383800745010376,
|
| 110 |
+
0.16346246004104614,
|
| 111 |
+
0.15494900941848755,
|
| 112 |
+
0.7400000095367432,
|
| 113 |
+
0.5,
|
| 114 |
+
0.21786384284496307,
|
| 115 |
+
0.1749052256345749,
|
| 116 |
+
0.0
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0,
|
| 122 |
+
0.0,
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
-0.5,
|
| 127 |
+
-0.699999988079071,
|
| 128 |
+
-0.699999988079071,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0,
|
| 133 |
+
-0.3314070701599121,
|
| 134 |
+
0.1900009959936142,
|
| 135 |
+
-0.8766500353813171,
|
| 136 |
+
-0.12303244322538376,
|
| 137 |
+
-0.4908517599105835,
|
| 138 |
+
-0.2786784768104553,
|
| 139 |
+
-0.022629141807556152,
|
| 140 |
+
-0.6784858703613281,
|
| 141 |
+
-0.5865002870559692,
|
| 142 |
+
-0.645729660987854,
|
| 143 |
+
-0.3608185946941376,
|
| 144 |
+
-0.15172408521175385,
|
| 145 |
+
-0.4648345112800598,
|
| 146 |
+
-0.2964947521686554,
|
| 147 |
+
-0.10700750350952148,
|
| 148 |
+
-0.21067920327186584,
|
| 149 |
+
-0.08102670311927795,
|
| 150 |
+
0.44999998807907104,
|
| 151 |
+
-0.5,
|
| 152 |
+
-0.26561295986175537,
|
| 153 |
+
-0.11697302013635635,
|
| 154 |
+
0.0
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
0.0,
|
| 158 |
+
0.0,
|
| 159 |
+
0.0,
|
| 160 |
+
0.0,
|
| 161 |
+
0.0,
|
| 162 |
+
0.0,
|
| 163 |
+
0.0,
|
| 164 |
+
-0.5,
|
| 165 |
+
-0.699999988079071,
|
| 166 |
+
-0.699999988079071,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
-0.2336725726723671,
|
| 172 |
+
0.1900009959936142,
|
| 173 |
+
-0.628720715045929,
|
| 174 |
+
-0.08062581032514572,
|
| 175 |
+
-0.3772744107246399,
|
| 176 |
+
-0.14941381871700288,
|
| 177 |
+
0.023908816780894994,
|
| 178 |
+
-0.3680631712079048,
|
| 179 |
+
-0.3140790224075317,
|
| 180 |
+
-0.33936198383569716,
|
| 181 |
+
-0.1444373431801796,
|
| 182 |
+
-0.024716479536145926,
|
| 183 |
+
-0.25120449274778367,
|
| 184 |
+
-0.24746618106961252,
|
| 185 |
+
-0.0921607768535614,
|
| 186 |
+
-0.1331048083305359,
|
| 187 |
+
-0.055714426785707476,
|
| 188 |
+
0.5099999904632568,
|
| 189 |
+
0.0,
|
| 190 |
+
-0.08334636241197586,
|
| 191 |
+
-0.058656642064452175,
|
| 192 |
+
0.0
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0,
|
| 205 |
+
1.5,
|
| 206 |
+
1.5,
|
| 207 |
+
0.6000000238418579,
|
| 208 |
+
1.5,
|
| 209 |
+
0.005833799573592792,
|
| 210 |
+
0.24212055698037147,
|
| 211 |
+
0.31104624718427587,
|
| 212 |
+
0.2240664350986478,
|
| 213 |
+
-0.07993344962596893,
|
| 214 |
+
0.1509539039433002,
|
| 215 |
+
0.6122507166862484,
|
| 216 |
+
0.020442928690463276,
|
| 217 |
+
-0.1900009959936142,
|
| 218 |
+
0.20298720359802205,
|
| 219 |
+
0.26972131878137506,
|
| 220 |
+
0.33771990299224813,
|
| 221 |
+
0.14230648443102825,
|
| 222 |
+
0.5476555949449537,
|
| 223 |
+
0.10829514846205711,
|
| 224 |
+
0.10879010632634158,
|
| 225 |
+
0.09789865501224988,
|
| 226 |
+
0.7400000095367432,
|
| 227 |
+
0.4088541567325592,
|
| 228 |
+
0.057279629707336424,
|
| 229 |
+
0.11845016352832313,
|
| 230 |
+
0.0
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
1.6478608813486062e-05,
|
| 274 |
+
-4.823089329875074e-05,
|
| 275 |
+
-1.5274658835551236e-06,
|
| 276 |
+
2.0739900719490834e-05,
|
| 277 |
+
-4.8923579015536234e-05,
|
| 278 |
+
1.184017673949711e-05,
|
| 279 |
+
1.8281939446751494e-06,
|
| 280 |
+
-0.04951467365026474,
|
| 281 |
+
-0.03387488052248955,
|
| 282 |
+
-0.05985373631119728,
|
| 283 |
+
0.034982677549123764,
|
| 284 |
+
0.09988676756620407,
|
| 285 |
+
0.05889609828591347,
|
| 286 |
+
0.09540130198001862,
|
| 287 |
+
-0.04761885479092598,
|
| 288 |
+
0.18126359581947327,
|
| 289 |
+
-0.08981631696224213,
|
| 290 |
+
0.1303543597459793,
|
| 291 |
+
-0.20870409905910492,
|
| 292 |
+
0.08360962569713593,
|
| 293 |
+
0.26360899209976196,
|
| 294 |
+
-0.033612482249736786,
|
| 295 |
+
-0.1811068058013916,
|
| 296 |
+
-0.027028528973460197,
|
| 297 |
+
0.1747075915336609,
|
| 298 |
+
0.1506245732307434,
|
| 299 |
+
0.02000368759036064,
|
| 300 |
+
-0.07187763601541519,
|
| 301 |
+
0.0014201127924025059,
|
| 302 |
+
0.06093015894293785,
|
| 303 |
+
0.007754191290587187,
|
| 304 |
+
0.6778029799461365
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
0.0003944706986658275,
|
| 308 |
+
0.0007630966720171273,
|
| 309 |
+
2.2961552531342022e-05,
|
| 310 |
+
0.000179155234945938,
|
| 311 |
+
0.0010678438702598214,
|
| 312 |
+
4.7558256483171135e-05,
|
| 313 |
+
1.1183346941834316e-05,
|
| 314 |
+
0.12474565207958221,
|
| 315 |
+
0.07307292520999908,
|
| 316 |
+
0.14443156123161316,
|
| 317 |
+
0.09493549168109894,
|
| 318 |
+
0.2512502074241638,
|
| 319 |
+
0.1241452693939209,
|
| 320 |
+
0.23682793974876404,
|
| 321 |
+
0.049122974276542664,
|
| 322 |
+
0.0104843201115727,
|
| 323 |
+
0.1711176037788391,
|
| 324 |
+
0.05156445503234863,
|
| 325 |
+
0.05493027716875076,
|
| 326 |
+
0.05776740238070488,
|
| 327 |
+
0.10714928060770035,
|
| 328 |
+
0.06816332787275314,
|
| 329 |
+
0.01825851947069168,
|
| 330 |
+
0.10530710965394974,
|
| 331 |
+
0.059736523777246475,
|
| 332 |
+
0.06796342134475708,
|
| 333 |
+
0.05937612056732178,
|
| 334 |
+
0.1552959680557251,
|
| 335 |
+
0.03811460733413696,
|
| 336 |
+
0.06448719650506973,
|
| 337 |
+
0.028375638648867607,
|
| 338 |
+
0.08062339574098587
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
0.013749510049819946,
|
| 342 |
+
0.0003444451722316444,
|
| 343 |
+
5.732499630539678e-06,
|
| 344 |
+
0.0019246992887929082,
|
| 345 |
+
0.0014607172925025225,
|
| 346 |
+
0.0007710650679655373,
|
| 347 |
+
0.0006001993897370994,
|
| 348 |
+
4.888642592959513e-07,
|
| 349 |
+
0.06670719385147095,
|
| 350 |
+
1.4086220971876173e-06,
|
| 351 |
+
0.43387407064437866,
|
| 352 |
+
1.2414171695709229,
|
| 353 |
+
0.6964682936668396,
|
| 354 |
+
1.2072811126708984,
|
| 355 |
+
0.0866343304514885,
|
| 356 |
+
0.2651435434818268,
|
| 357 |
+
0.49075624346733093,
|
| 358 |
+
0.34916067123413086,
|
| 359 |
+
-0.06531530618667603,
|
| 360 |
+
0.2507650554180145,
|
| 361 |
+
0.9099032282829285,
|
| 362 |
+
0.07794909924268723,
|
| 363 |
+
-0.15903376042842865,
|
| 364 |
+
0.29115578532218933,
|
| 365 |
+
0.48632845282554626,
|
| 366 |
+
0.4680853486061096,
|
| 367 |
+
0.40000519156455994,
|
| 368 |
+
0.7901750206947327,
|
| 369 |
+
0.11165501922369003,
|
| 370 |
+
0.1871986985206604,
|
| 371 |
+
0.15685616433620453,
|
| 372 |
+
0.7400000095367432
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
-0.00044060105574317276,
|
| 376 |
+
-0.029227260500192642,
|
| 377 |
+
-0.0007062808726914227,
|
| 378 |
+
-0.006396367214620113,
|
| 379 |
+
-0.034731876105070114,
|
| 380 |
+
-0.00020073111227247864,
|
| 381 |
+
-8.215621392082539e-07,
|
| 382 |
+
-0.5499086976051331,
|
| 383 |
+
-0.5100165009498596,
|
| 384 |
+
-0.613179087638855,
|
| 385 |
+
-0.0030598489101976156,
|
| 386 |
+
-0.0002515389060135931,
|
| 387 |
+
-0.00361030176281929,
|
| 388 |
+
-0.003131122561171651,
|
| 389 |
+
-0.30267173051834106,
|
| 390 |
+
0.162300705909729,
|
| 391 |
+
-0.8084174394607544,
|
| 392 |
+
-0.053157128393650055,
|
| 393 |
+
-0.48188674449920654,
|
| 394 |
+
-0.28324440121650696,
|
| 395 |
+
-0.02153456024825573,
|
| 396 |
+
-0.559512734413147,
|
| 397 |
+
-0.4063037037849426,
|
| 398 |
+
-0.625334620475769,
|
| 399 |
+
-0.17857033014297485,
|
| 400 |
+
-0.14080968499183655,
|
| 401 |
+
-0.3861367404460907,
|
| 402 |
+
-0.2920348048210144,
|
| 403 |
+
-0.0902835875749588,
|
| 404 |
+
-0.1666938215494156,
|
| 405 |
+
-0.07615894079208374,
|
| 406 |
+
0.44999998807907104
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
7.74661926357112e-07,
|
| 410 |
+
-6.3755543715160465e-06,
|
| 411 |
+
-7.83351255222442e-07,
|
| 412 |
+
3.4136806561946284e-07,
|
| 413 |
+
-0.0008449232077691706,
|
| 414 |
+
4.5431972239384775e-06,
|
| 415 |
+
7.721260197968149e-07,
|
| 416 |
+
-0.5028422969579697,
|
| 417 |
+
-0.3325865414738655,
|
| 418 |
+
-0.5741579407453536,
|
| 419 |
+
-1.5824165325284411e-06,
|
| 420 |
+
-1.323924946916577e-07,
|
| 421 |
+
-9.707011122372932e-07,
|
| 422 |
+
-1.9067205457190538e-07,
|
| 423 |
+
-0.1996450574696064,
|
| 424 |
+
0.1655060650408268,
|
| 425 |
+
-0.6192439311742782,
|
| 426 |
+
0.0036195464059710497,
|
| 427 |
+
-0.37595251262187956,
|
| 428 |
+
-0.10034843616187572,
|
| 429 |
+
0.029302983712404963,
|
| 430 |
+
-0.29443797826766965,
|
| 431 |
+
-0.26330254584550855,
|
| 432 |
+
-0.3154676526784897,
|
| 433 |
+
0.021579700019210574,
|
| 434 |
+
-0.02289357639849186,
|
| 435 |
+
-0.1849268364906311,
|
| 436 |
+
-0.24821986511349678,
|
| 437 |
+
-0.07978948682546616,
|
| 438 |
+
-0.08723165072500706,
|
| 439 |
+
-0.05445469941943884,
|
| 440 |
+
0.5099999904632568
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
2.1163743895158388e-06,
|
| 444 |
+
3.5946895195593186e-06,
|
| 445 |
+
9.480705858777559e-07,
|
| 446 |
+
0.0007039297890150918,
|
| 447 |
+
0.0005248256213963012,
|
| 448 |
+
2.909682405515922e-05,
|
| 449 |
+
3.943643955608417e-06,
|
| 450 |
+
2.646454402110975e-07,
|
| 451 |
+
2.4528015819669183e-06,
|
| 452 |
+
3.882593460957627e-07,
|
| 453 |
+
0.39650109171867354,
|
| 454 |
+
1.1101934683322905,
|
| 455 |
+
0.5173005294799803,
|
| 456 |
+
1.034569376707077,
|
| 457 |
+
0.03528875216841695,
|
| 458 |
+
0.22509524688124644,
|
| 459 |
+
0.29769810587167667,
|
| 460 |
+
0.28808553427457806,
|
| 461 |
+
-0.08342873558402061,
|
| 462 |
+
0.20056841030716893,
|
| 463 |
+
0.6333562320470806,
|
| 464 |
+
0.05092002365738146,
|
| 465 |
+
-0.1633606669306755,
|
| 466 |
+
0.21988036155700677,
|
| 467 |
+
0.343479991853237,
|
| 468 |
+
0.2987542548775673,
|
| 469 |
+
0.12391192510724047,
|
| 470 |
+
0.48378621041774733,
|
| 471 |
+
0.09747317329049104,
|
| 472 |
+
0.14307723090052604,
|
| 473 |
+
0.09777400560677017,
|
| 474 |
+
0.7400000095367432
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 23664,
|
| 478 |
+
"num_trajectories": 100
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58eaff18dd7c1a3d468256151321af5e980b6fe219ac8291f98484134d166afd
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyXMoveBendPickTeleop/20260403_151218/summary.jsonl
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 5000}
|
| 2 |
+
{"steps": 10000}
|
| 3 |
+
{"steps": 15000}
|
| 4 |
+
{"steps": 20000}
|
| 5 |
+
{"steps": 25000}
|
| 6 |
+
{"steps": 30000}
|
| 7 |
+
{"steps": 35000}
|
| 8 |
+
{"steps": 40000}
|