nnh-pbbb committed on
Commit
26b988b
·
verified ·
1 Parent(s): aaa3f17

Upload folder using huggingface_hub

Browse files
InternVLPI-0302-eep-pretrain_T2_sft_23000/checkpoints/steps_23000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8d9bac614da20ddb00d404628a2c2ece9368a01e27804905f9ae1c4b9538494
3
+ size 2966606180
InternVLPI-0302-eep-pretrain_T2_sft_23000/config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "InternVLPI-0302-eep-pretrain_T2_sft",
3
+ "run_root_dir": "/lumos-vePFS/shenzhen/models/lumos_pi",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl"
7
+ ],
8
+ "wandb_entity": null,
9
+ "wandb_project": null,
10
+ "is_debug": false,
11
+ "framework": {
12
+ "name": "internvlPI",
13
+ "qwenvl": {
14
+ "base_vlm": "/lumos-vePFS/shenzhen/models/InternVL3_5-1B-Instruct",
15
+ "attn_implementation": "eager",
16
+ "vl_hidden_dim": 2048
17
+ },
18
+ "action_model": {
19
+ "add_pos_embed": true,
20
+ "hidden_size": 1024,
21
+ "max_seq_len": 1024,
22
+ "action_model_type": "DiT-B",
23
+ "action_dim": 20,
24
+ "state_dim": 20,
25
+ "future_action_window_size": 15,
26
+ "past_action_window_size": 0,
27
+ "action_horizon": 30,
28
+ "repeated_diffusion_steps": 2,
29
+ "noise_beta_alpha": 1.5,
30
+ "noise_beta_beta": 1.0,
31
+ "noise_s": 0.999,
32
+ "num_timestep_buckets": 1000,
33
+ "num_inference_timesteps": 8,
34
+ "num_target_vision_tokens": 32,
35
+ "use_scaled_noise": true,
36
+ "use_kv_cache_dit": true,
37
+ "diffusion_model_cfg": {
38
+ "dropout": 0.2,
39
+ "final_dropout": true,
40
+ "interleave_self_attention": true,
41
+ "norm_type": "ada_norm",
42
+ "num_layers": 28,
43
+ "output_dim": 2560,
44
+ "positional_embeddings": null
45
+ },
46
+ "smoothness_loss_weight": 1
47
+ }
48
+ },
49
+ "datasets": {
50
+ "vla_data": {
51
+ "dataset_py": "lerobot_datasets",
52
+ "data_root_dir": "/lumos-vePFS/shenzhen/data/data_warehouse/data_warehouse_output_clean",
53
+ "data_mix": "train_QwenPI",
54
+ "per_device_batch_size": 16,
55
+ "num_workers": 4,
56
+ "action_type": "abs_ee",
57
+ "image_size": 256,
58
+ "video_backend": "pyav"
59
+ }
60
+ },
61
+ "trainer": {
62
+ "epochs": 100,
63
+ "max_train_steps": 200000,
64
+ "num_warmup_steps": 5000,
65
+ "save_interval": 1000,
66
+ "eval_interval": 10000000,
67
+ "learning_rate": {
68
+ "base": 1e-05,
69
+ "intern_vl_interface": 1e-05,
70
+ "action_model": 0.0001
71
+ },
72
+ "lr_scheduler_type": "cosine_with_min_lr",
73
+ "scheduler_specific_kwargs": {
74
+ "min_lr": 1e-06
75
+ },
76
+ "freeze_modules": null,
77
+ "loss_scale": {
78
+ "vla": 1.0,
79
+ "vlm": 0.05
80
+ },
81
+ "max_grad_norm": 1.0,
82
+ "warmup_ratio": 0.1,
83
+ "weight_decay": 0.0,
84
+ "logging_frequency": 10,
85
+ "gradient_clipping": 1.0,
86
+ "gradient_accumulation_steps": 3,
87
+ "optimizer": {
88
+ "name": "AdamW",
89
+ "betas": [
90
+ 0.9,
91
+ 0.95
92
+ ],
93
+ "eps": 1e-08,
94
+ "weight_decay": 0
95
+ },
96
+ "pretrained_checkpoint": "/lumos-vePFS/shenzhen/models/lumos_pi/InternVLPI-0225-eep-pretrain_T2/checkpoints/steps_23000_pytorch_model.pt",
97
+ "is_resume": true,
98
+ "resume_step": 11000
99
+ },
100
+ "is_resume": true,
101
+ "resume_epoch": null,
102
+ "resume_step": 5000,
103
+ "enable_gradient_checkpointing": true,
104
+ "enable_mixed_precision_training": true,
105
+ "report_to": "jsonl",
106
+ "output_dir": "/lumos-vePFS/shenzhen/models/lumos_pi/InternVLPI-0302-eep-pretrain_T2_sft"
107
+ }
InternVLPI-0302-eep-pretrain_T2_sft_23000/config.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ data_mix: train_QwenPI
4
+ data_root_dir: /lumos-vePFS/shenzhen/data/data_warehouse/data_warehouse_output_clean
5
+ dataset_py: lerobot_datasets
6
+ image_size: 256
7
+ num_workers: 4
8
+ per_device_batch_size: 16
9
+ framework:
10
+ action_model:
11
+ action_dim: 20
12
+ add_pos_embed: true
13
+ diffusion_model_cfg:
14
+ dropout: 0.2
15
+ final_dropout: true
16
+ interleave_self_attention: true
17
+ norm_type: ada_norm
18
+ num_layers: 28
19
+ output_dim: 2560
20
+ positional_embeddings: null
21
+ future_action_window_size: 15
22
+ hidden_dim: 1024
23
+ max_seq_len: 1024
24
+ noise_beta_alpha: 1.5
25
+ noise_beta_beta: 1.0
26
+ noise_s: 0.999
27
+ num_inference_timesteps: 8
28
+ num_target_vision_tokens: 32
29
+ num_timestep_buckets: 1000
30
+ past_action_window_size: 0
31
+ smoothness_loss_weight: 1
32
+ state_dim: 20
33
+ use_kv_cache_dit: true
34
+ use_scaled_noise: true
35
+ name: internvlPI
36
+ qwenvl:
37
+ attn_implementation: eager
38
+ base_vlm: /lumos-vePFS/shenzhen/models/InternVL3_5-1B-Instruct
39
+ num_vl_layers: 28
40
+ vl_hidden_dim: 1024
41
+ output_dir: /lumos-vePFS/shenzhen/models/lumos_pi/InternVLPI-0302-eep-pretrain_T2_sft
42
+ run_id: InternVLPI-0302-eep-pretrain_T2_sft
43
+ run_root_dir: /lumos-vePFS/shenzhen/models/lumos_pi
44
+ seed: 42
45
+ trainer:
46
+ eval_interval: 10000000
47
+ freeze_modules: null
48
+ gradient_accumulation_steps: 3
49
+ gradient_clipping: 1.0
50
+ is_resume: true
51
+ learning_rate:
52
+ action_model: 0.0001
53
+ base: 1.0e-05
54
+ intern_vl_interface: 1.0e-05
55
+ logging_frequency: 10
56
+ lr_scheduler_type: cosine_with_min_lr
57
+ max_train_steps: 200000
58
+ num_warmup_steps: 5000
59
+ optimizer:
60
+ betas:
61
+ - 0.9
62
+ - 0.95
63
+ eps: 1.0e-08
64
+ weight_decay: 0
65
+ pretrained_checkpoint: /lumos-vePFS/shenzhen/models/lumos_pi/InternVLPI-0225-eep-pretrain_T2/checkpoints/steps_23000_pytorch_model.pt
66
+ save_interval: 1000
67
+ scheduler_specific_kwargs:
68
+ min_lr: 1.0e-06
InternVLPI-0302-eep-pretrain_T2_sft_23000/dataset_statistics.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.2675580734556372,
6
+ -0.06659615870739799,
7
+ 0.19635865566405383,
8
+ -1.313161190599203,
9
+ 0.41478794440627087,
10
+ 1.4886140796271237,
11
+ 0.0327876329168,
12
+ 0.2434083236889406,
13
+ 0.09594157693738284,
14
+ 0.20976496081460608,
15
+ 1.4955806767737323,
16
+ 0.5106299925934186,
17
+ -1.5047886750914834,
18
+ 0.035419227597727025
19
+ ],
20
+ "std": [
21
+ 0.10220121447453302,
22
+ 0.11127593499077473,
23
+ 0.0693507274854023,
24
+ 2.61021122878121,
25
+ 0.31264831774938195,
26
+ 2.385310375748136,
27
+ 0.028368956930127334,
28
+ 0.09613372647295047,
29
+ 0.08261520968608296,
30
+ 0.05838800456095643,
31
+ 2.516367436105295,
32
+ 0.2863539402986677,
33
+ 2.4039184089857386,
34
+ 0.028089917285829002
35
+ ],
36
+ "max": [
37
+ 0.5965149998664856,
38
+ 0.48442599177360535,
39
+ 0.6967939734458923,
40
+ 3.1415927410125732,
41
+ 1.5707963705062866,
42
+ 3.1415927410125732,
43
+ 0.0810599997639656,
44
+ 0.5695070028305054,
45
+ 0.4637550115585327,
46
+ 0.6456500291824341,
47
+ 3.1415927410125732,
48
+ 1.5707963705062866,
49
+ 3.1415927410125732,
50
+ 0.061319999396800995
51
+ ],
52
+ "min": [
53
+ -0.10788500308990479,
54
+ -0.5191159844398499,
55
+ 0.040369000285863876,
56
+ -3.1410515308380127,
57
+ -1.5302174091339111,
58
+ -3.1410515308380127,
59
+ -0.0017500000540167093,
60
+ -0.03119499981403351,
61
+ -0.356126993894577,
62
+ 0.06903599947690964,
63
+ -3.141069173812866,
64
+ -0.7706850171089172,
65
+ -3.1410515308380127,
66
+ 0.0
67
+ ],
68
+ "q01": [
69
+ 0.03489213060587645,
70
+ -0.36912800788879396,
71
+ 0.07184209674596786,
72
+ -3.137246325016022,
73
+ -0.20070483744144438,
74
+ -3.137392144203186,
75
+ 0.0,
76
+ 0.030881709717214108,
77
+ -0.15914200246334076,
78
+ 0.10170399993658066,
79
+ -3.138237874507904,
80
+ -0.11282174646854401,
81
+ -3.1362587141990663,
82
+ 7.000000186963007e-05
83
+ ],
84
+ "q99": [
85
+ 0.5328979176282882,
86
+ 0.41213971734046934,
87
+ 0.47792605459690085,
88
+ 3.1415927410125732,
89
+ 1.5365502309799193,
90
+ 3.1415927410125732,
91
+ 0.06019999831914902,
92
+ 0.5246558457612991,
93
+ 0.35473266333341585,
94
+ 0.42531779676675785,
95
+ 3.1415927410125732,
96
+ 1.5435859155654907,
97
+ 3.1415927410125732,
98
+ 0.05992000177502632
99
+ ],
100
+ "mask": [
101
+ true,
102
+ true,
103
+ true,
104
+ true,
105
+ true,
106
+ true,
107
+ false,
108
+ true,
109
+ true,
110
+ true,
111
+ true,
112
+ true,
113
+ true,
114
+ false
115
+ ]
116
+ },
117
+ "state": {
118
+ "mean": [
119
+ 0.2675580734556372,
120
+ -0.06659615870739799,
121
+ 0.19635865566405383,
122
+ -1.313161190599203,
123
+ 0.41478794440627087,
124
+ 1.4886140796271237,
125
+ 0.0327876329168,
126
+ 0.2434083236889406,
127
+ 0.09594157693738284,
128
+ 0.20976496081460608,
129
+ 1.4955806767737323,
130
+ 0.5106299925934186,
131
+ -1.5047886750914834,
132
+ 0.035419227597727025
133
+ ],
134
+ "std": [
135
+ 0.10220121447453302,
136
+ 0.11127593499077473,
137
+ 0.0693507274854023,
138
+ 2.61021122878121,
139
+ 0.31264831774938195,
140
+ 2.385310375748136,
141
+ 0.028368956930127334,
142
+ 0.09613372647295047,
143
+ 0.08261520968608296,
144
+ 0.05838800456095643,
145
+ 2.516367436105295,
146
+ 0.2863539402986677,
147
+ 2.4039184089857386,
148
+ 0.028089917285829002
149
+ ],
150
+ "max": [
151
+ 0.5965149998664856,
152
+ 0.48442599177360535,
153
+ 0.6967939734458923,
154
+ 3.1415927410125732,
155
+ 1.5707963705062866,
156
+ 3.1415927410125732,
157
+ 0.0810599997639656,
158
+ 0.5695070028305054,
159
+ 0.4637550115585327,
160
+ 0.6456500291824341,
161
+ 3.1415927410125732,
162
+ 1.5707963705062866,
163
+ 3.1415927410125732,
164
+ 0.061319999396800995
165
+ ],
166
+ "min": [
167
+ -0.10788500308990479,
168
+ -0.5191159844398499,
169
+ 0.040369000285863876,
170
+ -3.1410515308380127,
171
+ -1.5302174091339111,
172
+ -3.1410515308380127,
173
+ -0.0017500000540167093,
174
+ -0.03119499981403351,
175
+ -0.356126993894577,
176
+ 0.06903599947690964,
177
+ -3.141069173812866,
178
+ -0.7706850171089172,
179
+ -3.1410515308380127,
180
+ 0.0
181
+ ],
182
+ "q01": [
183
+ 0.03489213060587645,
184
+ -0.36912800788879396,
185
+ 0.07184209674596786,
186
+ -3.137246325016022,
187
+ -0.20070483744144438,
188
+ -3.137392144203186,
189
+ 0.0,
190
+ 0.030881709717214108,
191
+ -0.15914200246334076,
192
+ 0.10170399993658066,
193
+ -3.138237874507904,
194
+ -0.11282174646854401,
195
+ -3.1362587141990663,
196
+ 7.000000186963007e-05
197
+ ],
198
+ "q99": [
199
+ 0.5328979176282882,
200
+ 0.41213971734046934,
201
+ 0.47792605459690085,
202
+ 3.1415927410125732,
203
+ 1.5365502309799193,
204
+ 3.1415927410125732,
205
+ 0.06019999831914902,
206
+ 0.5246558457612991,
207
+ 0.35473266333341585,
208
+ 0.42531779676675785,
209
+ 3.1415927410125732,
210
+ 1.5435859155654907,
211
+ 3.1415927410125732,
212
+ 0.05992000177502632
213
+ ]
214
+ },
215
+ "num_transitions": 3335583,
216
+ "num_trajectories": 2411
217
+ }
218
+ }