masonlimx committed on
Commit
e365611
·
verified ·
1 Parent(s): 54236fd

Add dreamzero_libero_10_full_finetune_bs64 checkpoints

Browse files
dreamzero_libero_10_full_finetune_bs64/checkpoints/step-019032-epoch-12-loss=0.0578.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8062b3ff6d4d47a85d1d0aa9c62b58690de64357861a7416900517ae4b30d7f4
3
+ size 91696959464
dreamzero_libero_10_full_finetune_bs64/config.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_ckpt_root": "./checkpoints",
3
+ "_frame_window_size": 9,
4
+ "_tokenizer": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
5
+ "eval": {
6
+ "dataset": {
7
+ "transforms": [
8
+ {
9
+ "img_keys": [
10
+ "agentview_image",
11
+ "robot0_eye_in_hand_image"
12
+ ],
13
+ "type": "ProcessLiberoEvalInputs"
14
+ },
15
+ {
16
+ "image_resize_strategy": "resize-naive",
17
+ "input_sizes": [
18
+ [
19
+ 3,
20
+ 128,
21
+ 128
22
+ ],
23
+ [
24
+ 3,
25
+ 128,
26
+ 128
27
+ ]
28
+ ],
29
+ "means": [
30
+ [
31
+ 127.5,
32
+ 127.5,
33
+ 127.5
34
+ ],
35
+ [
36
+ 127.5,
37
+ 127.5,
38
+ 127.5
39
+ ]
40
+ ],
41
+ "stds": [
42
+ [
43
+ 127.5,
44
+ 127.5,
45
+ 127.5
46
+ ],
47
+ [
48
+ 127.5,
49
+ 127.5,
50
+ 127.5
51
+ ]
52
+ ],
53
+ "type": "TransformImage"
54
+ },
55
+ {
56
+ "gripper_key": "robot0_gripper_qpos",
57
+ "norm_type": "mean_std",
58
+ "out_key": "states",
59
+ "pos_key": "robot0_eef_pos",
60
+ "quat_key": "robot0_eef_quat",
61
+ "state_dim": 32,
62
+ "type": "LiberoProprioFromInputs"
63
+ },
64
+ {
65
+ "max_len": 512,
66
+ "tokenizer": {
67
+ "model_path": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
68
+ "type": "PretrainedTokenizer"
69
+ },
70
+ "type": "LiberoPromptFromInputs",
71
+ "use_conversation": false
72
+ },
73
+ {
74
+ "frame_window_size": 1,
75
+ "num_views": 2,
76
+ "type": "PrepareVideo"
77
+ }
78
+ ],
79
+ "type": "LiberoParquetEvalDataset"
80
+ },
81
+ "denormalize_action": {
82
+ "action_dim": 7,
83
+ "norm_type": "mean_std",
84
+ "type": "DenormalizeLiberoAction"
85
+ },
86
+ "enable_mixed_precision_training": true,
87
+ "eval_chunk_size": 10,
88
+ "mixed_precision_dtype": "bf16",
89
+ "model_family": "dreamzero",
90
+ "num_steps_wait": 10,
91
+ "num_trials_per_task": 50,
92
+ "resize_size": 128,
93
+ "seed": 7,
94
+ "task_suite_name": "libero_10",
95
+ "type": "LiberoEvalRunner"
96
+ },
97
+ "model": {
98
+ "frame_window_size": 9,
99
+ "name_mapping": {
100
+ "vla_head.model": "action_head.model",
101
+ "vlm_backbone.image_encoder": "action_head.image_encoder",
102
+ "vlm_backbone.text_encoder": "action_head.text_encoder",
103
+ "vlm_backbone.vae": "action_head.vae"
104
+ },
105
+ "num_views": 2,
106
+ "pretrained_name_or_path": "./checkpoints/DreamZero-AgiBot",
107
+ "type": "DreamZeroVLA",
108
+ "vla_head": {
109
+ "action_dim": 7,
110
+ "action_horizon": 10,
111
+ "dit_dim": 5120,
112
+ "dit_ffn_dim": 13824,
113
+ "dit_freq_dim": 256,
114
+ "dit_in_dim": 36,
115
+ "dit_num_heads": 40,
116
+ "dit_num_layers": 40,
117
+ "dit_out_dim": 16,
118
+ "frame_seqlen": 128,
119
+ "hidden_size": 1024,
120
+ "input_embedding_dim": 1536,
121
+ "max_action_dim": 32,
122
+ "max_num_embodiments": 32,
123
+ "max_state_dim": 64,
124
+ "noise_beta_alpha": 1.5,
125
+ "noise_beta_beta": 1.0,
126
+ "noise_s": 0.999,
127
+ "num_action_per_block": 10,
128
+ "num_frame_per_block": 2,
129
+ "num_frames": 9,
130
+ "num_inference_steps": 16,
131
+ "num_state_per_block": 1,
132
+ "type": "DreamZeroHead",
133
+ "use_gradient_checkpointing": true
134
+ },
135
+ "vlm_backbone": {
136
+ "image_encoder_path": null,
137
+ "text_encoder_path": null,
138
+ "tiled": false,
139
+ "type": "WanBackbone",
140
+ "vae_path": null
141
+ }
142
+ },
143
+ "per_device_num_workers": 4,
144
+ "runner": {
145
+ "change_key_name": false,
146
+ "collator": {
147
+ "keys": [
148
+ "states",
149
+ "images",
150
+ "img_masks",
151
+ "actions",
152
+ "action_masks",
153
+ "embodiment_ids",
154
+ "frame_masks",
155
+ "lang_tokens",
156
+ "lang_masks"
157
+ ],
158
+ "meta_keys": [
159
+ "task_description",
160
+ "prompt",
161
+ "info",
162
+ "stats",
163
+ "timestamp"
164
+ ],
165
+ "type": "DictCollator"
166
+ },
167
+ "enable_gradient_checkpointing": true,
168
+ "enable_mixed_precision_training": true,
169
+ "learning_rate": 1e-05,
170
+ "lr_scheduler_type": "linear-warmup+cosine-decay",
171
+ "max_epochs": 12,
172
+ "max_grad_norm": 1.0,
173
+ "metric": {
174
+ "active_trackers": [
175
+ "jsonl",
176
+ "wandb"
177
+ ],
178
+ "grad_accumulation_steps": 1,
179
+ "run_dir": "work_dirs",
180
+ "type": "VLAMetric",
181
+ "window_size": 1
182
+ },
183
+ "mixed_precision_dtype": "bf16",
184
+ "sampler": null,
185
+ "sharding_strategy": "full-shard",
186
+ "type": "FSDPTrainRunner",
187
+ "warmup_ratio": 0.05,
188
+ "weight_decay": 1e-05
189
+ },
190
+ "train_dataloader": {
191
+ "dataset": {
192
+ "datasets": {
193
+ "action_key": "action",
194
+ "action_window_size": 10,
195
+ "data_root_path": "./datasets/libero_10_no_noops_lerobotv2.1",
196
+ "frame_window_size": 9,
197
+ "statistic_name": "libero_10_no_noops",
198
+ "transforms": [
199
+ {
200
+ "embodiment_id": 0,
201
+ "name_mappings": {
202
+ "actions": [
203
+ "actions"
204
+ ],
205
+ "observation.state": [
206
+ "states"
207
+ ]
208
+ },
209
+ "parquet_keys": [
210
+ "observation.state",
211
+ "timestamp",
212
+ "actions",
213
+ "info",
214
+ "stats",
215
+ "action_masks"
216
+ ],
217
+ "type": "ProcessParquetInputs",
218
+ "video_keys": [
219
+ "observation.images.image",
220
+ "observation.images.wrist_image"
221
+ ]
222
+ },
223
+ {
224
+ "type": "ParquetPrompter",
225
+ "use_conversation": false
226
+ },
227
+ {
228
+ "max_len": 512,
229
+ "tokenizer": {
230
+ "model_path": "./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl",
231
+ "type": "PretrainedTokenizer"
232
+ },
233
+ "type": "ProcessPrompts"
234
+ },
235
+ {
236
+ "height": 128,
237
+ "type": "ResizeImages",
238
+ "width": 128
239
+ },
240
+ {
241
+ "type": "SimpleNormalizeImages"
242
+ },
243
+ {
244
+ "action_dim": 32,
245
+ "action_key": "action",
246
+ "norm_type": "mean_std",
247
+ "state_dim": 32,
248
+ "state_key": "proprio",
249
+ "type": "NormalizeStatesAndActions"
250
+ },
251
+ {
252
+ "frame_window_size": 9,
253
+ "num_views": 2,
254
+ "type": "PrepareVideo"
255
+ }
256
+ ],
257
+ "type": "ParquetDataset",
258
+ "use_delta": false,
259
+ "window_start_idx": 0
260
+ },
261
+ "name_mappings": {
262
+ "action": [
263
+ "action"
264
+ ],
265
+ "observation.state": [
266
+ "proprio"
267
+ ]
268
+ },
269
+ "statistic_keys": [
270
+ "observation.state",
271
+ "timestamp",
272
+ "action"
273
+ ],
274
+ "statistic_name": "libero_10_no_noops",
275
+ "type": "DistributedRepeatingDataset"
276
+ },
277
+ "per_device_batch_size": 4,
278
+ "per_device_num_workers": 4
279
+ }
280
+ }
dreamzero_libero_10_full_finetune_bs64/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _ckpt_root: ./checkpoints
2
+ _frame_window_size: 9
3
+ _tokenizer: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
4
+ eval:
5
+ dataset:
6
+ transforms:
7
+ - img_keys:
8
+ - agentview_image
9
+ - robot0_eye_in_hand_image
10
+ type: ProcessLiberoEvalInputs
11
+ - image_resize_strategy: resize-naive
12
+ input_sizes:
13
+ - - 3
14
+ - 128
15
+ - 128
16
+ - - 3
17
+ - 128
18
+ - 128
19
+ means:
20
+ - - 127.5
21
+ - 127.5
22
+ - 127.5
23
+ - - 127.5
24
+ - 127.5
25
+ - 127.5
26
+ stds:
27
+ - - 127.5
28
+ - 127.5
29
+ - 127.5
30
+ - - 127.5
31
+ - 127.5
32
+ - 127.5
33
+ type: TransformImage
34
+ - gripper_key: robot0_gripper_qpos
35
+ norm_type: mean_std
36
+ out_key: states
37
+ pos_key: robot0_eef_pos
38
+ quat_key: robot0_eef_quat
39
+ state_dim: 32
40
+ type: LiberoProprioFromInputs
41
+ - max_len: 512
42
+ tokenizer:
43
+ model_path: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
44
+ type: PretrainedTokenizer
45
+ type: LiberoPromptFromInputs
46
+ use_conversation: false
47
+ - frame_window_size: 1
48
+ num_views: 2
49
+ type: PrepareVideo
50
+ type: LiberoParquetEvalDataset
51
+ denormalize_action:
52
+ action_dim: 7
53
+ norm_type: mean_std
54
+ type: DenormalizeLiberoAction
55
+ enable_mixed_precision_training: true
56
+ eval_chunk_size: 10
57
+ mixed_precision_dtype: bf16
58
+ model_family: dreamzero
59
+ num_steps_wait: 10
60
+ num_trials_per_task: 50
61
+ resize_size: 128
62
+ seed: 7
63
+ task_suite_name: libero_10
64
+ type: LiberoEvalRunner
65
+ model:
66
+ frame_window_size: 9
67
+ name_mapping:
68
+ vla_head.model: action_head.model
69
+ vlm_backbone.image_encoder: action_head.image_encoder
70
+ vlm_backbone.text_encoder: action_head.text_encoder
71
+ vlm_backbone.vae: action_head.vae
72
+ num_views: 2
73
+ pretrained_name_or_path: ./checkpoints/DreamZero-AgiBot
74
+ type: DreamZeroVLA
75
+ vla_head:
76
+ action_dim: 7
77
+ action_horizon: 10
78
+ dit_dim: 5120
79
+ dit_ffn_dim: 13824
80
+ dit_freq_dim: 256
81
+ dit_in_dim: 36
82
+ dit_num_heads: 40
83
+ dit_num_layers: 40
84
+ dit_out_dim: 16
85
+ frame_seqlen: 128
86
+ hidden_size: 1024
87
+ input_embedding_dim: 1536
88
+ max_action_dim: 32
89
+ max_num_embodiments: 32
90
+ max_state_dim: 64
91
+ noise_beta_alpha: 1.5
92
+ noise_beta_beta: 1.0
93
+ noise_s: 0.999
94
+ num_action_per_block: 10
95
+ num_frame_per_block: 2
96
+ num_frames: 9
97
+ num_inference_steps: 16
98
+ num_state_per_block: 1
99
+ type: DreamZeroHead
100
+ use_gradient_checkpointing: true
101
+ vlm_backbone:
102
+ image_encoder_path: null
103
+ text_encoder_path: null
104
+ tiled: false
105
+ type: WanBackbone
106
+ vae_path: null
107
+ per_device_num_workers: 4
108
+ runner:
109
+ change_key_name: false
110
+ collator:
111
+ keys:
112
+ - states
113
+ - images
114
+ - img_masks
115
+ - actions
116
+ - action_masks
117
+ - embodiment_ids
118
+ - frame_masks
119
+ - lang_tokens
120
+ - lang_masks
121
+ meta_keys:
122
+ - task_description
123
+ - prompt
124
+ - info
125
+ - stats
126
+ - timestamp
127
+ type: DictCollator
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ learning_rate: 1.0e-05
131
+ lr_scheduler_type: linear-warmup+cosine-decay
132
+ max_epochs: 12
133
+ max_grad_norm: 1.0
134
+ metric:
135
+ active_trackers:
136
+ - jsonl
137
+ - wandb
138
+ grad_accumulation_steps: 1
139
+ run_dir: work_dirs
140
+ type: VLAMetric
141
+ window_size: 1
142
+ mixed_precision_dtype: bf16
143
+ sampler: null
144
+ sharding_strategy: full-shard
145
+ type: FSDPTrainRunner
146
+ warmup_ratio: 0.05
147
+ weight_decay: 1.0e-05
148
+ train_dataloader:
149
+ dataset:
150
+ datasets:
151
+ action_key: action
152
+ action_window_size: 10
153
+ data_root_path: ./datasets/libero_10_no_noops_lerobotv2.1
154
+ frame_window_size: 9
155
+ statistic_name: libero_10_no_noops
156
+ transforms:
157
+ - embodiment_id: 0
158
+ name_mappings:
159
+ actions:
160
+ - actions
161
+ observation.state:
162
+ - states
163
+ parquet_keys:
164
+ - observation.state
165
+ - timestamp
166
+ - actions
167
+ - info
168
+ - stats
169
+ - action_masks
170
+ type: ProcessParquetInputs
171
+ video_keys:
172
+ - observation.images.image
173
+ - observation.images.wrist_image
174
+ - type: ParquetPrompter
175
+ use_conversation: false
176
+ - max_len: 512
177
+ tokenizer:
178
+ model_path: ./checkpoints/Wan2.1-I2V-14B-480P/google/umt5-xxl
179
+ type: PretrainedTokenizer
180
+ type: ProcessPrompts
181
+ - height: 128
182
+ type: ResizeImages
183
+ width: 128
184
+ - type: SimpleNormalizeImages
185
+ - action_dim: 32
186
+ action_key: action
187
+ norm_type: mean_std
188
+ state_dim: 32
189
+ state_key: proprio
190
+ type: NormalizeStatesAndActions
191
+ - frame_window_size: 9
192
+ num_views: 2
193
+ type: PrepareVideo
194
+ type: ParquetDataset
195
+ use_delta: false
196
+ window_start_idx: 0
197
+ name_mappings:
198
+ action:
199
+ - action
200
+ observation.state:
201
+ - proprio
202
+ statistic_keys:
203
+ - observation.state
204
+ - timestamp
205
+ - action
206
+ statistic_name: libero_10_no_noops
207
+ type: DistributedRepeatingDataset
208
+ per_device_batch_size: 4
209
+ per_device_num_workers: 4
dreamzero_libero_10_full_finetune_bs64/dataset_statistics.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "libero_10_no_noops": {
3
+ "proprio": {
4
+ "mean": [
5
+ -0.0419064655693921,
6
+ 0.0353943785769225,
7
+ 0.8257066448085474,
8
+ 2.908315654671235,
9
+ -0.5562158603122547,
10
+ -0.1664910329554594,
11
+ 0.02831534785236664,
12
+ -0.028561558922556265
13
+ ],
14
+ "std": [
15
+ 0.037983810285504724,
16
+ 0.05099922690402999,
17
+ 0.09094586143443492,
18
+ 0.12167118781966886,
19
+ 0.43643697181350727,
20
+ 0.12656789603066015,
21
+ 0.004705366661198258,
22
+ 0.004657921514447958
23
+ ],
24
+ "min": [
25
+ -0.4828203022480011,
26
+ -0.3255046010017395,
27
+ 0.445506751537323,
28
+ 1.1321442127227783,
29
+ -3.641430377960205,
30
+ -1.842738389968872,
31
+ -0.0010040868073701859,
32
+ -0.04111652821302414
33
+ ],
34
+ "max": [
35
+ 0.21031762659549713,
36
+ 0.39128610491752625,
37
+ 1.3332009315490723,
38
+ 3.6714255809783936,
39
+ 3.560650587081909,
40
+ 1.386339545249939,
41
+ 0.04160946607589722,
42
+ 0.0013633022317662835
43
+ ],
44
+ "q01": null,
45
+ "q99": null
46
+ },
47
+ "timestamp": {
48
+ "mean": [
49
+ 6.968810671239492
50
+ ],
51
+ "std": [
52
+ 4.4205853432820845
53
+ ],
54
+ "min": [
55
+ 0.0
56
+ ],
57
+ "max": [
58
+ 25.2
59
+ ],
60
+ "q01": null,
61
+ "q99": null
62
+ },
63
+ "action": {
64
+ "mean": [
65
+ 0.018203219580245917,
66
+ 0.05858386677049721,
67
+ -0.05592356325431262,
68
+ 0.004626933903665416,
69
+ 0.0028960781014207345,
70
+ -0.0076731359981381505,
71
+ 0.5457824565452817
72
+ ],
73
+ "std": [
74
+ 0.10678436772960577,
75
+ 0.13569355116695744,
76
+ 0.1388675428804427,
77
+ 0.014251597889066525,
78
+ 0.020520837090261576,
79
+ 0.03297657922665584,
80
+ 0.1881883528070125
81
+ ],
82
+ "min": [
83
+ -0.9375,
84
+ -0.9375,
85
+ -0.9375,
86
+ -0.23642857372760773,
87
+ -0.3053571283817291,
88
+ -0.3675000071525574,
89
+ 0.0
90
+ ],
91
+ "max": [
92
+ 0.9375,
93
+ 0.9375,
94
+ 0.9375,
95
+ 0.30000001192092896,
96
+ 0.29357144236564636,
97
+ 0.375,
98
+ 1.0
99
+ ],
100
+ "q01": null,
101
+ "q99": null
102
+ }
103
+ }
104
+ }
dreamzero_libero_10_full_finetune_bs64/dreamzero_libero_10_full_finetune_2026_04_19_03_59_36.jsonl ADDED
The diff for this file is too large to render. See raw diff