lllliuxiao23 committed on
Commit
5d6875b
·
verified ·
1 Parent(s): 5dc9974

update g0plus fold

Browse files
G0Plus_3B_Fold/config.yaml ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 7
2
+ resume_ckpt: null
3
+ output_dir: ${hydra:runtime.output_dir}
4
+ dataset_stats_cache_dir: ${oc.env:GALAXEA_FM_DATASET_STATS_CACHE_DIR}
5
+ checkpointing_steps: 5000
6
+ logger:
7
+ type: swanlab
8
+ log_steps: 10
9
+ task: ${hydra:runtime.choices.task}
10
+ project: ${split:${logger.task},0}
11
+ experiment_name: ${split:${logger.task},-1}
12
+ mode: cloud
13
+ workspace: Galaxea-AI
14
+ dir: null
15
+ batch_size_val: 16
16
+ eval_episodes_num: 1
17
+ ckpt_path: null
18
+ env: R1ProBlocksStackEasy
19
+ target_controller_type: bimanual_relaxed_ik
20
+ tags: null
21
+ edp:
22
+ card: null
23
+ training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S}
24
+ git_branch: null
25
+ git_commit: null
26
+ root: null
27
+ repo_ids: null
28
+ save_dir: ${output_dir}
29
+ tags: ${tags}
30
+ max_steps: ${model.max_steps}
31
+ batch_size: ${model.batch_size}
32
+ libero_eval:
33
+ task_suite_names:
34
+ - libero_10
35
+ - libero_spatial
36
+ - libero_object
37
+ - libero_goal
38
+ num_steps_wait: 10
39
+ replan_steps: 5
40
+ num_trials: 50
41
+ output_dir: ${output_dir}
42
+ run_id_note: null
43
+ env_num: 50
44
+ data:
45
+ dataset:
46
+ _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset
47
+ dataset_dirs:
48
+ - /efm-nas/efm-nas/efm-shared/data/fold/fold_towel_fixcam_demo/Bench_Fold_Towels_20250922_003_v20260127_121939
49
+ shape_meta:
50
+ action:
51
+ - key: left_arm
52
+ raw_shape: 6
53
+ shape: 6
54
+ - key: left_gripper
55
+ raw_shape: 1
56
+ shape: 1
57
+ - key: right_arm
58
+ raw_shape: 6
59
+ shape: 6
60
+ - key: right_gripper
61
+ raw_shape: 1
62
+ shape: 1
63
+ - key: torso.velocities
64
+ raw_shape: 6
65
+ shape: 6
66
+ - key: chassis.velocities
67
+ raw_shape: 6
68
+ shape: 6
69
+ state:
70
+ - key: left_arm
71
+ raw_shape: 6
72
+ shape: 6
73
+ - key: left_gripper
74
+ raw_shape: 1
75
+ shape: 1
76
+ - key: right_arm
77
+ raw_shape: 6
78
+ shape: 6
79
+ - key: right_gripper
80
+ raw_shape: 1
81
+ shape: 1
82
+ - key: torso
83
+ raw_shape: 4
84
+ shape: 4
85
+ - key: chassis
86
+ raw_shape: 3
87
+ shape: 3
88
+ images:
89
+ - key: head_rgb
90
+ raw_shape:
91
+ - 3
92
+ - 720
93
+ - 1280
94
+ shape:
95
+ - 3
96
+ - ${model.model_meta.input_image_size.0}
97
+ - ${model.model_meta.input_image_size.1}
98
+ - key: left_wrist_rgb
99
+ raw_shape:
100
+ - 3
101
+ - 720
102
+ - 1280
103
+ shape:
104
+ - 3
105
+ - ${model.model_meta.input_image_size.0}
106
+ - ${model.model_meta.input_image_size.1}
107
+ - key: right_wrist_rgb
108
+ raw_shape:
109
+ - 3
110
+ - 720
111
+ - 1280
112
+ shape:
113
+ - 3
114
+ - ${model.model_meta.input_image_size.0}
115
+ - ${model.model_meta.input_image_size.1}
116
+ action_size: 32
117
+ past_action_size: 0
118
+ obs_size: 1
119
+ ee_start_moving_thresh: 0.0
120
+ val_set_proportion: 0.05
121
+ processor:
122
+ _target_: galaxea_fm.processors.base_processor.BaseProcessor
123
+ shape_meta: ${data.dataset.shape_meta}
124
+ num_obs_steps: ${data.dataset.obs_size}
125
+ num_output_cameras: 3
126
+ action_output_dim: ${sum_shapes:${data.dataset.shape_meta.action}}
127
+ proprio_output_dim: ${sum_shapes:${data.dataset.shape_meta.state}}
128
+ action_state_transforms:
129
+ - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform
130
+ keys:
131
+ - left_arm
132
+ - right_arm
133
+ - _target_: galaxea_fm.transforms.misc.WrapStateAngle
134
+ keys:
135
+ - chassis
136
+ use_stepwise_action_norm: true
137
+ norm_default_mode: ${model.model_meta.norm_default_mode}
138
+ norm_exception_mode:
139
+ action:
140
+ left_gripper: 0/100
141
+ right_gripper: 0/100
142
+ action_state_merger:
143
+ _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign
144
+ train_transforms:
145
+ head_rgb:
146
+ - _target_: torchvision.transforms.Resize
147
+ size: ${model.model_meta.input_image_size}
148
+ - _target_: galaxea_fm.transforms.image.ToTensor
149
+ - _target_: torchvision.transforms.Normalize
150
+ mean:
151
+ - 0.5
152
+ - 0.5
153
+ - 0.5
154
+ std:
155
+ - 0.5
156
+ - 0.5
157
+ - 0.5
158
+ left_wrist_rgb: ${data.processor.train_transforms.head_rgb}
159
+ right_wrist_rgb: ${data.processor.train_transforms.head_rgb}
160
+ val_transforms:
161
+ head_rgb:
162
+ - _target_: torchvision.transforms.Resize
163
+ size: ${model.model_meta.input_image_size}
164
+ - _target_: galaxea_fm.transforms.image.ToTensor
165
+ - _target_: torchvision.transforms.Normalize
166
+ mean:
167
+ - 0.5
168
+ - 0.5
169
+ - 0.5
170
+ std:
171
+ - 0.5
172
+ - 0.5
173
+ - 0.5
174
+ left_wrist_rgb: ${data.processor.val_transforms.head_rgb}
175
+ right_wrist_rgb: ${data.processor.val_transforms.head_rgb}
176
+ drop_high_level_prob: 1.0
177
+ use_zh_instruction: false
178
+ tokenizer: ${model.tokenizer}
179
+ model:
180
+ pretrained_ckpt:
181
+ use_pretrained_norm_stats: true
182
+ model_weights_to_bf16: false
183
+ enable_bf16_training: true
184
+ use_torch_compile: false
185
+ find_unused_parameters: false
186
+ batch_size: 8
187
+ num_workers: 4
188
+ pin_memory: true
189
+ persistent_workers: true
190
+ max_epochs: 10
191
+ max_steps: null
192
+ grad_accumulation_steps: 1
193
+ use_8bit_optimizer: false
194
+ learning_rate: 0.00012
195
+ weight_decay: 0.0001
196
+ betas:
197
+ - 0.9
198
+ - 0.95
199
+ lr_scheduler_type: cosine
200
+ warmup_steps: 5000
201
+ max_grad_norm: 1.0
202
+ use_ema: false
203
+ ema:
204
+ update_after_step: 0
205
+ power: 0.67
206
+ use_sync_bn: false
207
+ tokenizer:
208
+ _target_: galaxea_fm.models.galaxea_zero.paligemma.tokenizer.PaliGemmaTokenizer
209
+ tokenizer_params:
210
+ pretrained_model_name_or_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224
211
+ local_files_only: true
212
+ pad_token_id: ${model.model_arch.pad_token_id}
213
+ image_token_index: ${model.model_arch.image_token_index}
214
+ max_text_tokens: ${model.model_arch.max_text_tokens}
215
+ num_tokens_per_image: ${model.model_arch.vision.num_image_tokens}
216
+ num_input_images: ${model.model_arch.num_input_images}
217
+ model_arch:
218
+ _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy
219
+ model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero
220
+ pretrained_model_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224
221
+ vla_training_strategy: vla-full-train
222
+ backbone_lr_multiplier: 1.0
223
+ image_token_index: 257152
224
+ pad_token_id: 0
225
+ vocab_size: 257216
226
+ fill_padded_with_token: false
227
+ embed_token_key_prefix: language_model.model.embed_tokens
228
+ cond_steps: ${data.dataset.obs_size}
229
+ horizon_steps: ${data.dataset.action_size}
230
+ max_text_tokens: 55
231
+ num_input_images: ${eval:'${model.model_arch.cond_steps} * ${data.processor.num_output_cameras}'}
232
+ max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens}
233
+ + ${model.model_arch.max_text_tokens}'}
234
+ final_action_clip_value: null
235
+ action_dim: ${data.processor.action_output_dim}
236
+ proprio_dim: ${data.processor.proprio_output_dim}
237
+ action_decoder_layers: 2
238
+ action_expert_adaptive_mode: null
239
+ flow_sampling: beta
240
+ num_inference_steps: 10
241
+ vision:
242
+ name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel
243
+ key_prefix: vision_tower
244
+ hidden_size: 1152
245
+ intermediate_size: 4304
246
+ num_hidden_layers: 27
247
+ num_attention_heads: 16
248
+ num_channels: 3
249
+ image_size: 224
250
+ patch_size: 14
251
+ layer_norm_eps: 1.0e-06
252
+ attention_dropout: 0.0
253
+ num_image_tokens: 256
254
+ vision_projector:
255
+ name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector
256
+ key_prefix: multi_modal_projector
257
+ vision_config:
258
+ hidden_size: 1152
259
+ projection_dim: 2048
260
+ joint:
261
+ name: galaxea_fm.models.galaxea_zero.joint_model.JointModel
262
+ key_prefix: language_model.model
263
+ action_expert_adaptive_mode: null
264
+ module_names:
265
+ mlp: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaMLP
266
+ norm: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRMSNorm
267
+ rope: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRotaryEmbedding
268
+ mixture:
269
+ vlm:
270
+ hidden_size: 2048
271
+ intermediate_size: 16384
272
+ use_final_norm: false
273
+ cache: true
274
+ proprio:
275
+ hidden_size: 1024
276
+ intermediate_size: 4096
277
+ use_final_norm: true
278
+ cache: true
279
+ adaptive_mode: null
280
+ action:
281
+ hidden_size: 1024
282
+ intermediate_size: 4096
283
+ use_final_norm: true
284
+ cache: false
285
+ adaptive_mode: null
286
+ time_hidden_size: 256
287
+ num_hidden_layers: 18
288
+ num_attention_heads: 8
289
+ num_key_value_heads: 1
290
+ head_dim: 256
291
+ max_position_embeddings: 8192
292
+ rms_norm_eps: 1.0e-06
293
+ rope_theta: 10000.0
294
+ attention_bias: false
295
+ attention_dropout: 0.0
296
+ model_meta:
297
+ norm_default_mode: z-score
298
+ input_image_size:
299
+ - ${model.model_arch.vision.image_size}
300
+ - ${model.model_arch.vision.image_size}
301
+ pretrained_dataset_stats:
G0Plus_3B_Fold/dataset_stats.json ADDED
The diff for this file is too large to render. See raw diff
 
G0Plus_3B_Fold/efmnode.toml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [robot]
2
+ hardware = "R1_LITE"
3
+ enable_publish = [
4
+ "left_arm",
5
+ "left_gripper",
6
+ #"left_ee_pose",
7
+ "right_arm",
8
+ "right_gripper",
9
+ #"right_ee_pose",
10
+ "torso",
11
+ #"chassis",
12
+ ]
13
+
14
+
15
+ [basic]
16
+ use_ehi = false
17
+ control_frequency = 17.0
18
+ step_mode = "sync" # one of: "sync", "async"
19
+ action_steps = 18
20
+
21
+ [model]
22
+ ckpt_dir = "/xxxx/fold_towel_g0fast"
23
+
24
+ processor = "default"
25
+ use_trt = false
26
+ is_torch_compile = false
27
+
28
+ [websocket]
29
+ use_websocket = true
30
+ host = "0.0.0.0"
31
+ port = 8080
32
+
33
+ [trajectory]
34
+ ensemble_mode="none"
35
+ execution_mode="JOINT_STATE"
36
+
37
+ [instruction]
38
+ use_vlm = false
39
+ bbox_as_instruction = false
40
+ image_condition_lang_prefix = false
41
+ pp_lower_half = false
42
+ image_as_condition = false
G0Plus_3B_Fold/model_state_dict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe68a453342af3d39c07fb049ee8dc1beeb7b41063ccbb5bf851e62f045ecd2c
3
+ size 12957213816