| seed: 7 |
| resume_ckpt: null |
| output_dir: ${hydra:runtime.output_dir} |
| dataset_stats_cache_dir: ${oc.env:GALAXEA_FM_DATASET_STATS_CACHE_DIR} |
| checkpointing_steps: 5000 |
| logger: |
| type: swanlab |
| log_steps: 10 |
| task: ${hydra:runtime.choices.task} |
| project: ${split:${logger.task},0} |
| experiment_name: ${split:${logger.task},-1} |
| mode: cloud |
| workspace: Galaxea-AI |
| dir: null |
| batch_size_val: 16 |
| eval_episodes_num: 1 |
| ckpt_path: null |
| env: R1ProBlocksStackEasy |
| target_controller_type: bimanual_relaxed_ik |
| tags: null |
| edp: |
| card: null |
| training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S} |
| git_branch: null |
| git_commit: null |
| root: null |
| repo_ids: null |
| save_dir: ${output_dir} |
| tags: ${tags} |
| max_steps: ${model.max_steps} |
| batch_size: ${model.batch_size} |
| libero_eval: |
| task_suite_names: |
| - libero_10 |
| - libero_spatial |
| - libero_object |
| - libero_goal |
| num_steps_wait: 10 |
| replan_steps: 5 |
| num_trials: 50 |
| output_dir: ${output_dir} |
| run_id_note: null |
| env_num: 50 |
| data: |
| dataset: |
| _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset |
| dataset_dirs: |
| - /To/Your/Path |
| shape_meta: |
| action: |
| - key: left_arm |
| raw_shape: 6 |
| shape: 6 |
| - key: left_gripper |
| raw_shape: 1 |
| shape: 1 |
| - key: right_arm |
| raw_shape: 6 |
| shape: 6 |
| - key: right_gripper |
| raw_shape: 1 |
| shape: 1 |
| - key: torso.velocities |
| raw_shape: 6 |
| shape: 6 |
| - key: chassis.velocities |
| raw_shape: 6 |
| shape: 6 |
| state: |
| - key: left_arm |
| raw_shape: 6 |
| shape: 6 |
| - key: left_gripper |
| raw_shape: 1 |
| shape: 1 |
| - key: right_arm |
| raw_shape: 6 |
| shape: 6 |
| - key: right_gripper |
| raw_shape: 1 |
| shape: 1 |
| - key: torso |
| raw_shape: 4 |
| shape: 4 |
| - key: chassis |
| raw_shape: 3 |
| shape: 3 |
| images: |
| - key: head_rgb |
| raw_shape: |
| - 3 |
| - 720 |
| - 1280 |
| shape: |
| - 3 |
| - ${model.model_meta.input_image_size.0} |
| - ${model.model_meta.input_image_size.1} |
| - key: left_wrist_rgb |
| raw_shape: |
| - 3 |
| - 720 |
| - 1280 |
| shape: |
| - 3 |
| - ${model.model_meta.input_image_size.0} |
| - ${model.model_meta.input_image_size.1} |
| - key: right_wrist_rgb |
| raw_shape: |
| - 3 |
| - 720 |
| - 1280 |
| shape: |
| - 3 |
| - ${model.model_meta.input_image_size.0} |
| - ${model.model_meta.input_image_size.1} |
| action_size: 32 |
| past_action_size: 0 |
| obs_size: 1 |
| ee_start_moving_thresh: 0.0 |
| val_set_proportion: 0.05 |
| processor: |
| _target_: galaxea_fm.processors.base_processor.BaseProcessor |
| shape_meta: ${data.dataset.shape_meta} |
| num_obs_steps: ${data.dataset.obs_size} |
| num_output_cameras: 3 |
| action_output_dim: ${sum_shapes:${data.dataset.shape_meta.action}} |
| proprio_output_dim: ${sum_shapes:${data.dataset.shape_meta.state}} |
| action_state_transforms: |
| - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform |
| keys: |
| - left_arm |
| - right_arm |
| - _target_: galaxea_fm.transforms.misc.WrapStateAngle |
| keys: |
| - chassis |
| use_stepwise_action_norm: true |
| norm_default_mode: ${model.model_meta.norm_default_mode} |
| norm_exception_mode: |
| action: |
| left_gripper: 0/100 |
| right_gripper: 0/100 |
| action_state_merger: |
| _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign |
| train_transforms: |
| head_rgb: |
| - _target_: torchvision.transforms.Resize |
| size: ${model.model_meta.input_image_size} |
| - _target_: galaxea_fm.transforms.image.ToTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.5 |
| - 0.5 |
| - 0.5 |
| std: |
| - 0.5 |
| - 0.5 |
| - 0.5 |
| left_wrist_rgb: ${data.processor.train_transforms.head_rgb} |
| right_wrist_rgb: ${data.processor.train_transforms.head_rgb} |
| val_transforms: |
| head_rgb: |
| - _target_: torchvision.transforms.Resize |
| size: ${model.model_meta.input_image_size} |
| - _target_: galaxea_fm.transforms.image.ToTensor |
| - _target_: torchvision.transforms.Normalize |
| mean: |
| - 0.5 |
| - 0.5 |
| - 0.5 |
| std: |
| - 0.5 |
| - 0.5 |
| - 0.5 |
| left_wrist_rgb: ${data.processor.val_transforms.head_rgb} |
| right_wrist_rgb: ${data.processor.val_transforms.head_rgb} |
| drop_high_level_prob: 1.0 |
| use_zh_instruction: false |
| tokenizer: ${model.tokenizer} |
| model: |
| pretrained_ckpt: null |
| use_pretrained_norm_stats: true |
| model_weights_to_bf16: false |
| enable_bf16_training: true |
| use_torch_compile: false |
| find_unused_parameters: false |
| batch_size: 8 |
| num_workers: 4 |
| pin_memory: true |
| persistent_workers: true |
| max_epochs: 10 |
| max_steps: null |
| grad_accumulation_steps: 1 |
| use_8bit_optimizer: false |
| learning_rate: 0.00012 |
| weight_decay: 0.0001 |
| betas: |
| - 0.9 |
| - 0.95 |
| lr_scheduler_type: cosine |
| warmup_steps: 5000 |
| max_grad_norm: 1.0 |
| use_ema: false |
| ema: |
| update_after_step: 0 |
| power: 0.67 |
| use_sync_bn: false |
| tokenizer: |
| _target_: galaxea_fm.models.galaxea_zero.paligemma.tokenizer.PaliGemmaTokenizer |
| tokenizer_params: |
| pretrained_model_name_or_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224 |
| local_files_only: true |
| pad_token_id: ${model.model_arch.pad_token_id} |
| image_token_index: ${model.model_arch.image_token_index} |
| max_text_tokens: ${model.model_arch.max_text_tokens} |
| num_tokens_per_image: ${model.model_arch.vision.num_image_tokens} |
| num_input_images: ${model.model_arch.num_input_images} |
| model_arch: |
| _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy |
| model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero |
| pretrained_model_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224 |
| vla_training_strategy: vla-full-train |
| backbone_lr_multiplier: 1.0 |
| image_token_index: 257152 |
| pad_token_id: 0 |
| vocab_size: 257216 |
| fill_padded_with_token: false |
| embed_token_key_prefix: language_model.model.embed_tokens |
| cond_steps: ${data.dataset.obs_size} |
| horizon_steps: ${data.dataset.action_size} |
| max_text_tokens: 55 |
| num_input_images: ${eval:'${model.model_arch.cond_steps} * ${data.processor.num_output_cameras}'} |
| max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens} + ${model.model_arch.max_text_tokens}'} |
| final_action_clip_value: null |
| action_dim: ${data.processor.action_output_dim} |
| proprio_dim: ${data.processor.proprio_output_dim} |
| action_decoder_layers: 2 |
| action_expert_adaptive_mode: null |
| flow_sampling: beta |
| num_inference_steps: 10 |
| vision: |
| name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel |
| key_prefix: vision_tower |
| hidden_size: 1152 |
| intermediate_size: 4304 |
| num_hidden_layers: 27 |
| num_attention_heads: 16 |
| num_channels: 3 |
| image_size: 224 |
| patch_size: 14 |
| layer_norm_eps: 1.0e-06 |
| attention_dropout: 0.0 |
| num_image_tokens: 256 |
| vision_projector: |
| name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector |
| key_prefix: multi_modal_projector |
| vision_config: |
| hidden_size: 1152 |
| projection_dim: 2048 |
| joint: |
| name: galaxea_fm.models.galaxea_zero.joint_model.JointModel |
| key_prefix: language_model.model |
| action_expert_adaptive_mode: null |
| module_names: |
| mlp: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaMLP |
| norm: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRMSNorm |
| rope: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRotaryEmbedding |
| mixture: |
| vlm: |
| hidden_size: 2048 |
| intermediate_size: 16384 |
| use_final_norm: false |
| cache: true |
| proprio: |
| hidden_size: 1024 |
| intermediate_size: 4096 |
| use_final_norm: true |
| cache: true |
| adaptive_mode: null |
| action: |
| hidden_size: 1024 |
| intermediate_size: 4096 |
| use_final_norm: true |
| cache: false |
| adaptive_mode: null |
| time_hidden_size: 256 |
| num_hidden_layers: 18 |
| num_attention_heads: 8 |
| num_key_value_heads: 1 |
| head_dim: 256 |
| max_position_embeddings: 8192 |
| rms_norm_eps: 1.0e-06 |
| rope_theta: 10000.0 |
| attention_bias: false |
| attention_dropout: 0.0 |
| model_meta: |
| norm_default_mode: z-score |
| input_image_size: |
| - ${model.model_arch.vision.image_size} |
| - ${model.model_arch.vision.image_size} |
| pretrained_dataset_stats: null |
|
|