seed: 7
resume_ckpt: /vla_fulltime/jianning.cui/code/GalaxeaFM/runs/merge_pipeline/real/r1lite_g0_pp_bbox_400_tasks/2025-12-22_05-53-31/checkpoints/step_124838.pt
output_dir: ${hydra:runtime.output_dir}
checkpointing_steps: 17834
logger:
  type: wandb
  log_steps: 10
  task: ${hydra:runtime.choices.task}
  project: ${split:${logger.task},0}
  experiment_name: ${split:${logger.task},-1}
  mode: online
  workspace: cuijianning1996-galaxea-ai
  dir: ${output_dir}/wandb
batch_size_val: 16
eval_episodes_num: 1
ckpt_path: /data/trt_ckpts/model_state_dict.pt
env: R1ProBlocksStackEasy
target_controller_type: bimanual_relaxed_ik
tags: null
edp:
  card: null
  training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S}
  git_branch: null
  git_commit: null
  root: null
  repo_ids: null
  save_dir: ${output_dir}
  tags: ${tags}
  max_steps: ${model.max_steps}
  batch_size: ${model.batch_size}
data:
  _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset
  dataset_dirs: null
  shape_meta:
    action:
      - key: left_arm
        raw_shape: 6
        shape: 6
      - key: left_gripper
        raw_shape: 1
        shape: 1
      - key: right_arm
        raw_shape: 6
        shape: 6
      - key: right_gripper
        raw_shape: 1
        shape: 1
    state:
      - key: left_arm
        raw_shape: 6
        shape: 6
      - key: left_gripper
        raw_shape: 1
        shape: 1
      - key: right_arm
        raw_shape: 6
        shape: 6
      - key: right_gripper
        raw_shape: 1
        shape: 1
    images:
      - key: head_condition
        raw_shape:
          - 3
          - 224
          - 224
        shape:
          - 3
          - 224
          - 224
      - key: head_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
      - key: left_wrist_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
      - key: right_wrist_rgb
        raw_shape:
          - 3
          - 720
          - 1280
        shape:
          - 3
          - 224
          - 224
  action_size: 32
  past_action_size: 0
  obs_size: 1
  ee_start_moving_thresh: 0.0
  val_set_proportion: 0.05
  use_bbox_condition: true
  dataset_root: /galaxea_dataset/galaxea/pp_project/lerobot_with_bbox
  dataset_prefixes:
    - BENCH
    - Bench
model:
  pretrained_ckpt: /galaxea_dataset/mnt/tmp/pp_wt_img_cond/checkpoints/org2fm_v2.pt
  use_pretrained_norm_stats: true
  model_weights_to_bf16: false
  enable_bf16_training: true
  use_torch_compile: false
  find_unused_parameters: false
  batch_size: 2
  num_workers: 4
  pin_memory: true
  persistent_workers: true
  max_epochs: 4
  max_steps: null
  grad_accumulation_steps: 2
  use_8bit_optimizer: false
  learning_rate: 2.5e-05
  weight_decay: 1.0e-06
  betas:
    - 0.9
    - 0.999
  lr_scheduler_type: cosine
  warmup_steps: 500
  max_grad_norm: 1.0
  use_ema: false
  ema:
    update_after_step: 0
    power: 0.67
  use_sync_bn: false
  processor:
    _target_: galaxea_fm.processors.galaxea_zero_processor.GalaxeaZeroProcessor
    shape_meta: ${data.shape_meta}
    num_obs_steps: ${data.obs_size}
    action_state_transforms:
      - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform
        keys:
          - left_arm
          - right_arm
    use_stepwise_action_norm: true
    norm_default_mode: z-score
    norm_exception_mode:
      action:
        left_gripper: 0/100
        right_gripper: 0/100
    action_state_merger:
      _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign
    train_transforms:
      head_condition:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      left_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      right_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
    val_transforms:
      head_condition:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      head_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      left_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
      right_wrist_rgb:
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
        - _target_: galaxea_fm.transforms.image.ToTensor
        - _target_: torchvision.transforms.Normalize
          mean:
            - 0.5
            - 0.5
            - 0.5
          std:
            - 0.5
            - 0.5
            - 0.5
    num_output_cameras: 4
    use_zh_instruction: false
    drop_high_level_prob: 1.0
    pad_token_id: ${model.model_arch.pad_token_id}
    image_token_index: ${model.model_arch.image_token_index}
    tokenizer_params:
      pretrained_model_name_or_path: /data/google/paligemma-3b-pt-224
      local_files_only: false
      token: null
    max_text_tokens: ${model.model_arch.max_text_tokens}
    max_image_text_tokens: ${model.model_arch.max_image_text_tokens}
    num_input_cameras: ${model.model_arch.num_input_images}
    num_image_tokens_per_camera: ${model.model_arch.vision.num_image_tokens}
  model_arch:
    _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy
    model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero
    pretrained_model_path: /data/google/paligemma-3b-pt-224
    vla_training_strategy: vla-full-train
    backbone_lr_multiplier: 1.0
    image_token_index: 257152
    pad_token_id: 0
    vocab_size: 257216
    cond_steps: ${data.obs_size}
    horizon_steps: ${data.action_size}
    max_text_tokens: 55
    max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens} + ${model.model_arch.max_text_tokens}'}
    num_input_images: ${eval:'${model.model_arch.cond_steps} * ${model.processor.num_output_cameras}'}
    num_extra_image_tokens_per_camera: 0
    final_action_clip_value: null
    action_dim: 14
    proprio_dim: 14
    action_decoder_layers: 2
    action_expert_adaptive_mode: null
    flow_sampling: beta
    num_inference_steps: 10
    vision:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel
      hidden_size: 1152
      intermediate_size: 4304
      num_hidden_layers: 27
      num_attention_heads: 16
      num_channels: 3
      image_size: 224
      patch_size: 14
      layer_norm_eps: 1.0e-06
      attention_dropout: 0.0
      num_image_tokens: 256
    vision_projector:
      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector
      vision_config:
        hidden_size: 1152
        projection_dim: 2048
    joint:
      name: galaxea_fm.models.galaxea_zero.joint_model.JointModel
      action_expert_adaptive_mode: null
      mixture:
        vlm:
          hidden_size: 2048
          intermediate_size: 16384
          use_final_norm: false
          cache: true
        proprio:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: true
          adaptive_mode: null
        action:
          hidden_size: 1024
          intermediate_size: 4096
          use_final_norm: true
          cache: false
          adaptive_mode: null
      time_hidden_size: 256
      num_hidden_layers: 18
      num_attention_heads: 8
      num_key_value_heads: 1
      head_dim: 256
      max_position_embeddings: 8192
      rms_norm_eps: 1.0e-06
      rope_theta: 10000.0
      attention_bias: false
      attention_dropout: 0.0