update g0plus fold

Browse files

Files changed (4) hide show

G0Plus_3B_Fold/config.yaml +301 -0
G0Plus_3B_Fold/dataset_stats.json +0 -0
G0Plus_3B_Fold/efmnode.toml +42 -0
G0Plus_3B_Fold/model_state_dict.pt +3 -0

G0Plus_3B_Fold/config.yaml ADDED Viewed

	@@ -0,0 +1,301 @@

+seed: 7  # NOTE(review): presumably the global random seed - confirm
+resume_ckpt: null
+output_dir: ${hydra:runtime.output_dir}  # taken from Hydra's runtime output directory
+dataset_stats_cache_dir: ${oc.env:GALAXEA_FM_DATASET_STATS_CACHE_DIR}  # read from the environment; no fallback value is supplied here
+checkpointing_steps: 5000
+logger:  # experiment-logging settings
+  type: swanlab
+  log_steps: 10
+  task: ${hydra:runtime.choices.task}  # the selected option of Hydra's `task` config group
+  project: ${split:${logger.task},0}  # `split` is not an OmegaConf built-in; presumably picks the first segment of the task name - confirm
+  experiment_name: ${split:${logger.task},-1}  # presumably the last segment of the task name - confirm
+  mode: cloud
+  workspace: Galaxea-AI
+  dir: null
+batch_size_val: 16  # NOTE(review): looks like the validation batch size - confirm
+eval_episodes_num: 1
+ckpt_path: null
+env: R1ProBlocksStackEasy
+target_controller_type: bimanual_relaxed_ik
+tags: null  # also referenced by edp.tags
+edp:  # run metadata; the last four keys mirror values defined elsewhere in this file
+  card: null
+  training_time: ${now:%Y-%m-%d}_${now:%H-%M-%S}  # date and time joined by an underscore, via Hydra's `now` resolver
+  git_branch: null
+  git_commit: null
+  root: null
+  repo_ids: null
+  save_dir: ${output_dir}  # same value as the top-level output_dir
+  tags: ${tags}  # same value as the top-level tags (null)
+  max_steps: ${model.max_steps}  # resolves to null, because model.max_steps is null
+  batch_size: ${model.batch_size}  # resolves to 8, via model.batch_size
+libero_eval:  # evaluation settings for the libero_* task suites listed below
+  task_suite_names:
+  - libero_10
+  - libero_spatial
+  - libero_object
+  - libero_goal
+  num_steps_wait: 10
+  replan_steps: 5
+  num_trials: 50
+  output_dir: ${output_dir}  # same value as the top-level output_dir
+  run_id_note: null
+  env_num: 50
+data:  # dataset and processor configuration
+  dataset:
+    _target_: galaxea_fm.data.galaxea_lerobot_dataset.GalaxeaLerobotDataset  # dotted class path; presumably built with Hydra instantiate - confirm
+    dataset_dirs:  # absolute path; NOTE(review): must exist on the machine that loads the dataset - confirm
+    - /efm-nas/efm-nas/efm-shared/data/fold/fold_towel_fixcam_demo/Bench_Fold_Towels_20250922_003_v20260127_121939
+    shape_meta:  # per-key shapes; reused by data.processor.shape_meta
+      action:  # the `shape` values below add up to 26 (6+1+6+1+6+6)
+      - key: left_arm
+        raw_shape: 6
+        shape: 6
+      - key: left_gripper
+        raw_shape: 1
+        shape: 1
+      - key: right_arm
+        raw_shape: 6
+        shape: 6
+      - key: right_gripper
+        raw_shape: 1
+        shape: 1
+      - key: torso.velocities
+        raw_shape: 6
+        shape: 6
+      - key: chassis.velocities
+        raw_shape: 6
+        shape: 6
+      state:  # the `shape` values below add up to 21 (6+1+6+1+4+3)
+      - key: left_arm
+        raw_shape: 6
+        shape: 6
+      - key: left_gripper
+        raw_shape: 1
+        shape: 1
+      - key: right_arm
+        raw_shape: 6
+        shape: 6
+      - key: right_gripper
+        raw_shape: 1
+        shape: 1
+      - key: torso
+        raw_shape: 4
+        shape: 4
+      - key: chassis
+        raw_shape: 3
+        shape: 3
+      images:  # three cameras; each `shape` takes its last two entries from model.model_meta.input_image_size
+      - key: head_rgb
+        raw_shape:
+        - 3
+        - 720
+        - 1280
+        shape:
+        - 3
+        - ${model.model_meta.input_image_size.0}
+        - ${model.model_meta.input_image_size.1}
+      - key: left_wrist_rgb
+        raw_shape:
+        - 3
+        - 720
+        - 1280
+        shape:
+        - 3
+        - ${model.model_meta.input_image_size.0}
+        - ${model.model_meta.input_image_size.1}
+      - key: right_wrist_rgb
+        raw_shape:
+        - 3
+        - 720
+        - 1280
+        shape:
+        - 3
+        - ${model.model_meta.input_image_size.0}
+        - ${model.model_meta.input_image_size.1}
+    action_size: 32  # reused as model.model_arch.horizon_steps
+    past_action_size: 0
+    obs_size: 1  # reused as data.processor.num_obs_steps and model.model_arch.cond_steps
+    ee_start_moving_thresh: 0.0
+    val_set_proportion: 0.05
+  processor:  # pre-processing settings: shapes, action/state transforms, normalization and image pipelines
+    _target_: galaxea_fm.processors.base_processor.BaseProcessor
+    shape_meta: ${data.dataset.shape_meta}  # same mapping as the dataset's shape_meta
+    num_obs_steps: ${data.dataset.obs_size}  # resolves to 1
+    num_output_cameras: 3  # matches the three image keys in shape_meta.images
+    action_output_dim: ${sum_shapes:${data.dataset.shape_meta.action}}  # `sum_shapes` is a custom resolver; presumably sums the `shape` entries (26) - confirm
+    proprio_output_dim: ${sum_shapes:${data.dataset.shape_meta.state}}  # presumably 21 by the same rule - confirm
+    action_state_transforms:  # two transforms, each limited to the listed keys
+    - _target_: galaxea_fm.transforms.relative_action.RelativeJointTransform
+      keys:
+      - left_arm
+      - right_arm
+    - _target_: galaxea_fm.transforms.misc.WrapStateAngle
+      keys:
+      - chassis
+    use_stepwise_action_norm: true
+    norm_default_mode: ${model.model_meta.norm_default_mode}  # resolves to z-score
+    norm_exception_mode:  # per-key overrides of norm_default_mode
+      action:
+        left_gripper: 0/100  # plain scalar that YAML loads as the string "0/100"
+        right_gripper: 0/100
+    action_state_merger:
+      _target_: galaxea_fm.transforms.action_state_merger.ConcatLeftAlign
+    train_transforms:  # head_rgb defines the pipeline; both wrist cameras reuse it by interpolation
+      head_rgb:
+      - _target_: torchvision.transforms.Resize
+        size: ${model.model_meta.input_image_size}
+      - _target_: galaxea_fm.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Normalize
+        mean:
+        - 0.5
+        - 0.5
+        - 0.5
+        std:
+        - 0.5
+        - 0.5
+        - 0.5
+      left_wrist_rgb: ${data.processor.train_transforms.head_rgb}
+      right_wrist_rgb: ${data.processor.train_transforms.head_rgb}
+    val_transforms:  # written out separately, but identical to train_transforms
+      head_rgb:
+      - _target_: torchvision.transforms.Resize
+        size: ${model.model_meta.input_image_size}
+      - _target_: galaxea_fm.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Normalize
+        mean:
+        - 0.5
+        - 0.5
+        - 0.5
+        std:
+        - 0.5
+        - 0.5
+        - 0.5
+      left_wrist_rgb: ${data.processor.val_transforms.head_rgb}
+      right_wrist_rgb: ${data.processor.val_transforms.head_rgb}
+    drop_high_level_prob: 1.0
+    use_zh_instruction: false
+    tokenizer: ${model.tokenizer}  # same mapping as model.tokenizer
+model:  # training hyper-parameters, followed by the tokenizer and architecture sections
+  pretrained_ckpt: null  # explicit null, matching how resume_ckpt and ckpt_path are written
+  use_pretrained_norm_stats: true
+  model_weights_to_bf16: false
+  enable_bf16_training: true
+  use_torch_compile: false
+  find_unused_parameters: false
+  batch_size: 8  # mirrored by edp.batch_size
+  num_workers: 4
+  pin_memory: true
+  persistent_workers: true
+  max_epochs: 10
+  max_steps: null  # mirrored by edp.max_steps
+  grad_accumulation_steps: 1
+  use_8bit_optimizer: false
+  learning_rate: 0.00012
+  weight_decay: 0.0001
+  betas:
+  - 0.9
+  - 0.95
+  lr_scheduler_type: cosine
+  warmup_steps: 5000
+  max_grad_norm: 1.0
+  use_ema: false
+  ema:  # NOTE(review): presumably unused while use_ema is false - confirm
+    update_after_step: 0
+    power: 0.67
+  use_sync_bn: false
+  tokenizer:  # tokenizer settings; also referenced by data.processor.tokenizer
+    _target_: galaxea_fm.models.galaxea_zero.paligemma.tokenizer.PaliGemmaTokenizer
+    tokenizer_params:
+      pretrained_model_name_or_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224  # absolute local path, same as model_arch.pretrained_model_path
+      local_files_only: true
+    pad_token_id: ${model.model_arch.pad_token_id}  # resolves to 0
+    image_token_index: ${model.model_arch.image_token_index}  # resolves to 257152
+    max_text_tokens: ${model.model_arch.max_text_tokens}  # resolves to 55
+    num_tokens_per_image: ${model.model_arch.vision.num_image_tokens}  # resolves to 256
+    num_input_images: ${model.model_arch.num_input_images}
+  model_arch:  # policy architecture: vision tower, projector and the joint vlm/proprio/action model
+    _target_: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZeroPolicy
+    model_name: galaxea_fm.models.galaxea_zero.galaxea_zero_policy.GalaxeaZero
+    pretrained_model_path: /efm-nas/efm-nas/efm-shared/pretrained_model/google/paligemma-3b-pt-224  # absolute local path, same as the tokenizer's
+    vla_training_strategy: vla-full-train
+    backbone_lr_multiplier: 1.0
+    image_token_index: 257152
+    pad_token_id: 0
+    vocab_size: 257216
+    fill_padded_with_token: false
+    embed_token_key_prefix: language_model.model.embed_tokens
+    cond_steps: ${data.dataset.obs_size}  # resolves to 1
+    horizon_steps: ${data.dataset.action_size}  # resolves to 32
+    max_text_tokens: 55
+    num_input_images: ${eval:'${model.model_arch.cond_steps} * ${data.processor.num_output_cameras}'}  # `eval` is a custom resolver; presumably 1 * 3 = 3 - confirm
+    max_image_text_tokens: ${eval:'${model.model_arch.num_input_images} * ${model.model_arch.vision.num_image_tokens}
+      + ${model.model_arch.max_text_tokens}'}
+    final_action_clip_value: null
+    action_dim: ${data.processor.action_output_dim}
+    proprio_dim: ${data.processor.proprio_output_dim}
+    action_decoder_layers: 2
+    action_expert_adaptive_mode: null
+    flow_sampling: beta
+    num_inference_steps: 10
+    vision:
+      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.SiglipVisionModel
+      key_prefix: vision_tower
+      hidden_size: 1152
+      intermediate_size: 4304
+      num_hidden_layers: 27
+      num_attention_heads: 16
+      num_channels: 3
+      image_size: 224  # also feeds model_meta.input_image_size
+      patch_size: 14
+      layer_norm_eps: 1.0e-06
+      attention_dropout: 0.0
+      num_image_tokens: 256  # consistent with (224 / 14) squared
+    vision_projector:
+      name: galaxea_fm.models.galaxea_zero.paligemma.siglip.PaliGemmaMultiModalProjector
+      key_prefix: multi_modal_projector
+      vision_config:
+        hidden_size: 1152  # same number as vision.hidden_size
+        projection_dim: 2048  # same number as joint.mixture.vlm.hidden_size
+    joint:
+      name: galaxea_fm.models.galaxea_zero.joint_model.JointModel
+      key_prefix: language_model.model
+      action_expert_adaptive_mode: null
+      module_names:
+        mlp: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaMLP
+        norm: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRMSNorm
+        rope: galaxea_fm.models.galaxea_zero.paligemma.modules.GemmaRotaryEmbedding
+      mixture:  # three sub-models; vlm is wider (2048) than proprio and action (1024)
+        vlm:
+          hidden_size: 2048
+          intermediate_size: 16384
+          use_final_norm: false
+          cache: true
+        proprio:
+          hidden_size: 1024
+          intermediate_size: 4096
+          use_final_norm: true
+          cache: true
+          adaptive_mode: null
+        action:
+          hidden_size: 1024
+          intermediate_size: 4096
+          use_final_norm: true
+          cache: false
+          adaptive_mode: null
+      time_hidden_size: 256
+      num_hidden_layers: 18
+      num_attention_heads: 8  # 8 heads x head_dim 256 = 2048, the vlm hidden_size
+      num_key_value_heads: 1
+      head_dim: 256
+      max_position_embeddings: 8192
+      rms_norm_eps: 1.0e-06
+      rope_theta: 10000.0
+      attention_bias: false
+      attention_dropout: 0.0
+  model_meta:  # values shared with the data pipeline through interpolation
+    norm_default_mode: z-score  # consumed by data.processor.norm_default_mode
+    input_image_size:  # both entries resolve to model_arch.vision.image_size (224)
+    - ${model.model_arch.vision.image_size}
+    - ${model.model_arch.vision.image_size}
+  pretrained_dataset_stats: null  # explicit null; NOTE(review): use_pretrained_norm_stats is true, so confirm where the stats are loaded from

G0Plus_3B_Fold/dataset_stats.json ADDED Viewed

The diff for this file is too large to render. See raw diff

G0Plus_3B_Fold/efmnode.toml ADDED Viewed

	@@ -0,0 +1,42 @@

+[robot]  # robot hardware selection and the list of enabled publish entries
+hardware = "R1_LITE"
+enable_publish = [  # entries commented out with # are not part of the array
+    "left_arm",
+    "left_gripper",
+    #"left_ee_pose",
+    "right_arm",
+    "right_gripper",
+    #"right_ee_pose",
+    "torso",
+    #"chassis",
+]
+[basic]  # control-loop settings
+use_ehi = false
+control_frequency = 17.0  # NOTE(review): presumably in Hz - confirm
+step_mode = "sync" # allowed values: "sync", "async"
+action_steps = 18  # NOTE(review): lower than the 32-step horizon (data.dataset.action_size) in config.yaml; presumably only part of each predicted chunk is executed - confirm
+[model]  # checkpoint location and inference back-end switches
+ckpt_dir = "/xxxx/fold_towel_g0fast"  # placeholder path; NOTE(review): presumably must be replaced with the local checkpoint directory, and the name says g0fast while this folder is G0Plus_3B_Fold - confirm
+processor = "default"
+use_trt = false
+is_torch_compile = false
+[websocket]  # websocket endpoint settings
+use_websocket = true
+host = "0.0.0.0"  # NOTE(review): if this is a bind address, 0.0.0.0 listens on every network interface - confirm
+port = 8080
+[trajectory]  # trajectory handling modes
+ensemble_mode = "none"  # the string "none", not a TOML null (TOML has no null type)
+execution_mode = "JOINT_STATE"
+[instruction]  # instruction-related feature switches; all are disabled in this file
+use_vlm = false
+bbox_as_instruction = false
+image_condition_lang_prefix = false
+pp_lower_half = false
+image_as_condition = false

G0Plus_3B_Fold/model_state_dict.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe68a453342af3d39c07fb049ee8dc1beeb7b41063ccbb5bf851e62f045ecd2c
+size 12957213816