# binarykoder's picture
# Duplicate from nvidia/GR00T-N1.7-3B
# fd0ed6b
# Root of a PyYAML-serialized experiment config. The `!!python/...` tags are
# Python-specific, so safe loaders (e.g. `yaml.safe_load`) will not construct
# this document; only load it from a trusted source.
# NOTE(review): nesting indentation is absent in this copy of the file; the
# hierarchy must be restored before it parses as a nested mapping.
!!python/object:groot.vla.omni.configs.base_config.Config
data: !!python/object:groot.vla.omni.configs.data.data_config.DataConfig
# Dataset mixture: eight entries, one per embodiment_tag. Each embodiment_tag
# has a same-named key under modality_configs. The eight mix_ratio values
# (0.1, 0.2, 0.1, 0.05, 0.05, 0.25, 0.2, 0.05) sum to 1.0.
datasets:
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks
dataset_type: physical_embodiment
embodiment_tag: xdof_relative_eef_relative_joint
mix_ratio: 0.1
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task
dataset_type: physical_embodiment
embodiment_tag: xdof_relative_eef_relative_joint_subtask
mix_ratio: 0.2
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17
- /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped
dataset_type: physical_embodiment
embodiment_tag: oxe_droid_relative_eef_relative_joint
mix_ratio: 0.1
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged
dataset_type: physical_embodiment
embodiment_tag: real_g1_relative_eef_relative_joints
mix_ratio: 0.05
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories
dataset_type: physical_embodiment
embodiment_tag: real_r1_pro_sharpa_relative_eef
mix_ratio: 0.05
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
# Largest share of the mixture (mix_ratio 0.25).
# NOTE(review): batches 1, 3-6 and 8-12 are listed; batch2 and batch7 are not,
# and every entry except batch1 ends in `_backup` — confirm both are intended.
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup
dataset_type: physical_embodiment
embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka
mix_ratio: 0.25
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged
dataset_type: physical_embodiment
embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights
mix_ratio: 0.2
- !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig
# Twelve paths share this one embodiment_tag and a single mix_ratio of 0.05.
dataset_paths:
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone
- /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video
dataset_type: physical_embodiment
embodiment_tag: real_r1_pro_sharpa_relative_eef_human
mix_ratio: 0.05
# Scalar data-pipeline settings (presumably DataConfig fields, following the
# datasets list — indentation is absent in this copy).
download_cache: false
episode_sampling_rate: 0.1
# NOTE(review): crop size (244x244) is larger than target size (224x224) here,
# and the model section separately declares image_crop_size 230x230 and
# image_target_size 256x256 — confirm which pair is actually applied.
image_crop_size:
- 244
- 244
image_target_size:
- 224
- 224
max_prompt_trajectories: 5
mock_dataset_mode: false
# Per-embodiment modality layouts, keyed by the embodiment_tag values used in
# the datasets list. Each entry holds four ModalityConfig objects: action,
# language, state, video.
modality_configs:
# This first entry DEFINES the anchors that every later entry aliases:
#   &id004 = ActionFormat(xyz+rot6d)      &id001 = ActionFormat(default)
#   &id002 = ActionRepresentation(relative)  &id005 = ActionRepresentation(absolute)
#   &id006 = ActionType(eef)              &id003 = ActionType(non_eef)
# Do not move or delete it without relocating the anchors.
oxe_droid_relative_eef_relative_joint:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# action_format / action_representation / action_type each have three entries,
# the same count as modality_keys (eef_9d, gripper_position, joint_position);
# presumably aligned by position — confirm against the loader.
action_format:
- &id004 !!python/object/apply:groot.vla.omni.data.types.ActionFormat
- xyz+rot6d
- &id001 !!python/object/apply:groot.vla.omni.data.types.ActionFormat
- default
- *id001
action_representation:
- &id002 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation
- relative
- &id005 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation
- absolute
- *id002
action_type:
- &id006 !!python/object/apply:groot.vla.omni.data.types.ActionType
- eef
- &id003 !!python/object/apply:groot.vla.omni.data.types.ActionType
- non_eef
- *id003
# 40 consecutive indices, 0..39; the count equals model.action_horizon (40).
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
# wrist_keys + hand_keys + extra_keys together list the same names as modality_keys.
extra_keys:
- joint_position
hand_keys:
- gripper_position
loss_weights: null
modality_keys:
- eef_9d
- gripper_position
- joint_position
normalization_mode: null
normalize_rotation: true
wrist_keys:
- eef_9d
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Three language-instruction annotation keys are listed for this embodiment.
modality_keys:
- annotation.language.language_instruction
- annotation.language.language_instruction_2
- annotation.language.language_instruction_3
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Same three keys as the action modality_keys.
modality_keys:
- eef_9d
- gripper_position
- joint_position
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -15 and 0.
delta_indices:
- -15
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- exterior_image_1_left
- wrist_image_left
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag real_g1_relative_eef_relative_joints.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
real_g1_relative_eef_relative_joints:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Nine entries per list, matching the nine action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
- *id002
- *id002
- *id005
- *id005
- *id005
action_type:
- *id006
- *id006
- *id003
- *id003
- *id003
- *id003
- *id003
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys:
- left_arm
- right_arm
- waist
- base_height_command
- navigate_command
hand_keys:
- left_hand
- right_hand
loss_weights: null
modality_keys:
- left_wrist_eef_9d
- right_wrist_eef_9d
- left_hand
- right_hand
- left_arm
- right_arm
- waist
- base_height_command
- navigate_command
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef_9d
- right_wrist_eef_9d
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- annotation.human.task_description
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Seven keys: the action keys minus base_height_command and navigate_command.
# NOTE(review): confirm the omission of those two keys from state is intended.
modality_keys:
- left_wrist_eef_9d
- right_wrist_eef_9d
- left_hand
- right_hand
- left_arm
- right_arm
- waist
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -20 and 0.
delta_indices:
- -20
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- ego_view
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag real_r1_pro_sharpa_relative_eef.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
real_r1_pro_sharpa_relative_eef:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Four entries per list, matching the four action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
action_type:
- *id006
- *id006
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
# Explicit empty list here, whereas the non-action modalities use null.
extra_keys: []
hand_keys:
- left_hand_joints
- right_hand_joints
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- annotation.human.coarse_action
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
# false here; the _human, _maxinsights and _mecka variants set this to true.
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -20 and 0.
delta_indices:
- -20
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Three camera streams.
modality_keys:
- ego_view_res320x240_freq20
- left_wrist_view_res320x240_freq20
- right_wrist_view_res320x240_freq20
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag real_r1_pro_sharpa_relative_eef_human.
# Identical to real_r1_pro_sharpa_relative_eef except state.exclude_state is true.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
real_r1_pro_sharpa_relative_eef_human:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Four entries per list, matching the four action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
action_type:
- *id006
- *id006
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys: []
hand_keys:
- left_hand_joints
- right_hand_joints
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- annotation.human.coarse_action
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
# true for this embodiment, although state modality_keys are still listed below.
# NOTE(review): presumably the state input is dropped for this data — confirm.
exclude_state: true
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -20 and 0.
delta_indices:
- -20
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- ego_view_res320x240_freq20
- left_wrist_view_res320x240_freq20
- right_wrist_view_res320x240_freq20
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag real_r1_pro_sharpa_relative_eef_maxinsights.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
real_r1_pro_sharpa_relative_eef_maxinsights:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Four entries per list, matching the four action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
action_type:
- *id006
- *id006
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys: []
hand_keys:
- left_hand_joints
- right_hand_joints
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- annotation.human.coarse_action
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
# true for this embodiment, although state modality_keys are still listed below.
# NOTE(review): presumably the state input is dropped for this data — confirm.
exclude_state: true
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -30 and 0.
delta_indices:
- -30
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Single ego-view stream; no wrist views are listed for this embodiment.
modality_keys:
- ego_view_cropratio_res320x240_freq30
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag real_r1_pro_sharpa_relative_eef_mecka.
# Field-for-field the same values as the _maxinsights entry.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
real_r1_pro_sharpa_relative_eef_mecka:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Four entries per list, matching the four action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
action_type:
- *id006
- *id006
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys: []
hand_keys:
- left_hand_joints
- right_hand_joints
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- annotation.human.coarse_action
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
# true for this embodiment, although state modality_keys are still listed below.
# NOTE(review): presumably the state input is dropped for this data — confirm.
exclude_state: true
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_hand_joints
- right_hand_joints
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -30 and 0.
delta_indices:
- -30
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Single ego-view stream; no wrist views are listed for this embodiment.
modality_keys:
- ego_view_cropratio_res320x240_freq30
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag xdof_relative_eef_relative_joint.
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
xdof_relative_eef_relative_joint:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Six entries per list, matching the six action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
- *id002
- *id002
action_type:
- *id006
- *id006
- *id003
- *id003
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys:
- left_joint_pos
- right_joint_pos
hand_keys:
- left_gripper_pos
- right_gripper_pos
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_gripper_pos
- right_gripper_pos
- left_joint_pos
- right_joint_pos
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Language comes from annotation.task; the _subtask variant uses annotation.sub_task.
modality_keys:
- annotation.task
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Same six keys as the action modality_keys.
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_gripper_pos
- right_gripper_pos
- left_joint_pos
- right_joint_pos
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -30 and 0.
delta_indices:
- -30
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Three camera streams.
modality_keys:
- top_camera-images-rgb_320_240
- left_camera-images-rgb_320_240
- right_camera-images-rgb_320_240
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Modality layout for embodiment_tag xdof_relative_eef_relative_joint_subtask.
# Same values as xdof_relative_eef_relative_joint except the language key
# (annotation.sub_task instead of annotation.task).
# Aliases resolve to anchors defined in the oxe_droid entry:
#   *id004 = ActionFormat(xyz+rot6d), *id001 = ActionFormat(default),
#   *id002 = relative, *id005 = absolute, *id006 = eef, *id003 = non_eef.
xdof_relative_eef_relative_joint_subtask:
action: !!python/object:groot.vla.omni.data.types.ModalityConfig
# Six entries per list, matching the six action modality_keys; presumably
# aligned by position — confirm against the loader.
action_format:
- *id004
- *id004
- *id001
- *id001
- *id001
- *id001
action_representation:
- *id002
- *id002
- *id005
- *id005
- *id002
- *id002
action_type:
- *id006
- *id006
- *id003
- *id003
- *id003
- *id003
# 40 consecutive indices, 0..39.
delta_indices:
- 0
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
exclude_state: false
extra_keys:
- left_joint_pos
- right_joint_pos
hand_keys:
- left_gripper_pos
- right_gripper_pos
loss_weights: null
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_gripper_pos
- right_gripper_pos
- left_joint_pos
- right_joint_pos
normalization_mode: null
normalize_rotation: true
wrist_keys:
- left_wrist_eef
- right_wrist_eef
language: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# The one difference from the non-subtask xdof entry.
modality_keys:
- annotation.sub_task
normalization_mode: null
normalize_rotation: true
wrist_keys: null
state: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
delta_indices:
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
# Same six keys as the action modality_keys.
modality_keys:
- left_wrist_eef
- right_wrist_eef
- left_gripper_pos
- right_gripper_pos
- left_joint_pos
- right_joint_pos
normalization_mode: null
normalize_rotation: true
wrist_keys: null
video: !!python/object:groot.vla.omni.data.types.ModalityConfig
action_format: null
action_representation: null
action_type: null
# Two indices: -30 and 0.
delta_indices:
- -30
- 0
exclude_state: false
extra_keys: null
hand_keys: null
loss_weights: null
modality_keys:
- top_camera-images-rgb_320_240
- left_camera-images-rgb_320_240
- right_camera-images-rgb_320_240
normalization_mode: null
normalize_rotation: true
wrist_keys: null
# Remaining scalar data settings (presumably DataConfig fields that follow
# modality_configs — indentation is absent in this copy).
mode: single_turn
num_prompt_trajectories: 2
num_shards_per_epoch: 100000
override_pretraining_statistics: false
random_chop: 0.0
seed: 24
shard_size: 1024
shuffle: true
subsample_ratio: 1.0
variable_num_demos: false
video_backend: torchcodec
# Repo-relative path of an experiment YAML; presumably a top-level key (sibling
# of data / model / training) naming the config this dump was built from — confirm.
load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml
# Model configuration, serialized as a GrootN1d5QwenConfig Python object.
# NOTE(review): many keys below (bad_words_ids, num_beams, top_k, id2label, ...)
# look like Hugging Face PretrainedConfig defaults rather than settings chosen
# for this run — presumably inherited; confirm before relying on them.
model: !!python/object:groot.vla.omni.configs.model.groot_n1d5_qwen.GrootN1d5QwenConfig
_attn_implementation_internal: null
_commit_hash: null
_name_or_path: ''
_output_attentions: false
# Equals the number of action delta_indices (0..39) in every modality config.
action_horizon: 40
action_space_prompt: false
add_cross_attention: false
add_pos_embed: true
apply_sincos_state_encoding: false
architectures: null
attn_dropout: 0.2
# Same value as diffusion_model_cfg.cross_attention_dim (2048).
backbone_embedding_dim: 2048
bad_words_ids: null
begin_suppress_tokens: null
bos_token_id: null
chunk_size_feed_forward: 0
# Nested mapping: brightness, contrast, hue, saturation.
color_jitter_params:
brightness: 0.3
contrast: 0.4
hue: 0.08
saturation: 0.5
crop_fraction: 0.95
cross_attention_hidden_size: null
decoder_start_token_id: null
# Nested mapping covering attention_head_dim through positional_embeddings.
# 32 heads x 48 head_dim = 1536, the same value as dit_latent_dim below.
diffusion_model_cfg:
attention_head_dim: 48
cross_attention_dim: 2048
dropout: 0.2
final_dropout: true
interleave_self_attention: true
norm_type: ada_norm
num_attention_heads: 32
num_layers: 32
output_dim: 1024
positional_embeddings: null
dit_latent_dim: 1536
diversity_penalty: 0.0
do_human_interpolation: false
do_sample: false
dtype: null
early_stopping: false
encoder_no_repeat_ngram_size: 0
eos_token_id: null
# Model-level flag is false; three embodiments set state.exclude_state: true
# in data.modality_configs. NOTE(review): confirm how the two levels interact.
exclude_state: false
exponential_decay_length_penalty: null
finetuning_task: null
forced_bos_token_id: null
forced_eos_token_id: null
formalize_language: true
hidden_size: 1024
human_embodiment_tags: null
# Integer keys 0 and 1 mapped to label strings.
id2label:
0: LABEL_0
1: LABEL_1
# Serialized as Python tuples, unlike the plain lists in the data section.
# NOTE(review): 230x230 crop / 256x256 target here versus 244x244 / 224x224 in
# the data section — confirm which pair is actually applied.
image_crop_size: !!python/tuple
- 230
- 230
image_target_size: !!python/tuple
- 256
- 256
interpolation_steps: 20
is_decoder: false
is_encoder_decoder: false
label2id:
LABEL_0: 0
LABEL_1: 1
language_dropout_prob: 0.0
length_penalty: 1.0
letter_box_transform: false
# NOTE(review): load_bf16 is true and model_dtype is bfloat16, while
# use_bfloat16 further down is false — confirm which flag is authoritative.
load_bf16: true
max_action_dim: 132
max_length: 20
max_num_embodiments: 32
max_seq_len: 1024
max_state_dim: 132
min_length: 0
model_dtype: bfloat16
model_type: GrootN1d5Qwen
no_repeat_ngram_size: 0
noise_beta_alpha: 1.5
noise_beta_beta: 1.0
noise_s: 0.999
num_beam_groups: 1
num_beams: 1
num_inference_timesteps: 4
num_return_sequences: 1
num_timestep_buckets: 1000
output_hidden_states: false
output_scores: false
pad_token_id: null
prefix: null
problem_type: null
pruned_heads: {}
random_history_crop: true
random_rotation_angle: 0
remove_invalid_values: false
repetition_penalty: 1.0
reproject_vision: false
return_dict: true
return_dict_in_generate: false
rtc_ramp_rate: 6.0
select_layer: 16
sep_token_id: null
shortest_image_edge: 256
state_dropout_prob: 0.2
state_gaussian_noise_std: 0.0
suppress_tokens: null
task_specific_params: null
temperature: 1.0
tf_legacy_loss: false
tie_encoder_decoder: false
tie_word_embeddings: true
tokenizer_class: null
top_k: 50
top_p: 1.0
torchscript: false
transformers_version: null
# tune_* flags: diffusion model, linear, projector and vlln are true;
# llm and visual are false, and tune_top_llm_layers is 0.
tune_diffusion_model: true
tune_linear: true
tune_llm: false
tune_projector: true
tune_top_llm_layers: 0
tune_visual: false
tune_vlln: true
typical_p: 1.0
use_albumentations: true
use_alternate_vl_dit: true
use_bfloat16: false
use_flash_attention: true
use_future_tokens: false
# Mutually opposite settings: mean/std off, percentiles on.
use_mean_std: false
use_percentiles: true
use_vl_self_attention: true
use_vlln: true
# Nested mapping covering attention_head_dim through positional_embeddings.
vl_self_attention_cfg:
attention_head_dim: 64
dropout: 0.2
final_dropout: true
num_attention_heads: 32
num_layers: 4
positional_embeddings: null
vlm_backend: qwen3
vlm_model_path: nvidia/Cosmos-Reason2-2B
# Training configuration, serialized as a TrainingConfig Python object.
training: !!python/object:groot.vla.omni.configs.training.training_config.TrainingConfig
assert_loss_less_than: null
# NOTE(review): batch_size 32 x num_gpus 256 x gradient_accumulation_steps 1
# = 8192, which does not equal global_batch_size 1024 (1024 / 256 = 4 per GPU).
# Confirm which of batch_size / global_batch_size takes precedence.
batch_size: 32
bf16: true
dataloader_num_workers: 4
deepspeed_stage: 2
enable_profiling: false
eval_batch_size: 2
eval_bf16: true
eval_set_split_ratio: 0.1
eval_steps: 500
# Quoted so YAML 1.1 parsers keep the string 'no' instead of boolean false.
# With evaluation set to 'no', the eval_* values above are presumably unused — confirm.
eval_strategy: 'no'
experiment_name: null
fp16: false
global_batch_size: 1024
gradient_accumulation_steps: 1
gradient_checkpointing: false
learning_rate: 5.0e-05
logging_steps: 10
# Scheduler is cosine; the wsd_* keys at the end of this section are presumably
# only read by a different scheduler type — confirm.
lr_scheduler_type: cosine
max_concurrent_uploads: 2
max_grad_norm: 1.0
max_retries: 3
max_steps: 200000
# use_muon is false further down, so this value is presumably unused — confirm.
muon_lr: 0.005
num_gpus: 256
optim: adamw_torch_fused
# NOTE(review): identical to model.vlm_model_path (nvidia/Cosmos-Reason2-2B),
# which is a model identifier rather than an output location — looks like a
# placeholder or mis-set value; confirm before reusing this config.
output_dir: nvidia/Cosmos-Reason2-2B
remove_unused_columns: false
save_best_eval_metric_greater_is_better: true
save_best_eval_metric_name: ''
save_steps: 1000
save_total_limit: 5
save_vl_model: false
# Loss-spike skipping is enabled; the three skip_spike_* values below are its parameters.
skip_spike: true
skip_spike_ema_alpha: 0.99
skip_spike_max_consecutive: 10
skip_spike_threshold: 5.0
start_from_checkpoint: null
tf32: true
# upload_every equals save_steps (1000); upload_last_n_checkpoints equals save_total_limit (5).
upload_checkpoints: true
upload_every: 1000
upload_last_n_checkpoints: 5
use_ddp: false
use_legacy_wd_application: false
use_muon: false
use_wandb: true
# NOTE(review): project name contains "n15" while load_config_path refers to
# "n17_pretrain" — confirm the project name is intended.
wandb_project: human_pretraining_n15_galaxea_sharpa
# Both warmup settings are present: ratio 0.05 and steps 0.
# NOTE(review): confirm which one the trainer honours.
warmup_ratio: 0.05
warmup_steps: 0
weight_decay: 1.0e-05
wsd_decay_type: cosine
wsd_stable_ratio: 0.8