| !!python/object:groot.vla.omni.configs.base_config.Config |
| data: !!python/object:groot.vla.omni.configs.data.data_config.DataConfig |
| datasets: |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks |
| dataset_type: physical_embodiment |
| embodiment_tag: xdof_relative_eef_relative_joint |
| mix_ratio: 0.1 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task |
| dataset_type: physical_embodiment |
| embodiment_tag: xdof_relative_eef_relative_joint_subtask |
| mix_ratio: 0.2 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17 |
| - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped |
| dataset_type: physical_embodiment |
| embodiment_tag: oxe_droid_relative_eef_relative_joint |
| mix_ratio: 0.1 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged |
| dataset_type: physical_embodiment |
| embodiment_tag: real_g1_relative_eef_relative_joints |
| mix_ratio: 0.05 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1 |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2 |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories |
| dataset_type: physical_embodiment |
| embodiment_tag: real_r1_pro_sharpa_relative_eef |
| mix_ratio: 0.05 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup |
| dataset_type: physical_embodiment |
| embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka |
| mix_ratio: 0.25 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged |
| dataset_type: physical_embodiment |
| embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights |
| mix_ratio: 0.2 |
| - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig |
| dataset_paths: |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1 |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2 |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone |
| - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video |
| dataset_type: physical_embodiment |
| embodiment_tag: real_r1_pro_sharpa_relative_eef_human |
| mix_ratio: 0.05 |
| download_cache: false |
| episode_sampling_rate: 0.1 |
| image_crop_size: |
| - 244 |
| - 244 |
| image_target_size: |
| - 224 |
| - 224 |
| max_prompt_trajectories: 5 |
| mock_dataset_mode: false |
| modality_configs: |
| oxe_droid_relative_eef_relative_joint: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - &id004 !!python/object/apply:groot.vla.omni.data.types.ActionFormat |
| - xyz+rot6d |
| - &id001 !!python/object/apply:groot.vla.omni.data.types.ActionFormat |
| - default |
| - *id001 |
| action_representation: |
| - &id002 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation |
| - relative |
| - &id005 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation |
| - absolute |
| - *id002 |
| action_type: |
| - &id006 !!python/object/apply:groot.vla.omni.data.types.ActionType |
| - eef |
| - &id003 !!python/object/apply:groot.vla.omni.data.types.ActionType |
| - non_eef |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: |
| - joint_position |
| hand_keys: |
| - gripper_position |
| loss_weights: null |
| modality_keys: |
| - eef_9d |
| - gripper_position |
| - joint_position |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - eef_9d |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.language.language_instruction |
| - annotation.language.language_instruction_2 |
| - annotation.language.language_instruction_3 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - eef_9d |
| - gripper_position |
| - joint_position |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -15 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - exterior_image_1_left |
| - wrist_image_left |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| real_g1_relative_eef_relative_joints: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| - *id005 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: |
| - left_arm |
| - right_arm |
| - waist |
| - base_height_command |
| - navigate_command |
| hand_keys: |
| - left_hand |
| - right_hand |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef_9d |
| - right_wrist_eef_9d |
| - left_hand |
| - right_hand |
| - left_arm |
| - right_arm |
| - waist |
| - base_height_command |
| - navigate_command |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef_9d |
| - right_wrist_eef_9d |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.human.task_description |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef_9d |
| - right_wrist_eef_9d |
| - left_hand |
| - right_hand |
| - left_arm |
| - right_arm |
| - waist |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -20 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - ego_view |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| real_r1_pro_sharpa_relative_eef: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: [] |
| hand_keys: |
| - left_hand_joints |
| - right_hand_joints |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.human.coarse_action |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -20 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - ego_view_res320x240_freq20 |
| - left_wrist_view_res320x240_freq20 |
| - right_wrist_view_res320x240_freq20 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| real_r1_pro_sharpa_relative_eef_human: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: [] |
| hand_keys: |
| - left_hand_joints |
| - right_hand_joints |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.human.coarse_action |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: true |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -20 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - ego_view_res320x240_freq20 |
| - left_wrist_view_res320x240_freq20 |
| - right_wrist_view_res320x240_freq20 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| real_r1_pro_sharpa_relative_eef_maxinsights: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: [] |
| hand_keys: |
| - left_hand_joints |
| - right_hand_joints |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.human.coarse_action |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: true |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -30 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - ego_view_cropratio_res320x240_freq30 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| real_r1_pro_sharpa_relative_eef_mecka: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: [] |
| hand_keys: |
| - left_hand_joints |
| - right_hand_joints |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.human.coarse_action |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: true |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_hand_joints |
| - right_hand_joints |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -30 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - ego_view_cropratio_res320x240_freq30 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| xdof_relative_eef_relative_joint: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| - *id002 |
| - *id002 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: |
| - left_joint_pos |
| - right_joint_pos |
| hand_keys: |
| - left_gripper_pos |
| - right_gripper_pos |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_gripper_pos |
| - right_gripper_pos |
| - left_joint_pos |
| - right_joint_pos |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.task |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_gripper_pos |
| - right_gripper_pos |
| - left_joint_pos |
| - right_joint_pos |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -30 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - top_camera-images-rgb_320_240 |
| - left_camera-images-rgb_320_240 |
| - right_camera-images-rgb_320_240 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| xdof_relative_eef_relative_joint_subtask: |
| action: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: |
| - *id004 |
| - *id004 |
| - *id001 |
| - *id001 |
| - *id001 |
| - *id001 |
| action_representation: |
| - *id002 |
| - *id002 |
| - *id005 |
| - *id005 |
| - *id002 |
| - *id002 |
| action_type: |
| - *id006 |
| - *id006 |
| - *id003 |
| - *id003 |
| - *id003 |
| - *id003 |
| delta_indices: |
| - 0 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| exclude_state: false |
| extra_keys: |
| - left_joint_pos |
| - right_joint_pos |
| hand_keys: |
| - left_gripper_pos |
| - right_gripper_pos |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_gripper_pos |
| - right_gripper_pos |
| - left_joint_pos |
| - right_joint_pos |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| language: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - annotation.sub_task |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| state: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - left_wrist_eef |
| - right_wrist_eef |
| - left_gripper_pos |
| - right_gripper_pos |
| - left_joint_pos |
| - right_joint_pos |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| video: !!python/object:groot.vla.omni.data.types.ModalityConfig |
| action_format: null |
| action_representation: null |
| action_type: null |
| delta_indices: |
| - -30 |
| - 0 |
| exclude_state: false |
| extra_keys: null |
| hand_keys: null |
| loss_weights: null |
| modality_keys: |
| - top_camera-images-rgb_320_240 |
| - left_camera-images-rgb_320_240 |
| - right_camera-images-rgb_320_240 |
| normalization_mode: null |
| normalize_rotation: true |
| wrist_keys: null |
| mode: single_turn |
| num_prompt_trajectories: 2 |
| num_shards_per_epoch: 100000 |
| override_pretraining_statistics: false |
| random_chop: 0.0 |
| seed: 24 |
| shard_size: 1024 |
| shuffle: true |
| subsample_ratio: 1.0 |
| variable_num_demos: false |
| video_backend: torchcodec |
| load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml |
| model: !!python/object:groot.vla.omni.configs.model.groot_n1d5_qwen.GrootN1d5QwenConfig |
| _attn_implementation_internal: null |
| _commit_hash: null |
| _name_or_path: '' |
| _output_attentions: false |
| action_horizon: 40 |
| action_space_prompt: false |
| add_cross_attention: false |
| add_pos_embed: true |
| apply_sincos_state_encoding: false |
| architectures: null |
| attn_dropout: 0.2 |
| backbone_embedding_dim: 2048 |
| bad_words_ids: null |
| begin_suppress_tokens: null |
| bos_token_id: null |
| chunk_size_feed_forward: 0 |
| color_jitter_params: |
| brightness: 0.3 |
| contrast: 0.4 |
| hue: 0.08 |
| saturation: 0.5 |
| crop_fraction: 0.95 |
| cross_attention_hidden_size: null |
| decoder_start_token_id: null |
| diffusion_model_cfg: |
| attention_head_dim: 48 |
| cross_attention_dim: 2048 |
| dropout: 0.2 |
| final_dropout: true |
| interleave_self_attention: true |
| norm_type: ada_norm |
| num_attention_heads: 32 |
| num_layers: 32 |
| output_dim: 1024 |
| positional_embeddings: null |
| dit_latent_dim: 1536 |
| diversity_penalty: 0.0 |
| do_human_interpolation: false |
| do_sample: false |
| dtype: null |
| early_stopping: false |
| encoder_no_repeat_ngram_size: 0 |
| eos_token_id: null |
| exclude_state: false |
| exponential_decay_length_penalty: null |
| finetuning_task: null |
| forced_bos_token_id: null |
| forced_eos_token_id: null |
| formalize_language: true |
| hidden_size: 1024 |
| human_embodiment_tags: null |
| id2label: |
| 0: LABEL_0 |
| 1: LABEL_1 |
| image_crop_size: !!python/tuple |
| - 230 |
| - 230 |
| image_target_size: !!python/tuple |
| - 256 |
| - 256 |
| interpolation_steps: 20 |
| is_decoder: false |
| is_encoder_decoder: false |
| label2id: |
| LABEL_0: 0 |
| LABEL_1: 1 |
| language_dropout_prob: 0.0 |
| length_penalty: 1.0 |
| letter_box_transform: false |
| load_bf16: true |
| max_action_dim: 132 |
| max_length: 20 |
| max_num_embodiments: 32 |
| max_seq_len: 1024 |
| max_state_dim: 132 |
| min_length: 0 |
| model_dtype: bfloat16 |
| model_type: GrootN1d5Qwen |
| no_repeat_ngram_size: 0 |
| noise_beta_alpha: 1.5 |
| noise_beta_beta: 1.0 |
| noise_s: 0.999 |
| num_beam_groups: 1 |
| num_beams: 1 |
| num_inference_timesteps: 4 |
| num_return_sequences: 1 |
| num_timestep_buckets: 1000 |
| output_hidden_states: false |
| output_scores: false |
| pad_token_id: null |
| prefix: null |
| problem_type: null |
| pruned_heads: {} |
| random_history_crop: true |
| random_rotation_angle: 0 |
| remove_invalid_values: false |
| repetition_penalty: 1.0 |
| reproject_vision: false |
| return_dict: true |
| return_dict_in_generate: false |
| rtc_ramp_rate: 6.0 |
| select_layer: 16 |
| sep_token_id: null |
| shortest_image_edge: 256 |
| state_dropout_prob: 0.2 |
| state_gaussian_noise_std: 0.0 |
| suppress_tokens: null |
| task_specific_params: null |
| temperature: 1.0 |
| tf_legacy_loss: false |
| tie_encoder_decoder: false |
| tie_word_embeddings: true |
| tokenizer_class: null |
| top_k: 50 |
| top_p: 1.0 |
| torchscript: false |
| transformers_version: null |
| tune_diffusion_model: true |
| tune_linear: true |
| tune_llm: false |
| tune_projector: true |
| tune_top_llm_layers: 0 |
| tune_visual: false |
| tune_vlln: true |
| typical_p: 1.0 |
| use_albumentations: true |
| use_alternate_vl_dit: true |
| use_bfloat16: false |
| use_flash_attention: true |
| use_future_tokens: false |
| use_mean_std: false |
| use_percentiles: true |
| use_vl_self_attention: true |
| use_vlln: true |
| vl_self_attention_cfg: |
| attention_head_dim: 64 |
| dropout: 0.2 |
| final_dropout: true |
| num_attention_heads: 32 |
| num_layers: 4 |
| positional_embeddings: null |
| vlm_backend: qwen3 |
| vlm_model_path: nvidia/Cosmos-Reason2-2B |
| training: !!python/object:groot.vla.omni.configs.training.training_config.TrainingConfig |
| assert_loss_less_than: null |
| batch_size: 32 |
| bf16: true |
| dataloader_num_workers: 4 |
| deepspeed_stage: 2 |
| enable_profiling: false |
| eval_batch_size: 2 |
| eval_bf16: true |
| eval_set_split_ratio: 0.1 |
| eval_steps: 500 |
| eval_strategy: 'no' |
| experiment_name: null |
| fp16: false |
| global_batch_size: 1024 |
| gradient_accumulation_steps: 1 |
| gradient_checkpointing: false |
| learning_rate: 5.0e-05 |
| logging_steps: 10 |
| lr_scheduler_type: cosine |
| max_concurrent_uploads: 2 |
| max_grad_norm: 1.0 |
| max_retries: 3 |
| max_steps: 200000 |
| muon_lr: 0.005 |
| num_gpus: 256 |
| optim: adamw_torch_fused |
| output_dir: nvidia/Cosmos-Reason2-2B |
| remove_unused_columns: false |
| save_best_eval_metric_greater_is_better: true |
| save_best_eval_metric_name: '' |
| save_steps: 1000 |
| save_total_limit: 5 |
| save_vl_model: false |
| skip_spike: true |
| skip_spike_ema_alpha: 0.99 |
| skip_spike_max_consecutive: 10 |
| skip_spike_threshold: 5.0 |
| start_from_checkpoint: null |
| tf32: true |
| upload_checkpoints: true |
| upload_every: 1000 |
| upload_last_n_checkpoints: 5 |
| use_ddp: false |
| use_legacy_wd_application: false |
| use_muon: false |
| use_wandb: true |
| wandb_project: human_pretraining_n15_galaxea_sharpa |
| warmup_ratio: 0.05 |
| warmup_steps: 0 |
| weight_decay: 1.0e-05 |
| wsd_decay_type: cosine |
| wsd_stable_ratio: 0.8 |
|
|