!!python/object:groot.vla.omni.configs.base_config.Config data: !!python/object:groot.vla.omni.configs.data.data_config.DataConfig datasets: - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks dataset_type: physical_embodiment embodiment_tag: xdof_relative_eef_relative_joint mix_ratio: 0.1 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task dataset_type: physical_embodiment embodiment_tag: xdof_relative_eef_relative_joint_subtask mix_ratio: 0.2 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17 - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped dataset_type: physical_embodiment embodiment_tag: oxe_droid_relative_eef_relative_joint mix_ratio: 0.1 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged dataset_type: physical_embodiment embodiment_tag: real_g1_relative_eef_relative_joints mix_ratio: 0.05 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories dataset_type: physical_embodiment embodiment_tag: real_r1_pro_sharpa_relative_eef mix_ratio: 0.05 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup dataset_type: physical_embodiment embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka mix_ratio: 0.25 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged dataset_type: physical_embodiment embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights mix_ratio: 0.2 - !!python/object:groot.vla.omni.configs.data.data_config.SingleDatasetConfig dataset_paths: - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2 - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video dataset_type: physical_embodiment embodiment_tag: real_r1_pro_sharpa_relative_eef_human mix_ratio: 0.05 download_cache: false episode_sampling_rate: 0.1 image_crop_size: - 244 - 244 image_target_size: - 224 - 224 max_prompt_trajectories: 5 mock_dataset_mode: false modality_configs: oxe_droid_relative_eef_relative_joint: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - &id004 !!python/object/apply:groot.vla.omni.data.types.ActionFormat - xyz+rot6d - &id001 !!python/object/apply:groot.vla.omni.data.types.ActionFormat - default - *id001 action_representation: - &id002 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation - relative - &id005 !!python/object/apply:groot.vla.omni.data.types.ActionRepresentation - absolute - *id002 action_type: - &id006 !!python/object/apply:groot.vla.omni.data.types.ActionType - eef - &id003 !!python/object/apply:groot.vla.omni.data.types.ActionType - non_eef - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: - joint_position hand_keys: - gripper_position loss_weights: null modality_keys: - eef_9d - gripper_position - joint_position normalization_mode: null normalize_rotation: true wrist_keys: - eef_9d language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.language.language_instruction - annotation.language.language_instruction_2 - annotation.language.language_instruction_3 normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - eef_9d - gripper_position - joint_position normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -15 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - exterior_image_1_left - wrist_image_left normalization_mode: null normalize_rotation: true wrist_keys: null real_g1_relative_eef_relative_joints: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 - *id002 - *id002 - *id005 - *id005 - *id005 action_type: - *id006 - *id006 - *id003 - *id003 - *id003 - *id003 - *id003 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: - left_arm - right_arm - waist - base_height_command - navigate_command hand_keys: - left_hand - right_hand loss_weights: null modality_keys: - left_wrist_eef_9d - right_wrist_eef_9d - left_hand - right_hand - left_arm - right_arm - waist - base_height_command - navigate_command normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef_9d - right_wrist_eef_9d language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.human.task_description normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef_9d - right_wrist_eef_9d - left_hand - right_hand - left_arm - right_arm - waist normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -20 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - ego_view normalization_mode: null normalize_rotation: true wrist_keys: null real_r1_pro_sharpa_relative_eef: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 action_type: - *id006 - *id006 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: [] hand_keys: - left_hand_joints - right_hand_joints loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.human.coarse_action normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -20 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - ego_view_res320x240_freq20 - left_wrist_view_res320x240_freq20 - right_wrist_view_res320x240_freq20 normalization_mode: null normalize_rotation: true wrist_keys: null real_r1_pro_sharpa_relative_eef_human: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 action_type: - *id006 - *id006 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: [] hand_keys: - left_hand_joints - right_hand_joints loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.human.coarse_action normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: true extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -20 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - ego_view_res320x240_freq20 - left_wrist_view_res320x240_freq20 - right_wrist_view_res320x240_freq20 normalization_mode: null normalize_rotation: true wrist_keys: null real_r1_pro_sharpa_relative_eef_maxinsights: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 action_type: - *id006 - *id006 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: [] hand_keys: - left_hand_joints - right_hand_joints loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.human.coarse_action normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: true extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -30 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - ego_view_cropratio_res320x240_freq30 normalization_mode: null normalize_rotation: true wrist_keys: null real_r1_pro_sharpa_relative_eef_mecka: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 action_type: - *id006 - *id006 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: [] hand_keys: - left_hand_joints - right_hand_joints loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.human.coarse_action normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: true extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_hand_joints - right_hand_joints normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -30 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - ego_view_cropratio_res320x240_freq30 normalization_mode: null normalize_rotation: true wrist_keys: null xdof_relative_eef_relative_joint: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 - *id002 - *id002 action_type: - *id006 - *id006 - *id003 - *id003 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: - left_joint_pos - right_joint_pos hand_keys: - left_gripper_pos - right_gripper_pos loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_gripper_pos - right_gripper_pos - left_joint_pos - right_joint_pos normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.task normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_gripper_pos - right_gripper_pos - left_joint_pos - right_joint_pos normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -30 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - top_camera-images-rgb_320_240 - left_camera-images-rgb_320_240 - right_camera-images-rgb_320_240 normalization_mode: null normalize_rotation: true wrist_keys: null xdof_relative_eef_relative_joint_subtask: action: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: - *id004 - *id004 - *id001 - *id001 - *id001 - *id001 action_representation: - *id002 - *id002 - *id005 - *id005 - *id002 - *id002 action_type: - *id006 - *id006 - *id003 - *id003 - *id003 - *id003 delta_indices: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 - 38 - 39 exclude_state: false extra_keys: - left_joint_pos - right_joint_pos hand_keys: - left_gripper_pos - right_gripper_pos loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_gripper_pos - right_gripper_pos - left_joint_pos - right_joint_pos normalization_mode: null normalize_rotation: true wrist_keys: - left_wrist_eef - right_wrist_eef language: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - annotation.sub_task normalization_mode: null normalize_rotation: true wrist_keys: null state: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - left_wrist_eef - right_wrist_eef - left_gripper_pos - right_gripper_pos - left_joint_pos - right_joint_pos normalization_mode: null normalize_rotation: true wrist_keys: null video: !!python/object:groot.vla.omni.data.types.ModalityConfig action_format: null action_representation: null action_type: null delta_indices: - -30 - 0 exclude_state: false extra_keys: null hand_keys: null loss_weights: null modality_keys: - top_camera-images-rgb_320_240 - left_camera-images-rgb_320_240 - right_camera-images-rgb_320_240 normalization_mode: null normalize_rotation: true wrist_keys: null mode: single_turn num_prompt_trajectories: 2 num_shards_per_epoch: 100000 override_pretraining_statistics: false random_chop: 0.0 seed: 24 shard_size: 1024 shuffle: true subsample_ratio: 1.0 variable_num_demos: false video_backend: torchcodec load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml model: !!python/object:groot.vla.omni.configs.model.groot_n1d5_qwen.GrootN1d5QwenConfig _attn_implementation_internal: null _commit_hash: null _name_or_path: '' _output_attentions: false action_horizon: 40 action_space_prompt: false add_cross_attention: false add_pos_embed: true apply_sincos_state_encoding: false architectures: null attn_dropout: 0.2 backbone_embedding_dim: 2048 bad_words_ids: null begin_suppress_tokens: null bos_token_id: null chunk_size_feed_forward: 0 color_jitter_params: brightness: 0.3 contrast: 0.4 hue: 0.08 saturation: 0.5 crop_fraction: 0.95 cross_attention_hidden_size: null decoder_start_token_id: null diffusion_model_cfg: attention_head_dim: 48 cross_attention_dim: 2048 dropout: 0.2 final_dropout: true interleave_self_attention: true norm_type: ada_norm num_attention_heads: 32 num_layers: 32 output_dim: 1024 positional_embeddings: null dit_latent_dim: 1536 diversity_penalty: 0.0 do_human_interpolation: false do_sample: false dtype: null early_stopping: false encoder_no_repeat_ngram_size: 0 eos_token_id: null exclude_state: false exponential_decay_length_penalty: null finetuning_task: null forced_bos_token_id: null forced_eos_token_id: null formalize_language: true hidden_size: 1024 human_embodiment_tags: null id2label: 0: LABEL_0 1: LABEL_1 image_crop_size: !!python/tuple - 230 - 230 image_target_size: !!python/tuple - 256 - 256 interpolation_steps: 20 is_decoder: false is_encoder_decoder: false label2id: LABEL_0: 0 LABEL_1: 1 language_dropout_prob: 0.0 length_penalty: 1.0 letter_box_transform: false load_bf16: true max_action_dim: 132 max_length: 20 max_num_embodiments: 32 max_seq_len: 1024 max_state_dim: 132 min_length: 0 model_dtype: bfloat16 model_type: GrootN1d5Qwen no_repeat_ngram_size: 0 noise_beta_alpha: 1.5 noise_beta_beta: 1.0 noise_s: 0.999 num_beam_groups: 1 num_beams: 1 num_inference_timesteps: 4 num_return_sequences: 1 num_timestep_buckets: 1000 output_hidden_states: false output_scores: false pad_token_id: null prefix: null problem_type: null pruned_heads: {} random_history_crop: true random_rotation_angle: 0 remove_invalid_values: false repetition_penalty: 1.0 reproject_vision: false return_dict: true return_dict_in_generate: false rtc_ramp_rate: 6.0 select_layer: 16 sep_token_id: null shortest_image_edge: 256 state_dropout_prob: 0.2 state_gaussian_noise_std: 0.0 suppress_tokens: null task_specific_params: null temperature: 1.0 tf_legacy_loss: false tie_encoder_decoder: false tie_word_embeddings: true tokenizer_class: null top_k: 50 top_p: 1.0 torchscript: false transformers_version: null tune_diffusion_model: true tune_linear: true tune_llm: false tune_projector: true tune_top_llm_layers: 0 tune_visual: false tune_vlln: true typical_p: 1.0 use_albumentations: true use_alternate_vl_dit: true use_bfloat16: false use_flash_attention: true use_future_tokens: false use_mean_std: false use_percentiles: true use_vl_self_attention: true use_vlln: true vl_self_attention_cfg: attention_head_dim: 64 dropout: 0.2 final_dropout: true num_attention_heads: 32 num_layers: 4 positional_embeddings: null vlm_backend: qwen3 vlm_model_path: nvidia/Cosmos-Reason2-2B training: !!python/object:groot.vla.omni.configs.training.training_config.TrainingConfig assert_loss_less_than: null batch_size: 32 bf16: true dataloader_num_workers: 4 deepspeed_stage: 2 enable_profiling: false eval_batch_size: 2 eval_bf16: true eval_set_split_ratio: 0.1 eval_steps: 500 eval_strategy: 'no' experiment_name: null fp16: false global_batch_size: 1024 gradient_accumulation_steps: 1 gradient_checkpointing: false learning_rate: 5.0e-05 logging_steps: 10 lr_scheduler_type: cosine max_concurrent_uploads: 2 max_grad_norm: 1.0 max_retries: 3 max_steps: 200000 muon_lr: 0.005 num_gpus: 256 optim: adamw_torch_fused output_dir: nvidia/Cosmos-Reason2-2B remove_unused_columns: false save_best_eval_metric_greater_is_better: true save_best_eval_metric_name: '' save_steps: 1000 save_total_limit: 5 save_vl_model: false skip_spike: true skip_spike_ema_alpha: 0.99 skip_spike_max_consecutive: 10 skip_spike_threshold: 5.0 start_from_checkpoint: null tf32: true upload_checkpoints: true upload_every: 1000 upload_last_n_checkpoints: 5 use_ddp: false use_legacy_wd_application: false use_muon: false use_wandb: true wandb_project: human_pretraining_n15_galaxea_sharpa warmup_ratio: 0.05 warmup_steps: 0 weight_decay: 1.0e-05 wsd_decay_type: cosine wsd_stable_ratio: 0.8