---
# Experiment configuration: model, data mixture and training hyperparameters.
#
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed copy of this file. Key order and every value are
# unchanged; the nesting boundaries (where `id2label`, `label2id`,
# `color_jitter_params`, `vl_self_attention_cfg`, `diffusion_model_cfg`,
# `data` and `training` end) are inferred -- TODO confirm against the
# consuming config schema.
load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml

model:
  return_dict: true
  output_hidden_states: false
  torchscript: false
  dtype: null
  pruned_heads: {}
  tie_word_embeddings: true
  chunk_size_feed_forward: 0
  is_encoder_decoder: false
  is_decoder: false
  cross_attention_hidden_size: null
  add_cross_attention: false
  tie_encoder_decoder: false
  architectures: null
  finetuning_task: null
  id2label:
    0: LABEL_0
    1: LABEL_1
  label2id:
    LABEL_0: 0
    LABEL_1: 1
  task_specific_params: null
  problem_type: null
  tokenizer_class: null
  prefix: null
  bos_token_id: null
  pad_token_id: null
  eos_token_id: null
  sep_token_id: null
  decoder_start_token_id: null
  max_length: 20
  min_length: 0
  do_sample: false
  early_stopping: false
  num_beams: 1
  temperature: 1.0
  top_k: 50
  top_p: 1.0
  typical_p: 1.0
  repetition_penalty: 1.0
  length_penalty: 1.0
  no_repeat_ngram_size: 0
  encoder_no_repeat_ngram_size: 0
  bad_words_ids: null
  num_return_sequences: 1
  output_scores: false
  return_dict_in_generate: false
  forced_bos_token_id: null
  forced_eos_token_id: null
  remove_invalid_values: false
  exponential_decay_length_penalty: null
  suppress_tokens: null
  begin_suppress_tokens: null
  num_beam_groups: 1
  diversity_penalty: 0.0
  transformers_version: null
  model_type: GrootN1d5Qwen
  model_dtype: bfloat16
  vlm_backend: qwen3
  vlm_model_path: nvidia/Cosmos-Reason2-2B
  backbone_embedding_dim: 2048
  tune_llm: false
  tune_top_llm_layers: 0
  tune_visual: false
  tune_linear: true
  select_layer: 16
  reproject_vision: false
  use_flash_attention: true
  load_bf16: true
  exclude_state: false
  image_crop_size: [230, 230]
  image_target_size: [256, 256]
  random_rotation_angle: 0
  color_jitter_params:
    brightness: 0.3
    contrast: 0.4
    saturation: 0.5
    hue: 0.08
  formalize_language: true
  action_space_prompt: false
  apply_sincos_state_encoding: false
  letter_box_transform: false
  use_percentiles: true
  use_mean_std: false
  use_albumentations: true
  shortest_image_edge: 256
  crop_fraction: 0.95
  random_history_crop: true
  state_gaussian_noise_std: 0.0
  do_human_interpolation: false
  interpolation_steps: 20
  human_embodiment_tags: null
  max_state_dim: 132
  max_action_dim: 132
  # Same length as the 40-entry action `delta_indices` lists under
  # data.modality_configs.
  action_horizon: 40
  hidden_size: 1024
  dit_latent_dim: 1536
  state_dropout_prob: 0.2
  language_dropout_prob: 0.0
  add_pos_embed: true
  attn_dropout: 0.2
  use_vlln: true
  use_vl_self_attention: true
  max_seq_len: 1024
  use_future_tokens: false
  use_alternate_vl_dit: true
  vl_self_attention_cfg:
    positional_embeddings: null
    num_layers: 4
    num_attention_heads: 32
    attention_head_dim: 64
    dropout: 0.2
    final_dropout: true
  diffusion_model_cfg:
    positional_embeddings: null
    num_layers: 32
    num_attention_heads: 32
    attention_head_dim: 48
    norm_type: ada_norm
    dropout: 0.2
    final_dropout: true
    output_dim: 1024
    interleave_self_attention: true
    cross_attention_dim: 2048
  num_inference_timesteps: 4
  noise_beta_alpha: 1.5
  noise_beta_beta: 1.0
  noise_s: 0.999
  num_timestep_buckets: 1000
  tune_projector: true
  tune_diffusion_model: true
  tune_vlln: true
  max_num_embodiments: 32
  rtc_ramp_rate: 6.0
  tf_legacy_loss: false
  use_bfloat16: false

data:
  # Dataset mixture. Each entry's `embodiment_tag` has a same-named entry
  # under `modality_configs` below. The listed mix_ratio values sum to 1.0.
  datasets:
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks
      embodiment_tag: xdof_relative_eef_relative_joint
      mix_ratio: 0.1
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task
      embodiment_tag: xdof_relative_eef_relative_joint_subtask
      mix_ratio: 0.2
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17
        - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped
      embodiment_tag: oxe_droid_relative_eef_relative_joint
      mix_ratio: 0.1
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged
      embodiment_tag: real_g1_relative_eef_relative_joints
      mix_ratio: 0.05
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories
      embodiment_tag: real_r1_pro_sharpa_relative_eef
      mix_ratio: 0.05
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup
      embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka
      mix_ratio: 0.25
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged
      embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights
      mix_ratio: 0.2
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video
      embodiment_tag: real_r1_pro_sharpa_relative_eef_human
      mix_ratio: 0.05
      dataset_type: physical_embodiment

  # Per-embodiment modality settings, keyed by embodiment tag. Every
  # embodiment has `video`, `state`, `action` and `language` sub-sections
  # that share one field layout.
  modality_configs:
    real_g1_relative_eef_relative_joints:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
          - left_hand
          - right_hand
          - left_arm
          - right_arm
          - waist
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
          - left_hand
          - right_hand
          - left_arm
          - right_arm
          - waist
          - base_height_command
          - navigate_command
        normalization_mode: null
        # One empty-mapping placeholder per modality key (9 here).
        action_representation: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
        hand_keys:
          - left_hand
          - right_hand
        extra_keys:
          - left_arm
          - right_arm
          - waist
          - base_height_command
          - navigate_command
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.task_description
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    real_r1_pro_sharpa_relative_eef_mecka:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - ego_view_cropratio_res320x240_freq30
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        # Set to true for this embodiment's state (also for the `_human`
        # and `_maxinsights` embodiments); false elsewhere.
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    oxe_droid_relative_eef_relative_joint:
      video:
        delta_indices: [-15, 0]
        modality_keys:
          - exterior_image_1_left
          - wrist_image_left
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - eef_9d
          - gripper_position
          - joint_position
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - eef_9d
          - gripper_position
          - joint_position
        normalization_mode: null
        action_representation: [{}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}]
        action_format: [{}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - eef_9d
        hand_keys:
          - gripper_position
        extra_keys:
          - joint_position
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.language.language_instruction
          - annotation.language.language_instruction_2
          - annotation.language.language_instruction_3
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    real_r1_pro_sharpa_relative_eef_human:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view_res320x240_freq20
          - left_wrist_view_res320x240_freq20
          - right_wrist_view_res320x240_freq20
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    xdof_relative_eef_relative_joint:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - top_camera-images-rgb_320_240
          - left_camera-images-rgb_320_240
          - right_camera-images-rgb_320_240
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: [{}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_gripper_pos
          - right_gripper_pos
        extra_keys:
          - left_joint_pos
          - right_joint_pos
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.task
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    # Same settings as `xdof_relative_eef_relative_joint` except for the
    # language key (`annotation.sub_task` instead of `annotation.task`).
    xdof_relative_eef_relative_joint_subtask:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - top_camera-images-rgb_320_240
          - left_camera-images-rgb_320_240
          - right_camera-images-rgb_320_240
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: [{}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_gripper_pos
          - right_gripper_pos
        extra_keys:
          - left_joint_pos
          - right_joint_pos
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.sub_task
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    real_r1_pro_sharpa_relative_eef:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view_res320x240_freq20
          - left_wrist_view_res320x240_freq20
          - right_wrist_view_res320x240_freq20
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

    real_r1_pro_sharpa_relative_eef_maxinsights:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - ego_view_cropratio_res320x240_freq30
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
          10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
          20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
          30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null

  download_cache: false
  shard_size: 1024
  episode_sampling_rate: 0.1
  num_shards_per_epoch: 100000
  override_pretraining_statistics: false
  mode: single_turn
  random_chop: 0.0
  mock_dataset_mode: false
  num_prompt_trajectories: 2
  variable_num_demos: false
  max_prompt_trajectories: 5
  shuffle: true
  seed: 24
  subsample_ratio: 1.0
  # NOTE(review): these differ from model.image_crop_size ([230, 230]) and
  # model.image_target_size ([256, 256]) -- confirm which pair is in effect.
  image_crop_size: [244, 244]
  image_target_size: [224, 224]
  video_backend: torchcodec

training:
  # NOTE(review): same value as model.vlm_model_path -- confirm this is the
  # intended output directory.
  output_dir: nvidia/Cosmos-Reason2-2B
  experiment_name: null
  max_steps: 200000
  global_batch_size: 1024
  batch_size: 32
  gradient_accumulation_steps: 1
  use_muon: false
  muon_lr: 0.005
  use_legacy_wd_application: false
  learning_rate: 5.0e-05
  lr_scheduler_type: cosine
  weight_decay: 1.0e-05
  warmup_ratio: 0.05
  warmup_steps: 0
  max_grad_norm: 1.0
  wsd_stable_ratio: 0.8
  wsd_decay_type: cosine
  optim: adamw_torch_fused
  start_from_checkpoint: null
  tf32: true
  fp16: false
  bf16: true
  eval_bf16: true
  logging_steps: 10
  save_steps: 1000
  save_total_limit: 5
  save_vl_model: false
  upload_checkpoints: true
  upload_every: 1000
  upload_last_n_checkpoints: 5
  max_concurrent_uploads: 2
  # Quoted so the value stays the string 'no' rather than a boolean.
  eval_strategy: 'no'
  eval_steps: 500
  eval_set_split_ratio: 0.1
  eval_batch_size: 2
  save_best_eval_metric_name: ''
  save_best_eval_metric_greater_is_better: true
  deepspeed_stage: 2
  gradient_checkpointing: false
  use_ddp: false
  num_gpus: 256
  dataloader_num_workers: 4
  remove_unused_columns: false
  use_wandb: true
  wandb_project: human_pretraining_n15_galaxea_sharpa
  enable_profiling: false
  max_retries: 3
  skip_spike: true
  skip_spike_threshold: 5.0
  skip_spike_ema_alpha: 0.99
  skip_spike_max_consecutive: 10
  assert_loss_less_than: null

# NOTE(review): these repeat training.max_steps / training.save_steps with
# identical values. They are kept at top level because inside `training`
# they would be duplicate keys -- TODO confirm against the config schema and
# keep both copies in sync.
max_steps: 200000
save_steps: 1000