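# The following three lines are web-page residue (not configuration); kept as comments so the file parses:
#   binarykoder's picture
#   Duplicate from nvidia/GR00T-N1.7-3B
#   fd0ed6b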
# Path of an experiment config file recorded in this dump (meaning inferred from the key name;
# the consumer of this key is not visible here — confirm).
load_config_path: groot/vla/omni/configs/experiments/r1_pro/sharpa/n17_pretrain/n17_pretrain_human_robot_cross_embodiment_fix_yam_absolute_hand_2step.yaml
# Model section.
# The source had lost all indentation, so child keys of nested mappings/sequences sat at the
# same level as their parents (producing duplicate keys such as `num_layers`, `dropout`,
# `final_dropout`). Nesting below is reconstructed; values and key order are unchanged.
model:
  return_dict: true
  output_hidden_states: false
  torchscript: false
  dtype: null
  pruned_heads: {}
  tie_word_embeddings: true
  chunk_size_feed_forward: 0
  is_encoder_decoder: false
  is_decoder: false
  cross_attention_hidden_size: null
  add_cross_attention: false
  tie_encoder_decoder: false
  architectures: null
  finetuning_task: null
  id2label:
    0: LABEL_0
    1: LABEL_1
  label2id:
    LABEL_0: 0
    LABEL_1: 1
  task_specific_params: null
  problem_type: null
  tokenizer_class: null
  prefix: null
  bos_token_id: null
  pad_token_id: null
  eos_token_id: null
  sep_token_id: null
  decoder_start_token_id: null
  max_length: 20
  min_length: 0
  do_sample: false
  early_stopping: false
  num_beams: 1
  temperature: 1.0
  top_k: 50
  top_p: 1.0
  typical_p: 1.0
  repetition_penalty: 1.0
  length_penalty: 1.0
  no_repeat_ngram_size: 0
  encoder_no_repeat_ngram_size: 0
  bad_words_ids: null
  num_return_sequences: 1
  output_scores: false
  return_dict_in_generate: false
  forced_bos_token_id: null
  forced_eos_token_id: null
  remove_invalid_values: false
  exponential_decay_length_penalty: null
  suppress_tokens: null
  begin_suppress_tokens: null
  num_beam_groups: 1
  diversity_penalty: 0.0
  transformers_version: null
  model_type: GrootN1d5Qwen
  model_dtype: bfloat16
  vlm_backend: qwen3
  vlm_model_path: nvidia/Cosmos-Reason2-2B
  backbone_embedding_dim: 2048
  tune_llm: false
  tune_top_llm_layers: 0
  tune_visual: false
  tune_linear: true
  select_layer: 16
  reproject_vision: false
  use_flash_attention: true
  load_bf16: true
  exclude_state: false
  # NOTE(review): the data section of this file lists different sizes
  # (crop 244x244, target 224x224) — confirm which pair is authoritative.
  image_crop_size: [230, 230]
  image_target_size: [256, 256]
  random_rotation_angle: 0
  color_jitter_params:
    brightness: 0.3
    contrast: 0.4
    saturation: 0.5
    hue: 0.08
  formalize_language: true
  action_space_prompt: false
  apply_sincos_state_encoding: false
  letter_box_transform: false
  use_percentiles: true
  use_mean_std: false
  use_albumentations: true
  shortest_image_edge: 256
  crop_fraction: 0.95
  random_history_crop: true
  state_gaussian_noise_std: 0.0
  do_human_interpolation: false
  interpolation_steps: 20
  human_embodiment_tags: null
  max_state_dim: 132
  max_action_dim: 132
  # Matches the 40 action delta_indices (0..39) used by every embodiment in the data section.
  action_horizon: 40
  hidden_size: 1024
  dit_latent_dim: 1536
  state_dropout_prob: 0.2
  language_dropout_prob: 0.0
  add_pos_embed: true
  attn_dropout: 0.2
  use_vlln: true
  use_vl_self_attention: true
  max_seq_len: 1024
  use_future_tokens: false
  use_alternate_vl_dit: true
  vl_self_attention_cfg:
    positional_embeddings: null
    num_layers: 4
    num_attention_heads: 32
    attention_head_dim: 64
    dropout: 0.2
    final_dropout: true
  diffusion_model_cfg:
    positional_embeddings: null
    num_layers: 32
    num_attention_heads: 32
    attention_head_dim: 48
    norm_type: ada_norm
    dropout: 0.2
    final_dropout: true
    output_dim: 1024
    interleave_self_attention: true
    # NOTE(review): nested here because it directly follows this mapping's keys in the
    # flattened source; value equals backbone_embedding_dim. Confirm it is not a model-level key.
    cross_attention_dim: 2048
  num_inference_timesteps: 4
  noise_beta_alpha: 1.5
  noise_beta_beta: 1.0
  noise_s: 0.999
  num_timestep_buckets: 1000
  tune_projector: true
  tune_diffusion_model: true
  tune_vlln: true
  max_num_embodiments: 32
  rtc_ramp_rate: 6.0
  tf_legacy_loss: false
  # NOTE(review): false here while model_dtype is bfloat16 and load_bf16 is true — confirm intended.
  use_bfloat16: false
# Data section.
# The source had lost all indentation; nesting below is reconstructed (values and order unchanged).
# Numeric index lists and the `{}` placeholder lists are written in flow style for compactness.
data:
  # Eight dataset groups; their mix_ratio values sum to 1.0.
  # Each embodiment_tag has a matching entry under modality_configs.
  datasets:
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_all_merged_global_task_exclude_bad_subtasks
      embodiment_tag: xdof_relative_eef_relative_joint
      mix_ratio: 0.1
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/xdof.yam_v7_subtask_only_merged_global_task
      embodiment_tag: xdof_relative_eef_relative_joint_subtask
      mix_ratio: 0.2
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17
        - /mnt/aws-lfs-02/shared/datasets/droid_101_success_idlefiltered_n17_swapped
      embodiment_tag: oxe_droid_relative_eef_relative_joint
      mix_ratio: 0.1
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_g1.g1-in-the-wild-merged
      embodiment_tag: real_g1_relative_eef_relative_joints
      mix_ratio: 0.05
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_1
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_real_robot_batch_2
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.miscellaneous_1k_trajectories
      embodiment_tag: real_r1_pro_sharpa_relative_eef
      mix_ratio: 0.05
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch1-2025-12-10-merged
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch3_2026-01-04-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch4_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch5_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch6_2026-01-05-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch10_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch11_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch12_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch8_2026-01-10-merged_backup
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/mecka_lerobot/real_r1_pro_sharpa.mecka_batch9_2026-01-10-merged_backup
      embodiment_tag: real_r1_pro_sharpa_relative_eef_mecka
      mix_ratio: 0.25
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/maxinsights_lerobot_updated/1530hrs/real_r1_pro_sharpa.maxinsights_1530hrs_updated_train_set_merged
      embodiment_tag: real_r1_pro_sharpa_relative_eef_maxinsights
      mix_ratio: 0.2
      dataset_type: physical_embodiment
    - dataset_paths:
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch1
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.inlab_play_human_batch2
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task24_2000_human_video_filter_n6_keep1619_demo_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.shirt_rolling_task15_2000_human_video_filter_n6_keep572_demo_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_human_filter_n6_keep523_demo_stats_overwrite_left_side_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.tong_task38_2000_human_video_overwrite_left_side_stats
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.syringe_task30i_2000_human_video_filtered
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_bottle_task43_2000_human_video_fixed-duration
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.unscrew_Jim_bottle_task47_600_human_video
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_shirt_task30b_500_human_video_halfdone
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.fold_towel_task30c_500_human_video_halfdone
        - /mnt/aws-lfs-02/shared/datasets/galaxea_sharpa/real_r1_pro_sharpa.sort_cards_task32e_1000_human_video
      embodiment_tag: real_r1_pro_sharpa_relative_eef_human
      mix_ratio: 0.05
      dataset_type: physical_embodiment
  # Per-embodiment modality settings, keyed by embodiment_tag. Every embodiment defines
  # video / state / action / language with the same twelve keys.
  # Action delta_indices are 0..39 everywhere (40 steps, matching model.action_horizon).
  # NOTE(review): action_representation / action_type / action_format hold one `{}` per action
  # modality key; the entries are empty mappings in this dump, so any per-key values are not
  # visible here — confirm against the original experiment config.
  modality_configs:
    real_g1_relative_eef_relative_joints:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
          - left_hand
          - right_hand
          - left_arm
          - right_arm
          - waist
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
          - left_hand
          - right_hand
          - left_arm
          - right_arm
          - waist
          - base_height_command
          - navigate_command
        normalization_mode: null
        action_representation: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef_9d
          - right_wrist_eef_9d
        hand_keys:
          - left_hand
          - right_hand
        extra_keys:
          - left_arm
          - right_arm
          - waist
          - base_height_command
          - navigate_command
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.task_description
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    real_r1_pro_sharpa_relative_eef_mecka:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - ego_view_cropratio_res320x240_freq30
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        # State is flagged as excluded for this embodiment (also for *_human and *_maxinsights).
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    oxe_droid_relative_eef_relative_joint:
      video:
        delta_indices: [-15, 0]
        modality_keys:
          - exterior_image_1_left
          - wrist_image_left
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - eef_9d
          - gripper_position
          - joint_position
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - eef_9d
          - gripper_position
          - joint_position
        normalization_mode: null
        action_representation: [{}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}]
        action_format: [{}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - eef_9d
        hand_keys:
          - gripper_position
        extra_keys:
          - joint_position
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.language.language_instruction
          - annotation.language.language_instruction_2
          - annotation.language.language_instruction_3
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    real_r1_pro_sharpa_relative_eef_human:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view_res320x240_freq20
          - left_wrist_view_res320x240_freq20
          - right_wrist_view_res320x240_freq20
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    xdof_relative_eef_relative_joint:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - top_camera-images-rgb_320_240
          - left_camera-images-rgb_320_240
          - right_camera-images-rgb_320_240
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: [{}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_gripper_pos
          - right_gripper_pos
        extra_keys:
          - left_joint_pos
          - right_joint_pos
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.task
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    # Same settings as xdof_relative_eef_relative_joint except the language key (annotation.sub_task).
    xdof_relative_eef_relative_joint_subtask:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - top_camera-images-rgb_320_240
          - left_camera-images-rgb_320_240
          - right_camera-images-rgb_320_240
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_gripper_pos
          - right_gripper_pos
          - left_joint_pos
          - right_joint_pos
        normalization_mode: null
        action_representation: [{}, {}, {}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}, {}, {}]
        action_format: [{}, {}, {}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_gripper_pos
          - right_gripper_pos
        extra_keys:
          - left_joint_pos
          - right_joint_pos
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.sub_task
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    real_r1_pro_sharpa_relative_eef:
      video:
        delta_indices: [-20, 0]
        modality_keys:
          - ego_view_res320x240_freq20
          - left_wrist_view_res320x240_freq20
          - right_wrist_view_res320x240_freq20
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
    real_r1_pro_sharpa_relative_eef_maxinsights:
      video:
        delta_indices: [-30, 0]
        modality_keys:
          - ego_view_cropratio_res320x240_freq30
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      state:
        delta_indices: [0]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: null
        exclude_state: true
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
      action:
        delta_indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
        modality_keys:
          - left_wrist_eef
          - right_wrist_eef
          - left_hand_joints
          - right_hand_joints
        normalization_mode: null
        action_representation: [{}, {}, {}, {}]
        exclude_state: false
        action_type: [{}, {}, {}, {}]
        action_format: [{}, {}, {}, {}]
        normalize_rotation: true
        wrist_keys:
          - left_wrist_eef
          - right_wrist_eef
        hand_keys:
          - left_hand_joints
          - right_hand_joints
        extra_keys: []
        loss_weights: null
      language:
        delta_indices: [0]
        modality_keys:
          - annotation.human.coarse_action
        normalization_mode: null
        action_representation: null
        exclude_state: false
        action_type: null
        action_format: null
        normalize_rotation: true
        wrist_keys: null
        hand_keys: null
        extra_keys: null
        loss_weights: null
  download_cache: false
  shard_size: 1024
  episode_sampling_rate: 0.1
  num_shards_per_epoch: 100000
  override_pretraining_statistics: false
  mode: single_turn
  random_chop: 0.0
  mock_dataset_mode: false
  num_prompt_trajectories: 2
  variable_num_demos: false
  max_prompt_trajectories: 5
  shuffle: true
  seed: 24
  subsample_ratio: 1.0
  # NOTE(review): these differ from model.image_crop_size (230x230) and
  # model.image_target_size (256x256), and the crop here is larger than the target —
  # confirm which pair the pipeline actually uses.
  image_crop_size: [244, 244]
  image_target_size: [224, 224]
  video_backend: torchcodec
# Training section.
# The source had lost all indentation; the keys below are re-nested under `training`
# (values and order unchanged).
training:
  # NOTE(review): identical to model.vlm_model_path, which looks like a model id rather than
  # an output directory — confirm before reusing this config for a run.
  output_dir: nvidia/Cosmos-Reason2-2B
  experiment_name: null
  max_steps: 200000
  # NOTE(review): batch_size (32) x num_gpus (256) x gradient_accumulation_steps (1) = 8192,
  # which does not equal global_batch_size (1024) if batch_size is per device — confirm which
  # of these keys the trainer honors.
  global_batch_size: 1024
  batch_size: 32
  gradient_accumulation_steps: 1
  use_muon: false
  muon_lr: 0.005
  use_legacy_wd_application: false
  learning_rate: 5.0e-05
  lr_scheduler_type: cosine
  weight_decay: 1.0e-05
  warmup_ratio: 0.05
  warmup_steps: 0
  max_grad_norm: 1.0
  wsd_stable_ratio: 0.8
  wsd_decay_type: cosine
  optim: adamw_torch_fused
  start_from_checkpoint: null
  tf32: true
  fp16: false
  bf16: true
  eval_bf16: true
  logging_steps: 10
  save_steps: 1000
  save_total_limit: 5
  save_vl_model: false
  upload_checkpoints: true
  upload_every: 1000
  upload_last_n_checkpoints: 5
  max_concurrent_uploads: 2
  # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 parsers.
  eval_strategy: 'no'
  eval_steps: 500
  eval_set_split_ratio: 0.1
  eval_batch_size: 2
  save_best_eval_metric_name: ''
  save_best_eval_metric_greater_is_better: true
  deepspeed_stage: 2
  gradient_checkpointing: false
  use_ddp: false
  num_gpus: 256
  dataloader_num_workers: 4
  remove_unused_columns: false
  use_wandb: true
  wandb_project: human_pretraining_n15_galaxea_sharpa
  enable_profiling: false
  max_retries: 3
  skip_spike: true
  skip_spike_threshold: 5.0
  skip_spike_ema_alpha: 0.99
  skip_spike_max_consecutive: 10
  assert_loss_less_than: null
# Root-level keys. They repeat training.max_steps / training.save_steps with the same values;
# since one mapping cannot hold the same key twice, they are kept at the document root.
# NOTE(review): confirm the consumer expects these at the root rather than under `training`.
max_steps: 200000
save_steps: 1000