thinhpt committed on
Commit
1124b6e
·
verified ·
1 Parent(s): 7ddf93f

Add files using upload-large-folder tool

Browse files
config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 40,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": false,
5
+ "architectures": [
6
+ "Gr00tN1d7"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_trainable_params_fp32": true,
12
+ "color_jitter_params": {
13
+ "brightness": 0.3,
14
+ "contrast": 0.4,
15
+ "hue": 0.08,
16
+ "saturation": 0.5
17
+ },
18
+ "crop_fraction": 0.95,
19
+ "diffusion_model_cfg": {
20
+ "attention_head_dim": 48,
21
+ "dropout": 0.2,
22
+ "final_dropout": true,
23
+ "interleave_self_attention": true,
24
+ "norm_type": "ada_norm",
25
+ "num_attention_heads": 32,
26
+ "num_layers": 32,
27
+ "output_dim": 1024,
28
+ "positional_embeddings": null
29
+ },
30
+ "dtype": "float32",
31
+ "exclude_state": false,
32
+ "formalize_language": true,
33
+ "hidden_size": 1024,
34
+ "image_crop_size": [
35
+ 230,
36
+ 230
37
+ ],
38
+ "image_target_size": [
39
+ 256,
40
+ 256
41
+ ],
42
+ "letter_box_transform": false,
43
+ "load_bf16": false,
44
+ "max_action_dim": 132,
45
+ "max_num_embodiments": 32,
46
+ "max_seq_len": 1024,
47
+ "max_state_dim": 132,
48
+ "model_dtype": "bfloat16",
49
+ "model_name": "nvidia/Cosmos-Reason2-2B",
50
+ "model_type": "Gr00tN1d7",
51
+ "noise_beta_alpha": 1.5,
52
+ "noise_beta_beta": 1.0,
53
+ "noise_s": 0.999,
54
+ "num_inference_timesteps": 4,
55
+ "num_timestep_buckets": 1000,
56
+ "random_history_crop": true,
57
+ "random_rotation_angle": 0,
58
+ "reproject_vision": false,
59
+ "rtc_ramp_rate": 6.0,
60
+ "select_layer": 16,
61
+ "shortest_image_edge": 256,
62
+ "state_dropout_prob": 0.2,
63
+ "state_gaussian_noise_std": 0.0,
64
+ "transformers_version": "4.57.3",
65
+ "tune_diffusion_model": true,
66
+ "tune_linear": true,
67
+ "tune_llm": false,
68
+ "tune_projector": true,
69
+ "tune_top_llm_layers": 0,
70
+ "tune_visual": false,
71
+ "tune_vlln": true,
72
+ "use_albumentations": true,
73
+ "use_alternate_vl_dit": true,
74
+ "use_flash_attention": true,
75
+ "use_future_tokens": false,
76
+ "use_mean_std": false,
77
+ "use_percentiles": true,
78
+ "use_vl_self_attention": true,
79
+ "use_vlln": true,
80
+ "vl_self_attention_cfg": {
81
+ "attention_head_dim": 64,
82
+ "dropout": 0.2,
83
+ "final_dropout": true,
84
+ "num_attention_heads": 32,
85
+ "num_layers": 4,
86
+ "positional_embeddings": null
87
+ }
88
+ }
embodiment_id.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "oxe_droid": 17,
4
+ "oxe_fractal": 18,
5
+ "oxe_language_table": 19,
6
+ "oxe_bridge": 20,
7
+ "unknown": 22,
8
+ "gr1_unified": 20,
9
+ "agibot": 26,
10
+ "sim_behavior_r1_pro": 23,
11
+ "xdof": 24,
12
+ "xdof_oss_data": 25,
13
+ "unitree_g1_full_body_with_waist_height_nav_cmd": 25,
14
+ "real_r1_pro_sharpa": 27,
15
+ "real_r1_pro_sharpa_add_view": 27,
16
+ "real_r1_pro_sharpa_relative_arm_joint": 26,
17
+ "real_r1_pro_sharpa_delta_eef": 26,
18
+ "real_r1_pro_sharpa_absolute_eef": 26,
19
+ "real_r1_pro_sharpa_meanstd": 26,
20
+ "real_r1_pro_sharpa_relative_eef": 26,
21
+ "real_r1_pro_sharpa_relative_eef_add_view": 26,
22
+ "real_r1_pro_sharpa_relative_eef_relative_hand": 26,
23
+ "real_r1_pro_sharpa_relative_eef_human": 26,
24
+ "real_r1_pro_sharpa_relative_eef_human_add_view": 26,
25
+ "real_r1_pro_sharpa_relative_eef_human_relative_hand": 26,
26
+ "real_r1_pro_sharpa_relative_eef_egodex": 26,
27
+ "real_r1_pro_sharpa_relative_eef_egodex_relative_hand": 26,
28
+ "real_r1_pro_sharpa_relative_eef_egodex_wrist_only": 26,
29
+ "real_r1_pro_sharpa_relative_eef_maxinsights": 26,
30
+ "real_r1_pro_sharpa_relative_eef_maxinsights_relative_hand": 26,
31
+ "real_r1_pro_sharpa_relative_eef_mecka": 26,
32
+ "real_r1_pro_sharpa_relative_eef_mecka_relative_hand": 26,
33
+ "real_g1_relative_eef_absolute_joints": 25,
34
+ "real_g1_relative_eef_absolute_joints_wrist_cam": 25,
35
+ "real_g1_relative_eef_relative_joints": 25,
36
+ "real_r1_pro_sharpa_relative_eef_relative_hand_relative_joint": 26,
37
+ "real_r1_pro_sharpa_relative_joint": 29,
38
+ "oxe_droid_relative_eef_relative_joint": 24,
39
+ "oxe_droid_relative_eef_relative_joint_swapped": 24,
40
+ "oxe_droid_relative_eef_relative_joint_upweight_z": 24,
41
+ "oxe_droid_relative_eef_relative_joint_upweight_z_swapped": 24,
42
+ "oxe_droid_relative_eef_relative_joint_3view": 24,
43
+ "oxe_droid_relative_eef_relative_joint_3view_swapped": 24,
44
+ "oxe_droid_relative_eef": 24,
45
+ "oxe_droid_joint_position_relative": 24,
46
+ "xdof_relative_eef_relative_joint": 27,
47
+ "xdof_relative_eef_relative_joint_subtask": 27,
48
+ "xdof_relative_eef": 27,
49
+ "xdof_relative_joint": 28,
50
+ "simpler_env_google": 0,
51
+ "simpler_env_widowx": 1,
52
+ "libero_sim": 2,
53
+ "droid_sim": 3,
54
+ "unitree_g1_sonic": 11,
55
+ "new_embodiment": 10
56
+ }
experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d7
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Cosmos-Reason2-2B
6
+ backbone_model_type: qwen
7
+ model_revision: null
8
+ tune_top_llm_layers: 0
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 12
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ backbone_trainable_params_fp32: true
17
+ image_crop_size:
18
+ - 230
19
+ - 230
20
+ image_target_size:
21
+ - 256
22
+ - 256
23
+ shortest_image_edge: null
24
+ crop_fraction: null
25
+ random_rotation_angle: null
26
+ color_jitter_params:
27
+ brightness: 0.3
28
+ contrast: 0.4
29
+ saturation: 0.5
30
+ hue: 0.08
31
+ use_albumentations_transforms: true
32
+ extra_augmentation_config: null
33
+ formalize_language: true
34
+ apply_sincos_state_encoding: false
35
+ use_percentiles: true
36
+ use_relative_action: true
37
+ max_state_dim: 132
38
+ max_action_dim: 132
39
+ action_horizon: 40
40
+ hidden_size: 1024
41
+ input_embedding_dim: 1536
42
+ state_history_length: 1
43
+ add_pos_embed: true
44
+ attn_dropout: 0.2
45
+ use_vlln: true
46
+ max_seq_len: 1024
47
+ use_alternate_vl_dit: true
48
+ attend_text_every_n_blocks: 2
49
+ diffusion_model_cfg:
50
+ positional_embeddings: null
51
+ num_layers: 16
52
+ num_attention_heads: 32
53
+ attention_head_dim: 48
54
+ norm_type: ada_norm
55
+ dropout: 0.2
56
+ final_dropout: true
57
+ output_dim: 1024
58
+ interleave_self_attention: true
59
+ num_inference_timesteps: 4
60
+ noise_beta_alpha: 1.5
61
+ noise_beta_beta: 1.0
62
+ noise_s: 0.999
63
+ num_timestep_buckets: 1000
64
+ tune_projector: true
65
+ tune_diffusion_model: true
66
+ tune_vlln: true
67
+ state_dropout_prob: 0.2
68
+ exclude_state: false
69
+ use_mean_std: false
70
+ max_num_embodiments: 32
71
+ data:
72
+ datasets:
73
+ - dataset_paths:
74
+ - /workspace/Isaac-GR00T/examples/so101/merged_v2_out/thinhpt/merged_data_29042026
75
+ embodiment_tag: new_embodiment
76
+ mix_ratio: 1.0
77
+ dataset_type: physical_embodiment
78
+ val_dataset_path: null
79
+ modality_configs:
80
+ new_embodiment:
81
+ video:
82
+ delta_indices:
83
+ - 0
84
+ modality_keys:
85
+ - top
86
+ - side
87
+ - wrist
88
+ sin_cos_embedding_keys: null
89
+ mean_std_embedding_keys: null
90
+ action_configs: null
91
+ state:
92
+ delta_indices:
93
+ - 0
94
+ modality_keys:
95
+ - single_arm
96
+ - gripper
97
+ sin_cos_embedding_keys: null
98
+ mean_std_embedding_keys: null
99
+ action_configs: null
100
+ action:
101
+ delta_indices:
102
+ - 0
103
+ - 1
104
+ - 2
105
+ - 3
106
+ - 4
107
+ - 5
108
+ - 6
109
+ - 7
110
+ - 8
111
+ - 9
112
+ - 10
113
+ - 11
114
+ - 12
115
+ - 13
116
+ - 14
117
+ - 15
118
+ modality_keys:
119
+ - single_arm
120
+ - gripper
121
+ sin_cos_embedding_keys: null
122
+ mean_std_embedding_keys: null
123
+ action_configs:
124
+ - rep: RELATIVE
125
+ type: NON_EEF
126
+ format: DEFAULT
127
+ state_key: null
128
+ - rep: ABSOLUTE
129
+ type: NON_EEF
130
+ format: DEFAULT
131
+ state_key: null
132
+ language:
133
+ delta_indices:
134
+ - 0
135
+ modality_keys:
136
+ - annotation.human.task_description
137
+ sin_cos_embedding_keys: null
138
+ mean_std_embedding_keys: null
139
+ action_configs: null
140
+ download_cache: false
141
+ shard_size: 1024
142
+ episode_sampling_rate: 0.1
143
+ num_shards_per_epoch: 100000
144
+ override_pretraining_statistics: true
145
+ mode: single_turn
146
+ random_chop: 0.0
147
+ mock_dataset_mode: false
148
+ shuffle: true
149
+ seed: 42
150
+ multiprocessing_context: fork
151
+ allow_padding: false
152
+ subsample_ratio: 1.0
153
+ image_crop_size:
154
+ - 244
155
+ - 244
156
+ image_target_size:
157
+ - 224
158
+ - 224
159
+ video_backend: torchcodec
160
+ training:
161
+ output_dir: /workspace/Isaac-GR00T/outputs/so101_from_a100
162
+ experiment_name: null
163
+ max_steps: 1
164
+ global_batch_size: 1
165
+ batch_size: null
166
+ gradient_accumulation_steps: 1
167
+ learning_rate: 0.0001
168
+ lr_scheduler_type: cosine
169
+ weight_decay: 1.0e-05
170
+ warmup_ratio: 0.05
171
+ warmup_steps: 0
172
+ max_grad_norm: 1.0
173
+ optim: adamw_torch
174
+ start_from_checkpoint: nvidia/GR00T-N1.7-3B
175
+ skip_weight_loading: false
176
+ tf32: true
177
+ fp16: false
178
+ bf16: true
179
+ eval_bf16: true
180
+ logging_steps: 10
181
+ save_steps: 1
182
+ save_total_limit: 5
183
+ save_vl_model: false
184
+ save_only_model: false
185
+ upload_checkpoints: false
186
+ upload_every: 1000
187
+ upload_last_n_checkpoints: 5
188
+ max_concurrent_uploads: 2
189
+ eval_strategy: 'no'
190
+ eval_steps: 500
191
+ eval_set_split_ratio: 0.1
192
+ eval_batch_size: 2
193
+ save_best_eval_metric_name: ''
194
+ save_best_eval_metric_greater_is_better: true
195
+ deepspeed_stage: 2
196
+ gradient_checkpointing: false
197
+ transformers_trust_remote_code: true
198
+ transformers_local_files_only: false
199
+ transformers_cache_dir: null
200
+ transformers_access_token: null
201
+ use_ddp: false
202
+ ddp_bucket_cap_mb: 100
203
+ num_gpus: 1
204
+ dataloader_num_workers: 4
205
+ remove_unused_columns: false
206
+ use_wandb: false
207
+ wandb_project: finetune-gr00t-n1d7
208
+ enable_profiling: false
209
+ max_retries: 3
210
+ assert_loss_less_than: null
211
+ add_rl_callback: false
212
+ enable_open_loop_eval: false
213
+ open_loop_eval_traj_ids:
214
+ - 0
215
+ open_loop_eval_steps_per_traj: 100
216
+ open_loop_eval_plot_indices: null
217
+ max_steps: 1
218
+ save_steps: 1
experiment_cfg/config.yaml ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /workspace/Isaac-GR00T/examples/so101/merged_v2_out/thinhpt/merged_data_29042026
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - relative
30
+ state_key: null
31
+ type: &id002 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
36
+ - absolute
37
+ state_key: null
38
+ type: *id002
39
+ delta_indices:
40
+ - 0
41
+ - 1
42
+ - 2
43
+ - 3
44
+ - 4
45
+ - 5
46
+ - 6
47
+ - 7
48
+ - 8
49
+ - 9
50
+ - 10
51
+ - 11
52
+ - 12
53
+ - 13
54
+ - 14
55
+ - 15
56
+ mean_std_embedding_keys: null
57
+ modality_keys:
58
+ - single_arm
59
+ - gripper
60
+ sin_cos_embedding_keys: null
61
+ language: !!python/object:gr00t.data.types.ModalityConfig
62
+ action_configs: null
63
+ delta_indices:
64
+ - 0
65
+ mean_std_embedding_keys: null
66
+ modality_keys:
67
+ - annotation.human.task_description
68
+ sin_cos_embedding_keys: null
69
+ state: !!python/object:gr00t.data.types.ModalityConfig
70
+ action_configs: null
71
+ delta_indices:
72
+ - 0
73
+ mean_std_embedding_keys: null
74
+ modality_keys:
75
+ - single_arm
76
+ - gripper
77
+ sin_cos_embedding_keys: null
78
+ video: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - top
85
+ - side
86
+ - wrist
87
+ sin_cos_embedding_keys: null
88
+ mode: single_turn
89
+ multiprocessing_context: fork
90
+ num_shards_per_epoch: 100000
91
+ override_pretraining_statistics: true
92
+ random_chop: 0.0
93
+ seed: 42
94
+ shard_size: 1024
95
+ shuffle: true
96
+ subsample_ratio: 1.0
97
+ video_backend: torchcodec
98
+ load_config_path: null
99
+ model: !!python/object:gr00t.configs.model.gr00t_n1d7.Gr00tN1d7Config
100
+ _attn_implementation_internal: null
101
+ _commit_hash: null
102
+ _name_or_path: ''
103
+ _output_attentions: false
104
+ add_cross_attention: false
105
+ architectures: null
106
+ backbone_trainable_params_fp32: true
107
+ bad_words_ids: null
108
+ begin_suppress_tokens: null
109
+ bos_token_id: null
110
+ chunk_size_feed_forward: 0
111
+ color_jitter_params:
112
+ brightness: 0.3
113
+ contrast: 0.4
114
+ hue: 0.08
115
+ saturation: 0.5
116
+ cross_attention_hidden_size: null
117
+ decoder_start_token_id: null
118
+ diffusion_model_cfg:
119
+ attention_head_dim: 48
120
+ dropout: 0.2
121
+ final_dropout: true
122
+ interleave_self_attention: true
123
+ norm_type: ada_norm
124
+ num_attention_heads: 32
125
+ num_layers: 16
126
+ output_dim: 1024
127
+ positional_embeddings: null
128
+ diversity_penalty: 0.0
129
+ do_sample: false
130
+ dtype: null
131
+ early_stopping: false
132
+ encoder_no_repeat_ngram_size: 0
133
+ eos_token_id: null
134
+ exponential_decay_length_penalty: null
135
+ extra_augmentation_config: null
136
+ finetuning_task: null
137
+ forced_bos_token_id: null
138
+ forced_eos_token_id: null
139
+ id2label:
140
+ 0: LABEL_0
141
+ 1: LABEL_1
142
+ is_decoder: false
143
+ is_encoder_decoder: false
144
+ label2id:
145
+ LABEL_0: 0
146
+ LABEL_1: 1
147
+ length_penalty: 1.0
148
+ load_bf16: false
149
+ max_length: 20
150
+ min_length: 0
151
+ model_name: nvidia/Cosmos-Reason2-2B
152
+ no_repeat_ngram_size: 0
153
+ num_beam_groups: 1
154
+ num_beams: 1
155
+ num_return_sequences: 1
156
+ output_hidden_states: false
157
+ output_scores: false
158
+ pad_token_id: null
159
+ prefix: null
160
+ problem_type: null
161
+ pruned_heads: {}
162
+ random_rotation_angle: null
163
+ remove_invalid_values: false
164
+ repetition_penalty: 1.0
165
+ reproject_vision: false
166
+ return_dict: true
167
+ return_dict_in_generate: false
168
+ sep_token_id: null
169
+ state_dropout_prob: 0.2
170
+ suppress_tokens: null
171
+ task_specific_params: null
172
+ temperature: 1.0
173
+ tf_legacy_loss: false
174
+ tie_encoder_decoder: false
175
+ tie_word_embeddings: true
176
+ tokenizer_class: null
177
+ top_k: 50
178
+ top_p: 1.0
179
+ torchscript: false
180
+ transformers_version: null
181
+ tune_diffusion_model: true
182
+ tune_llm: false
183
+ tune_projector: true
184
+ tune_visual: false
185
+ typical_p: 1.0
186
+ use_bfloat16: false
187
+ use_relative_action: true
188
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
189
+ add_rl_callback: false
190
+ assert_loss_less_than: null
191
+ batch_size: null
192
+ bf16: true
193
+ dataloader_num_workers: 4
194
+ ddp_bucket_cap_mb: 100
195
+ deepspeed_stage: 2
196
+ enable_open_loop_eval: false
197
+ enable_profiling: false
198
+ eval_batch_size: 2
199
+ eval_bf16: true
200
+ eval_set_split_ratio: 0.1
201
+ eval_steps: 500
202
+ eval_strategy: 'no'
203
+ experiment_name: null
204
+ fp16: false
205
+ global_batch_size: 1
206
+ gradient_accumulation_steps: 1
207
+ gradient_checkpointing: false
208
+ learning_rate: 0.0001
209
+ logging_steps: 10
210
+ lr_scheduler_type: cosine
211
+ max_concurrent_uploads: 2
212
+ max_grad_norm: 1.0
213
+ max_retries: 3
214
+ max_steps: 1
215
+ num_gpus: 1
216
+ open_loop_eval_plot_indices: null
217
+ open_loop_eval_steps_per_traj: 100
218
+ open_loop_eval_traj_ids:
219
+ - 0
220
+ optim: adamw_torch
221
+ output_dir: /workspace/Isaac-GR00T/outputs/so101_from_a100
222
+ remove_unused_columns: false
223
+ save_best_eval_metric_greater_is_better: true
224
+ save_best_eval_metric_name: ''
225
+ save_only_model: false
226
+ save_steps: 1
227
+ save_total_limit: 5
228
+ save_vl_model: false
229
+ skip_weight_loading: false
230
+ start_from_checkpoint: nvidia/GR00T-N1.7-3B
231
+ tf32: true
232
+ transformers_access_token: null
233
+ transformers_cache_dir: null
234
+ transformers_local_files_only: false
235
+ transformers_trust_remote_code: true
236
+ upload_checkpoints: false
237
+ upload_every: 1000
238
+ upload_last_n_checkpoints: 5
239
+ use_ddp: false
240
+ use_wandb: false
241
+ wandb_project: finetune-gr00t-n1d7
242
+ warmup_ratio: 0.05
243
+ warmup_steps: 0
244
+ weight_decay: 1.0e-05
experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "single_arm": {
5
+ "min": [
6
+ -54.74613571166992,
7
+ -98.57022857666016,
8
+ -43.726234436035156,
9
+ 46.123172760009766,
10
+ -21.846965789794922
11
+ ],
12
+ "max": [
13
+ 33.480499267578125,
14
+ 44.49116897583008,
15
+ 99.6197738647461,
16
+ 99.37970733642578,
17
+ 44.96042251586914
18
+ ],
19
+ "mean": [
20
+ -3.71223117675045,
21
+ -28.96169491491071,
22
+ 38.268331990254325,
23
+ 76.11713071680639,
24
+ 8.719410478347504
25
+ ],
26
+ "std": [
27
+ 19.7703417672378,
28
+ 45.76361889878161,
29
+ 36.95550118330955,
30
+ 10.436981253480331,
31
+ 10.28527453206456
32
+ ],
33
+ "q01": [
34
+ -28.35573358165663,
35
+ -98.37625679512502,
36
+ -6.215841300198603,
37
+ 61.27545041751672,
38
+ -3.5717462805583824
39
+ ],
40
+ "q99": [
41
+ 22.835918021244282,
42
+ 16.923880809559762,
43
+ 94.53338718633756,
44
+ 94.08840435465513,
45
+ 19.448960641935898
46
+ ]
47
+ },
48
+ "gripper": {
49
+ "min": [
50
+ 1.6655563116073608
51
+ ],
52
+ "max": [
53
+ 44.7701530456543
54
+ ],
55
+ "mean": [
56
+ 12.234805392212133
57
+ ],
58
+ "std": [
59
+ 10.744181979537759
60
+ ],
61
+ "q01": [
62
+ 2.819680079656035
63
+ ],
64
+ "q99": [
65
+ 29.035170235970288
66
+ ]
67
+ }
68
+ },
69
+ "action": {
70
+ "single_arm": {
71
+ "min": [
72
+ -54.81263732910156,
73
+ -100.0,
74
+ -47.463768005371094,
75
+ 45.159915924072266,
76
+ -22.161027908325195
77
+ ],
78
+ "max": [
79
+ 34.09257888793945,
80
+ 43.78151321411133,
81
+ 99.18478393554688,
82
+ 100.0,
83
+ 45.239967346191406
84
+ ],
85
+ "mean": [
86
+ -3.654850216570309,
87
+ -29.786135515368066,
88
+ 36.804539250043725,
89
+ 76.00833611622012,
90
+ 8.795426704788387
91
+ ],
92
+ "std": [
93
+ 19.86646705175531,
94
+ 45.46621788789847,
95
+ 37.49960349292722,
96
+ 10.664087697927368,
97
+ 10.308307671275646
98
+ ],
99
+ "q01": [
100
+ -28.57498577516432,
101
+ -99.09725515198915,
102
+ -9.289173182009739,
103
+ 60.45941761745114,
104
+ -3.646041259007378
105
+ ],
106
+ "q99": [
107
+ 23.24454320656618,
108
+ 16.751953349857775,
109
+ 94.22454394741354,
110
+ 94.70154876475645,
111
+ 19.739092304095912
112
+ ]
113
+ },
114
+ "gripper": {
115
+ "min": [
116
+ 0.0
117
+ ],
118
+ "max": [
119
+ 45.26813888549805
120
+ ],
121
+ "mean": [
122
+ 11.791910935375741
123
+ ],
124
+ "std": [
125
+ 11.282201040172414
126
+ ],
127
+ "q01": [
128
+ 1.7659824175548617
129
+ ],
130
+ "q99": [
131
+ 29.41554971485232
132
+ ]
133
+ }
134
+ },
135
+ "relative_action": {
136
+ "single_arm": {
137
+ "min": [
138
+ [
139
+ -11.495086669921875,
140
+ -26.346839904785156,
141
+ -37.27923583984375,
142
+ -18.473644256591797,
143
+ -6.521447658538818
144
+ ],
145
+ [
146
+ -14.360626220703125,
147
+ -32.31824493408203,
148
+ -44.78874206542969,
149
+ -22.993900299072266,
150
+ -8.119771003723145
151
+ ],
152
+ [
153
+ -17.15679931640625,
154
+ -37.86912536621094,
155
+ -51.7279052734375,
156
+ -27.247333526611328,
157
+ -9.588430404663086
158
+ ],
159
+ [
160
+ -19.658639907836914,
161
+ -43.504112243652344,
162
+ -58.52138137817383,
163
+ -31.146312713623047,
164
+ -10.847281455993652
165
+ ],
166
+ [
167
+ -21.789426803588867,
168
+ -48.88678741455078,
169
+ -65.27043151855469,
170
+ -34.89897537231445,
171
+ -12.061001777648926
172
+ ],
173
+ [
174
+ -23.629013061523438,
175
+ -54.09234619140625,
176
+ -71.73430633544922,
177
+ -38.620731353759766,
178
+ -13.162496566772461
179
+ ],
180
+ [
181
+ -25.321434020996094,
182
+ -59.47502136230469,
183
+ -77.62784576416016,
184
+ -41.81081008911133,
185
+ -14.21790599822998
186
+ ],
187
+ [
188
+ -26.866689682006836,
189
+ -64.76913452148438,
190
+ -83.5213851928711,
191
+ -44.7958869934082,
192
+ -15.214496612548828
193
+ ],
194
+ [
195
+ -28.338359832763672,
196
+ -69.81539154052734,
197
+ -89.12974548339844,
198
+ -47.09982681274414,
199
+ -16.148725509643555
200
+ ],
201
+ [
202
+ -29.70535659790039,
203
+ -74.60933685302734,
204
+ -94.35787963867188,
205
+ -49.40259552001953,
206
+ -17.457027435302734
207
+ ],
208
+ [
209
+ -31.689193725585938,
210
+ -79.23507690429688,
211
+ -99.49095916748047,
212
+ -51.17485809326172,
213
+ -18.934598922729492
214
+ ],
215
+ [
216
+ -33.74650573730469,
217
+ -83.60482788085938,
218
+ -104.47283935546875,
219
+ -52.28360366821289,
220
+ -20.51771354675293
221
+ ],
222
+ [
223
+ -35.65967559814453,
224
+ -87.72593688964844,
225
+ -109.22569274902344,
226
+ -53.08112716674805,
227
+ -22.04805564880371
228
+ ],
229
+ [
230
+ -37.496559143066406,
231
+ -91.6755142211914,
232
+ -113.50326538085938,
233
+ -53.25835037231445,
234
+ -23.63117027282715
235
+ ],
236
+ [
237
+ -39.33344268798828,
238
+ -95.460205078125,
239
+ -117.40060424804688,
240
+ -53.169734954833984,
241
+ -25.16151237487793
242
+ ],
243
+ [
244
+ -41.02586364746094,
245
+ -98.82437896728516,
246
+ -120.7520523071289,
247
+ -53.169734954833984,
248
+ -26.48077392578125
249
+ ]
250
+ ],
251
+ "max": [
252
+ [
253
+ 12.493302345275879,
254
+ 26.105972290039062,
255
+ 30.445114135742188,
256
+ 12.794731140136719,
257
+ 11.564742088317871
258
+ ],
259
+ [
260
+ 15.750332832336426,
261
+ 31.736225128173828,
262
+ 37.09910583496094,
263
+ 15.896194458007812,
264
+ 13.977540016174316
265
+ ],
266
+ [
267
+ 19.13020133972168,
268
+ 37.203006744384766,
269
+ 43.658042907714844,
270
+ 18.710693359375,
271
+ 16.2994384765625
272
+ ],
273
+ [
274
+ 22.294292449951172,
275
+ 42.249263763427734,
276
+ 50.089202880859375,
277
+ 21.36908721923828,
278
+ 18.040863037109375
279
+ ],
280
+ [
281
+ 25.458383560180664,
282
+ 47.207244873046875,
283
+ 56.339202880859375,
284
+ 23.910110473632812,
285
+ 19.614429473876953
286
+ ],
287
+ [
288
+ 28.61782455444336,
289
+ 52.08119583129883,
290
+ 62.13768005371094,
291
+ 26.479888916015625,
292
+ 20.93368911743164
293
+ ],
294
+ [
295
+ 31.63031578063965,
296
+ 56.62282180786133,
297
+ 67.5559310913086,
298
+ 28.606605529785156,
299
+ 22.16421127319336
300
+ ],
301
+ [
302
+ 34.57365417480469,
303
+ 60.82803726196289,
304
+ 72.5378189086914,
305
+ 30.482940673828125,
306
+ 24.314748764038086
307
+ ],
308
+ [
309
+ 37.52121353149414,
310
+ 65.02971649169922,
311
+ 77.38572692871094,
312
+ 32.077980041503906,
313
+ 26.465286254882812
314
+ ],
315
+ [
316
+ 40.74937438964844,
317
+ 68.73030853271484,
318
+ 81.73355102539062,
319
+ 33.05272674560547,
320
+ 28.470563888549805
321
+ ],
322
+ [
323
+ 43.98704528808594,
324
+ 72.0916519165039,
325
+ 85.90021514892578,
326
+ 34.44295120239258,
327
+ 30.358840942382812
328
+ ],
329
+ [
330
+ 46.99953842163086,
331
+ 75.61852264404297,
332
+ 89.70249938964844,
333
+ 36.21521759033203,
334
+ 31.941953659057617
335
+ ],
336
+ [
337
+ 49.86929702758789,
338
+ 79.2350082397461,
339
+ 93.31466674804688,
340
+ 37.83568572998047,
341
+ 33.30570983886719
342
+ ],
343
+ [
344
+ 52.73905563354492,
345
+ 82.84844970703125,
346
+ 96.39437866210938,
347
+ 39.430721282958984,
348
+ 34.72191619873047
349
+ ],
350
+ [
351
+ 55.4576416015625,
352
+ 85.87620544433594,
353
+ 99.34114074707031,
354
+ 40.79533004760742,
355
+ 35.935638427734375
356
+ ],
357
+ [
358
+ 57.95948028564453,
359
+ 88.73335266113281,
360
+ 101.96795654296875,
361
+ 41.59284973144531,
362
+ 37.29207229614258
363
+ ]
364
+ ],
365
+ "mean": [
366
+ [
367
+ 0.055648088455200195,
368
+ -0.8320212960243225,
369
+ -1.514521598815918,
370
+ -0.11044922471046448,
371
+ 0.0779842957854271
372
+ ],
373
+ [
374
+ 0.053355250507593155,
375
+ -0.831329882144928,
376
+ -1.5161784887313843,
377
+ -0.11004702746868134,
378
+ 0.07830151915550232
379
+ ],
380
+ [
381
+ 0.05104710906744003,
382
+ -0.8308613896369934,
383
+ -1.5175775289535522,
384
+ -0.1095501035451889,
385
+ 0.07863958179950714
386
+ ],
387
+ [
388
+ 0.048748865723609924,
389
+ -0.8305501341819763,
390
+ -1.5187740325927734,
391
+ -0.10894875973463058,
392
+ 0.07897468656301498
393
+ ],
394
+ [
395
+ 0.046457186341285706,
396
+ -0.8303706049919128,
397
+ -1.5198224782943726,
398
+ -0.10824387520551682,
399
+ 0.07932591438293457
400
+ ],
401
+ [
402
+ 0.044185150414705276,
403
+ -0.8302662372589111,
404
+ -1.5207382440567017,
405
+ -0.10747325420379639,
406
+ 0.07968516647815704
407
+ ],
408
+ [
409
+ 0.04192883148789406,
410
+ -0.8302180171012878,
411
+ -1.5215797424316406,
412
+ -0.10666685551404953,
413
+ 0.08004017919301987
414
+ ],
415
+ [
416
+ 0.03968740254640579,
417
+ -0.8302181363105774,
418
+ -1.5223520994186401,
419
+ -0.10584192723035812,
420
+ 0.08038759976625443
421
+ ],
422
+ [
423
+ 0.037463363260030746,
424
+ -0.830231785774231,
425
+ -1.5230622291564941,
426
+ -0.10499752312898636,
427
+ 0.0807308703660965
428
+ ],
429
+ [
430
+ 0.03524709865450859,
431
+ -0.8302584290504456,
432
+ -1.5237103700637817,
433
+ -0.10413610190153122,
434
+ 0.08106588572263718
435
+ ],
436
+ [
437
+ 0.0330469124019146,
438
+ -0.8302862048149109,
439
+ -1.5243130922317505,
440
+ -0.10326503962278366,
441
+ 0.08139366656541824
442
+ ],
443
+ [
444
+ 0.03083740547299385,
445
+ -0.8303185701370239,
446
+ -1.524881362915039,
447
+ -0.10238658636808395,
448
+ 0.08171631395816803
449
+ ],
450
+ [
451
+ 0.0286197941750288,
452
+ -0.8303502202033997,
453
+ -1.5253970623016357,
454
+ -0.10150988399982452,
455
+ 0.08205557614564896
456
+ ],
457
+ [
458
+ 0.026385115459561348,
459
+ -0.8303899168968201,
460
+ -1.5258857011795044,
461
+ -0.10063113272190094,
462
+ 0.08241897076368332
463
+ ],
464
+ [
465
+ 0.024145090952515602,
466
+ -0.8304489850997925,
467
+ -1.5263406038284302,
468
+ -0.09974220395088196,
469
+ 0.08279348164796829
470
+ ],
471
+ [
472
+ 0.021909138187766075,
473
+ -0.830514132976532,
474
+ -1.52675199508667,
475
+ -0.09885106235742569,
476
+ 0.0831804946064949
477
+ ]
478
+ ],
479
+ "std": [
480
+ [
481
+ 2.3281056880950928,
482
+ 5.052164554595947,
483
+ 5.022761344909668,
484
+ 2.1672933101654053,
485
+ 1.1374194622039795
486
+ ],
487
+ [
488
+ 2.95160174369812,
489
+ 6.290643692016602,
490
+ 6.232492923736572,
491
+ 2.734529733657837,
492
+ 1.4302462339401245
493
+ ],
494
+ [
495
+ 3.5650782585144043,
496
+ 7.519575595855713,
497
+ 7.430261135101318,
498
+ 3.2848517894744873,
499
+ 1.710359811782837
500
+ ],
501
+ [
502
+ 4.166775226593018,
503
+ 8.734842300415039,
504
+ 8.610282897949219,
505
+ 3.8151462078094482,
506
+ 1.9777880907058716
507
+ ],
508
+ [
509
+ 4.755683422088623,
510
+ 9.933793067932129,
511
+ 9.769296646118164,
512
+ 4.323939323425293,
513
+ 2.2334964275360107
514
+ ],
515
+ [
516
+ 5.3314666748046875,
517
+ 11.115108489990234,
518
+ 10.905523300170898,
519
+ 4.810477256774902,
520
+ 2.47870135307312
521
+ ],
522
+ [
523
+ 5.89393949508667,
524
+ 12.278027534484863,
525
+ 12.017640113830566,
526
+ 5.27458381652832,
527
+ 2.7145771980285645
528
+ ],
529
+ [
530
+ 6.4430437088012695,
531
+ 13.421923637390137,
532
+ 13.105111122131348,
533
+ 5.716440677642822,
534
+ 2.942037343978882
535
+ ],
536
+ [
537
+ 6.979062557220459,
538
+ 14.54690933227539,
539
+ 14.167784690856934,
540
+ 6.136516571044922,
541
+ 3.1618080139160156
542
+ ],
543
+ [
544
+ 7.502256393432617,
545
+ 15.652772903442383,
546
+ 15.205517768859863,
547
+ 6.5353217124938965,
548
+ 3.374497175216675
549
+ ],
550
+ [
551
+ 8.012919425964355,
552
+ 16.739797592163086,
553
+ 16.218400955200195,
554
+ 6.913644313812256,
555
+ 3.5806076526641846
556
+ ],
557
+ [
558
+ 8.511361122131348,
559
+ 17.80817985534668,
560
+ 17.206693649291992,
561
+ 7.272296905517578,
562
+ 3.780578851699829
563
+ ],
564
+ [
565
+ 8.997858047485352,
566
+ 18.858156204223633,
567
+ 18.170631408691406,
568
+ 7.611952304840088,
569
+ 3.9747884273529053
570
+ ],
571
+ [
572
+ 9.472798347473145,
573
+ 19.890417098999023,
574
+ 19.110652923583984,
575
+ 7.9334397315979,
576
+ 4.163602828979492
577
+ ],
578
+ [
579
+ 9.936485290527344,
580
+ 20.905296325683594,
581
+ 20.026742935180664,
582
+ 8.237541198730469,
583
+ 4.34731912612915
584
+ ],
585
+ [
586
+ 10.389312744140625,
587
+ 21.903194427490234,
588
+ 20.919414520263672,
589
+ 8.524994850158691,
590
+ 4.526242733001709
591
+ ]
592
+ ],
593
+ "q01": [
594
+ [
595
+ -6.665879402160645,
596
+ -14.016157150268555,
597
+ -18.17994812011719,
598
+ -6.566455993652344,
599
+ -3.179898805618286
600
+ ],
601
+ [
602
+ -8.507232131958007,
603
+ -17.456420440673828,
604
+ -22.29572769165039,
605
+ -8.339033508300782,
606
+ -4.072495708465576
607
+ ],
608
+ [
609
+ -10.31580192565918,
610
+ -20.88756576538086,
611
+ -26.324436950683594,
612
+ -10.068443908691407,
613
+ -4.926240749359131
614
+ ],
615
+ [
616
+ -11.974199905395508,
617
+ -24.180819549560546,
618
+ -30.219016723632812,
619
+ -11.673286437988281,
620
+ -5.754124221801757
621
+ ],
622
+ [
623
+ -13.622781486511231,
624
+ -27.586204528808594,
625
+ -34.089948883056636,
626
+ -13.132738342285156,
627
+ -6.494998493194579
628
+ ],
629
+ [
630
+ -15.208937492370605,
631
+ -30.715314025878907,
632
+ -37.71124755859375,
633
+ -14.523447875976562,
634
+ -7.202737331390381
635
+ ],
636
+ [
637
+ -16.70104965209961,
638
+ -34.017022399902345,
639
+ -41.36379302978516,
640
+ -15.868139343261719,
641
+ -7.887644081115722
642
+ ],
643
+ [
644
+ -18.089058227539063,
645
+ -37.035921325683596,
646
+ -45.10492156982422,
647
+ -17.046712951660155,
648
+ -8.534011497497557
649
+ ],
650
+ [
651
+ -19.427554321289062,
652
+ -40.144180603027344,
653
+ -48.51883255004883,
654
+ -18.154289855957032,
655
+ -9.11346351623535
656
+ ],
657
+ [
658
+ -20.61064712524414,
659
+ -43.16117599487305,
660
+ -51.93670974731445,
661
+ -19.193991088867186,
662
+ -9.762433395385742
663
+ ],
664
+ [
665
+ -21.711649627685546,
666
+ -45.93755310058594,
667
+ -55.125614166259766,
668
+ -20.116388397216795,
669
+ -10.3573006439209
670
+ ],
671
+ [
672
+ -22.738679428100586,
673
+ -48.97865203857422,
674
+ -58.49867599487305,
675
+ -20.9490185546875,
676
+ -10.945838890075683
677
+ ],
678
+ [
679
+ -23.753029708862304,
680
+ -51.72798324584961,
681
+ -61.444978332519526,
682
+ -21.6744921875,
683
+ -11.450961227416991
684
+ ],
685
+ [
686
+ -24.652152328491212,
687
+ -54.50247207641601,
688
+ -64.40270690917968,
689
+ -22.302978515625,
690
+ -11.953850250244141
691
+ ],
692
+ [
693
+ -25.526405487060547,
694
+ -57.1378970336914,
695
+ -67.23038177490234,
696
+ -22.905455627441405,
697
+ -12.480633811950684
698
+ ],
699
+ [
700
+ -26.35790428161621,
701
+ -59.974665832519534,
702
+ -70.08912353515625,
703
+ -23.482775268554686,
704
+ -12.981596565246582
705
+ ]
706
+ ],
707
+ "q99": [
708
+ [
709
+ 8.16102737426758,
710
+ 17.203563842773438,
711
+ 12.228648223876954,
712
+ 6.190132141113281,
713
+ 4.2718777465820335
714
+ ],
715
+ [
716
+ 10.346405105590824,
717
+ 21.301721725463867,
718
+ 15.796453704833986,
719
+ 7.958911743164063,
720
+ 5.424470520019533
721
+ ],
722
+ [
723
+ 12.521966819763184,
724
+ 25.372473144531252,
725
+ 19.33406288146973,
726
+ 9.619446105957032,
727
+ 6.516082611083985
728
+ ],
729
+ [
730
+ 14.666381454467773,
731
+ 29.38535583496094,
732
+ 22.816484527587896,
733
+ 11.262754211425781,
734
+ 7.482540168762211
735
+ ],
736
+ [
737
+ 16.824214706420904,
738
+ 33.260471496582035,
739
+ 26.28074981689453,
740
+ 12.785390930175783,
741
+ 8.422623748779298
742
+ ],
743
+ [
744
+ 18.910324478149413,
745
+ 37.02602508544924,
746
+ 29.526105651855474,
747
+ 14.186925659179687,
748
+ 9.320058593750003
749
+ ],
750
+ [
751
+ 20.967150573730468,
752
+ 40.583617248535155,
753
+ 32.72448623657227,
754
+ 15.530707397460938,
755
+ 10.081392669677737
756
+ ],
757
+ [
758
+ 22.92281455993653,
759
+ 43.98616302490237,
760
+ 35.79875701904297,
761
+ 16.797211303710938,
762
+ 10.83802040100098
763
+ ],
764
+ [
765
+ 24.86040229797364,
766
+ 47.42544555664064,
767
+ 38.77677886962892,
768
+ 18.068880004882814,
769
+ 11.41708156585694
770
+ ],
771
+ [
772
+ 26.8188102722168,
773
+ 50.64716796875,
774
+ 41.6482504272461,
775
+ 19.176239013671875,
776
+ 12.060648956298829
777
+ ],
778
+ [
779
+ 28.741224975585936,
780
+ 53.73454513549807,
781
+ 44.526905212402355,
782
+ 20.276117095947267,
783
+ 12.697874450683594
784
+ ],
785
+ [
786
+ 30.602312393188477,
787
+ 56.670256195068376,
788
+ 47.06060317993165,
789
+ 21.301702880859377,
790
+ 13.154049987792968
791
+ ],
792
+ [
793
+ 32.47877929687501,
794
+ 59.32594985961914,
795
+ 49.71035003662111,
796
+ 22.218926696777345,
797
+ 13.643209037780762
798
+ ],
799
+ [
800
+ 34.18646453857422,
801
+ 62.087795410156254,
802
+ 52.03034790039063,
803
+ 23.099743499755867,
804
+ 14.102179527282715
805
+ ],
806
+ [
807
+ 35.960482940673835,
808
+ 64.44493347167969,
809
+ 54.46940231323242,
810
+ 23.925567626953125,
811
+ 14.555956344604493
812
+ ],
813
+ [
814
+ 37.56228790283204,
815
+ 66.62295715332031,
816
+ 56.67596618652344,
817
+ 24.686989135742195,
818
+ 14.986904106140141
819
+ ]
820
+ ]
821
+ }
822
+ }
823
+ }
824
+ }
experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d7",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Cosmos-Reason2-2B",
5
+ "backbone_model_type": "qwen",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 0,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": false,
15
+ "backbone_trainable_params_fp32": true,
16
+ "extra_augmentation_config": null,
17
+ "apply_sincos_state_encoding": false,
18
+ "use_percentiles": true,
19
+ "use_relative_action": false,
20
+ "max_state_dim": 132,
21
+ "max_action_dim": 132,
22
+ "action_horizon": 40,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "state_history_length": 1,
26
+ "add_pos_embed": true,
27
+ "attn_dropout": 0.2,
28
+ "use_vlln": true,
29
+ "max_seq_len": 1024,
30
+ "use_alternate_vl_dit": true,
31
+ "attend_text_every_n_blocks": 2,
32
+ "diffusion_model_cfg": {
33
+ "attention_head_dim": 48,
34
+ "dropout": 0.2,
35
+ "final_dropout": true,
36
+ "interleave_self_attention": true,
37
+ "norm_type": "ada_norm",
38
+ "num_attention_heads": 32,
39
+ "num_layers": 32,
40
+ "output_dim": 1024,
41
+ "positional_embeddings": null
42
+ },
43
+ "num_inference_timesteps": 4,
44
+ "noise_beta_alpha": 1.5,
45
+ "noise_beta_beta": 1.0,
46
+ "noise_s": 0.999,
47
+ "num_timestep_buckets": 1000,
48
+ "tune_projector": true,
49
+ "tune_diffusion_model": true,
50
+ "tune_vlln": true,
51
+ "state_dropout_prob": 0.2,
52
+ "exclude_state": false,
53
+ "use_mean_std": false,
54
+ "max_num_embodiments": 32
55
+ }
experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcaea5ee88f1e0f1465043920a2647c67e7de17d24adfd1c477742a6168edec
3
+ size 4986649584
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c39153e2c89aa5c7b6423b2fb05725d21fa1b594fe61dbebaffa7956736848e3
3
+ size 4970792616
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0a78a2f4ce9af011f57e67e441ddd1b02e358278800f883914fae334bfed225
3
+ size 2618758696
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88d086b13a9eb7c0a94e3d19d85a32160715d57b185332c1bcef1a8775354d52
3
+ size 12964594710
processor_config.json ADDED
@@ -0,0 +1,1124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d7Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "real_g1_relative_eef_relative_joints": {
6
+ "video": {
7
+ "delta_indices": [
8
+ -20,
9
+ 0
10
+ ],
11
+ "modality_keys": [
12
+ "ego_view"
13
+ ],
14
+ "sin_cos_embedding_keys": null,
15
+ "mean_std_embedding_keys": null,
16
+ "action_configs": null
17
+ },
18
+ "state": {
19
+ "delta_indices": [
20
+ 0
21
+ ],
22
+ "modality_keys": [
23
+ "left_wrist_eef_9d",
24
+ "right_wrist_eef_9d",
25
+ "left_hand",
26
+ "right_hand",
27
+ "left_arm",
28
+ "right_arm",
29
+ "waist"
30
+ ],
31
+ "sin_cos_embedding_keys": null,
32
+ "mean_std_embedding_keys": null,
33
+ "action_configs": null
34
+ },
35
+ "action": {
36
+ "delta_indices": [
37
+ 0,
38
+ 1,
39
+ 2,
40
+ 3,
41
+ 4,
42
+ 5,
43
+ 6,
44
+ 7,
45
+ 8,
46
+ 9,
47
+ 10,
48
+ 11,
49
+ 12,
50
+ 13,
51
+ 14,
52
+ 15,
53
+ 16,
54
+ 17,
55
+ 18,
56
+ 19,
57
+ 20,
58
+ 21,
59
+ 22,
60
+ 23,
61
+ 24,
62
+ 25,
63
+ 26,
64
+ 27,
65
+ 28,
66
+ 29,
67
+ 30,
68
+ 31,
69
+ 32,
70
+ 33,
71
+ 34,
72
+ 35,
73
+ 36,
74
+ 37,
75
+ 38,
76
+ 39
77
+ ],
78
+ "modality_keys": [
79
+ "left_wrist_eef_9d",
80
+ "right_wrist_eef_9d",
81
+ "left_hand",
82
+ "right_hand",
83
+ "left_arm",
84
+ "right_arm",
85
+ "waist",
86
+ "base_height_command",
87
+ "navigate_command"
88
+ ],
89
+ "sin_cos_embedding_keys": null,
90
+ "mean_std_embedding_keys": null,
91
+ "action_configs": [
92
+ {
93
+ "rep": "RELATIVE",
94
+ "type": "EEF",
95
+ "format": "XYZ_ROT6D",
96
+ "state_key": "left_wrist_eef_9d"
97
+ },
98
+ {
99
+ "rep": "RELATIVE",
100
+ "type": "EEF",
101
+ "format": "XYZ_ROT6D",
102
+ "state_key": "right_wrist_eef_9d"
103
+ },
104
+ {
105
+ "rep": "ABSOLUTE",
106
+ "type": "NON_EEF",
107
+ "format": "DEFAULT",
108
+ "state_key": "left_hand"
109
+ },
110
+ {
111
+ "rep": "ABSOLUTE",
112
+ "type": "NON_EEF",
113
+ "format": "DEFAULT",
114
+ "state_key": "right_hand"
115
+ },
116
+ {
117
+ "rep": "RELATIVE",
118
+ "type": "NON_EEF",
119
+ "format": "DEFAULT",
120
+ "state_key": "left_arm"
121
+ },
122
+ {
123
+ "rep": "RELATIVE",
124
+ "type": "NON_EEF",
125
+ "format": "DEFAULT",
126
+ "state_key": "right_arm"
127
+ },
128
+ {
129
+ "rep": "ABSOLUTE",
130
+ "type": "NON_EEF",
131
+ "format": "DEFAULT",
132
+ "state_key": "waist"
133
+ },
134
+ {
135
+ "rep": "ABSOLUTE",
136
+ "type": "NON_EEF",
137
+ "format": "DEFAULT",
138
+ "state_key": "base_height_command"
139
+ },
140
+ {
141
+ "rep": "ABSOLUTE",
142
+ "type": "NON_EEF",
143
+ "format": "DEFAULT",
144
+ "state_key": "navigate_command"
145
+ }
146
+ ]
147
+ },
148
+ "language": {
149
+ "delta_indices": [
150
+ 0
151
+ ],
152
+ "modality_keys": [
153
+ "annotation.human.task_description"
154
+ ],
155
+ "sin_cos_embedding_keys": null,
156
+ "mean_std_embedding_keys": null,
157
+ "action_configs": null
158
+ }
159
+ },
160
+ "real_r1_pro_sharpa_relative_eef_mecka": {
161
+ "video": {
162
+ "delta_indices": [
163
+ -30,
164
+ 0
165
+ ],
166
+ "modality_keys": [
167
+ "ego_view_cropratio_res320x240_freq30"
168
+ ],
169
+ "sin_cos_embedding_keys": null,
170
+ "mean_std_embedding_keys": null,
171
+ "action_configs": null
172
+ },
173
+ "state": {
174
+ "delta_indices": [
175
+ 0
176
+ ],
177
+ "modality_keys": [
178
+ "left_wrist_eef",
179
+ "right_wrist_eef",
180
+ "left_hand_joints",
181
+ "right_hand_joints"
182
+ ],
183
+ "sin_cos_embedding_keys": null,
184
+ "mean_std_embedding_keys": null,
185
+ "action_configs": null
186
+ },
187
+ "action": {
188
+ "delta_indices": [
189
+ 0,
190
+ 1,
191
+ 2,
192
+ 3,
193
+ 4,
194
+ 5,
195
+ 6,
196
+ 7,
197
+ 8,
198
+ 9,
199
+ 10,
200
+ 11,
201
+ 12,
202
+ 13,
203
+ 14,
204
+ 15,
205
+ 16,
206
+ 17,
207
+ 18,
208
+ 19,
209
+ 20,
210
+ 21,
211
+ 22,
212
+ 23,
213
+ 24,
214
+ 25,
215
+ 26,
216
+ 27,
217
+ 28,
218
+ 29,
219
+ 30,
220
+ 31,
221
+ 32,
222
+ 33,
223
+ 34,
224
+ 35,
225
+ 36,
226
+ 37,
227
+ 38,
228
+ 39
229
+ ],
230
+ "modality_keys": [
231
+ "left_wrist_eef",
232
+ "right_wrist_eef",
233
+ "left_hand_joints",
234
+ "right_hand_joints"
235
+ ],
236
+ "sin_cos_embedding_keys": null,
237
+ "mean_std_embedding_keys": null,
238
+ "action_configs": [
239
+ {
240
+ "rep": "RELATIVE",
241
+ "type": "EEF",
242
+ "format": "XYZ_ROT6D",
243
+ "state_key": "left_wrist_eef"
244
+ },
245
+ {
246
+ "rep": "RELATIVE",
247
+ "type": "EEF",
248
+ "format": "XYZ_ROT6D",
249
+ "state_key": "right_wrist_eef"
250
+ },
251
+ {
252
+ "rep": "ABSOLUTE",
253
+ "type": "NON_EEF",
254
+ "format": "DEFAULT",
255
+ "state_key": "left_hand_joints"
256
+ },
257
+ {
258
+ "rep": "ABSOLUTE",
259
+ "type": "NON_EEF",
260
+ "format": "DEFAULT",
261
+ "state_key": "right_hand_joints"
262
+ }
263
+ ]
264
+ },
265
+ "language": {
266
+ "delta_indices": [
267
+ 0
268
+ ],
269
+ "modality_keys": [
270
+ "annotation.human.coarse_action"
271
+ ],
272
+ "sin_cos_embedding_keys": null,
273
+ "mean_std_embedding_keys": null,
274
+ "action_configs": null
275
+ }
276
+ },
277
+ "real_r1_pro_sharpa_relative_eef_human": {
278
+ "video": {
279
+ "delta_indices": [
280
+ -20,
281
+ 0
282
+ ],
283
+ "modality_keys": [
284
+ "ego_view_res320x240_freq20",
285
+ "left_wrist_view_res320x240_freq20",
286
+ "right_wrist_view_res320x240_freq20"
287
+ ],
288
+ "sin_cos_embedding_keys": null,
289
+ "mean_std_embedding_keys": null,
290
+ "action_configs": null
291
+ },
292
+ "state": {
293
+ "delta_indices": [
294
+ 0
295
+ ],
296
+ "modality_keys": [
297
+ "left_wrist_eef",
298
+ "right_wrist_eef",
299
+ "left_hand_joints",
300
+ "right_hand_joints"
301
+ ],
302
+ "sin_cos_embedding_keys": null,
303
+ "mean_std_embedding_keys": null,
304
+ "action_configs": null
305
+ },
306
+ "action": {
307
+ "delta_indices": [
308
+ 0,
309
+ 1,
310
+ 2,
311
+ 3,
312
+ 4,
313
+ 5,
314
+ 6,
315
+ 7,
316
+ 8,
317
+ 9,
318
+ 10,
319
+ 11,
320
+ 12,
321
+ 13,
322
+ 14,
323
+ 15,
324
+ 16,
325
+ 17,
326
+ 18,
327
+ 19,
328
+ 20,
329
+ 21,
330
+ 22,
331
+ 23,
332
+ 24,
333
+ 25,
334
+ 26,
335
+ 27,
336
+ 28,
337
+ 29,
338
+ 30,
339
+ 31,
340
+ 32,
341
+ 33,
342
+ 34,
343
+ 35,
344
+ 36,
345
+ 37,
346
+ 38,
347
+ 39
348
+ ],
349
+ "modality_keys": [
350
+ "left_wrist_eef",
351
+ "right_wrist_eef",
352
+ "left_hand_joints",
353
+ "right_hand_joints"
354
+ ],
355
+ "sin_cos_embedding_keys": null,
356
+ "mean_std_embedding_keys": null,
357
+ "action_configs": [
358
+ {
359
+ "rep": "RELATIVE",
360
+ "type": "EEF",
361
+ "format": "XYZ_ROT6D",
362
+ "state_key": "left_wrist_eef"
363
+ },
364
+ {
365
+ "rep": "RELATIVE",
366
+ "type": "EEF",
367
+ "format": "XYZ_ROT6D",
368
+ "state_key": "right_wrist_eef"
369
+ },
370
+ {
371
+ "rep": "ABSOLUTE",
372
+ "type": "NON_EEF",
373
+ "format": "DEFAULT",
374
+ "state_key": "left_hand_joints"
375
+ },
376
+ {
377
+ "rep": "ABSOLUTE",
378
+ "type": "NON_EEF",
379
+ "format": "DEFAULT",
380
+ "state_key": "right_hand_joints"
381
+ }
382
+ ]
383
+ },
384
+ "language": {
385
+ "delta_indices": [
386
+ 0
387
+ ],
388
+ "modality_keys": [
389
+ "annotation.human.coarse_action"
390
+ ],
391
+ "sin_cos_embedding_keys": null,
392
+ "mean_std_embedding_keys": null,
393
+ "action_configs": null
394
+ }
395
+ },
396
+ "real_r1_pro_sharpa_relative_eef": {
397
+ "video": {
398
+ "delta_indices": [
399
+ -20,
400
+ 0
401
+ ],
402
+ "modality_keys": [
403
+ "ego_view_res320x240_freq20",
404
+ "left_wrist_view_res320x240_freq20",
405
+ "right_wrist_view_res320x240_freq20"
406
+ ],
407
+ "sin_cos_embedding_keys": null,
408
+ "mean_std_embedding_keys": null,
409
+ "action_configs": null
410
+ },
411
+ "state": {
412
+ "delta_indices": [
413
+ 0
414
+ ],
415
+ "modality_keys": [
416
+ "left_wrist_eef",
417
+ "right_wrist_eef",
418
+ "left_hand_joints",
419
+ "right_hand_joints"
420
+ ],
421
+ "sin_cos_embedding_keys": null,
422
+ "mean_std_embedding_keys": null,
423
+ "action_configs": null
424
+ },
425
+ "action": {
426
+ "delta_indices": [
427
+ 0,
428
+ 1,
429
+ 2,
430
+ 3,
431
+ 4,
432
+ 5,
433
+ 6,
434
+ 7,
435
+ 8,
436
+ 9,
437
+ 10,
438
+ 11,
439
+ 12,
440
+ 13,
441
+ 14,
442
+ 15,
443
+ 16,
444
+ 17,
445
+ 18,
446
+ 19,
447
+ 20,
448
+ 21,
449
+ 22,
450
+ 23,
451
+ 24,
452
+ 25,
453
+ 26,
454
+ 27,
455
+ 28,
456
+ 29,
457
+ 30,
458
+ 31,
459
+ 32,
460
+ 33,
461
+ 34,
462
+ 35,
463
+ 36,
464
+ 37,
465
+ 38,
466
+ 39
467
+ ],
468
+ "modality_keys": [
469
+ "left_wrist_eef",
470
+ "right_wrist_eef",
471
+ "left_hand_joints",
472
+ "right_hand_joints"
473
+ ],
474
+ "sin_cos_embedding_keys": null,
475
+ "mean_std_embedding_keys": null,
476
+ "action_configs": [
477
+ {
478
+ "rep": "RELATIVE",
479
+ "type": "EEF",
480
+ "format": "XYZ_ROT6D",
481
+ "state_key": "left_wrist_eef"
482
+ },
483
+ {
484
+ "rep": "RELATIVE",
485
+ "type": "EEF",
486
+ "format": "XYZ_ROT6D",
487
+ "state_key": "right_wrist_eef"
488
+ },
489
+ {
490
+ "rep": "ABSOLUTE",
491
+ "type": "NON_EEF",
492
+ "format": "DEFAULT",
493
+ "state_key": "left_hand_joints"
494
+ },
495
+ {
496
+ "rep": "ABSOLUTE",
497
+ "type": "NON_EEF",
498
+ "format": "DEFAULT",
499
+ "state_key": "right_hand_joints"
500
+ }
501
+ ]
502
+ },
503
+ "language": {
504
+ "delta_indices": [
505
+ 0
506
+ ],
507
+ "modality_keys": [
508
+ "annotation.human.coarse_action"
509
+ ],
510
+ "sin_cos_embedding_keys": null,
511
+ "mean_std_embedding_keys": null,
512
+ "action_configs": null
513
+ }
514
+ },
515
+ "xdof_relative_eef_relative_joint": {
516
+ "video": {
517
+ "delta_indices": [
518
+ -30,
519
+ 0
520
+ ],
521
+ "modality_keys": [
522
+ "top_camera-images-rgb_320_240",
523
+ "left_camera-images-rgb_320_240",
524
+ "right_camera-images-rgb_320_240"
525
+ ],
526
+ "sin_cos_embedding_keys": null,
527
+ "mean_std_embedding_keys": null,
528
+ "action_configs": null
529
+ },
530
+ "state": {
531
+ "delta_indices": [
532
+ 0
533
+ ],
534
+ "modality_keys": [
535
+ "left_wrist_eef",
536
+ "right_wrist_eef",
537
+ "left_gripper_pos",
538
+ "right_gripper_pos",
539
+ "left_joint_pos",
540
+ "right_joint_pos"
541
+ ],
542
+ "sin_cos_embedding_keys": null,
543
+ "mean_std_embedding_keys": null,
544
+ "action_configs": null
545
+ },
546
+ "action": {
547
+ "delta_indices": [
548
+ 0,
549
+ 1,
550
+ 2,
551
+ 3,
552
+ 4,
553
+ 5,
554
+ 6,
555
+ 7,
556
+ 8,
557
+ 9,
558
+ 10,
559
+ 11,
560
+ 12,
561
+ 13,
562
+ 14,
563
+ 15,
564
+ 16,
565
+ 17,
566
+ 18,
567
+ 19,
568
+ 20,
569
+ 21,
570
+ 22,
571
+ 23,
572
+ 24,
573
+ 25,
574
+ 26,
575
+ 27,
576
+ 28,
577
+ 29,
578
+ 30,
579
+ 31,
580
+ 32,
581
+ 33,
582
+ 34,
583
+ 35,
584
+ 36,
585
+ 37,
586
+ 38,
587
+ 39
588
+ ],
589
+ "modality_keys": [
590
+ "left_wrist_eef",
591
+ "right_wrist_eef",
592
+ "left_gripper_pos",
593
+ "right_gripper_pos",
594
+ "left_joint_pos",
595
+ "right_joint_pos"
596
+ ],
597
+ "sin_cos_embedding_keys": null,
598
+ "mean_std_embedding_keys": null,
599
+ "action_configs": [
600
+ {
601
+ "rep": "RELATIVE",
602
+ "type": "EEF",
603
+ "format": "XYZ_ROT6D",
604
+ "state_key": "left_wrist_eef"
605
+ },
606
+ {
607
+ "rep": "RELATIVE",
608
+ "type": "EEF",
609
+ "format": "XYZ_ROT6D",
610
+ "state_key": "right_wrist_eef"
611
+ },
612
+ {
613
+ "rep": "ABSOLUTE",
614
+ "type": "NON_EEF",
615
+ "format": "DEFAULT",
616
+ "state_key": "left_gripper_pos"
617
+ },
618
+ {
619
+ "rep": "ABSOLUTE",
620
+ "type": "NON_EEF",
621
+ "format": "DEFAULT",
622
+ "state_key": "right_gripper_pos"
623
+ },
624
+ {
625
+ "rep": "RELATIVE",
626
+ "type": "NON_EEF",
627
+ "format": "DEFAULT",
628
+ "state_key": "left_joint_pos"
629
+ },
630
+ {
631
+ "rep": "RELATIVE",
632
+ "type": "NON_EEF",
633
+ "format": "DEFAULT",
634
+ "state_key": "right_joint_pos"
635
+ }
636
+ ]
637
+ },
638
+ "language": {
639
+ "delta_indices": [
640
+ 0
641
+ ],
642
+ "modality_keys": [
643
+ "annotation.task"
644
+ ],
645
+ "sin_cos_embedding_keys": null,
646
+ "mean_std_embedding_keys": null,
647
+ "action_configs": null
648
+ }
649
+ },
650
+ "real_r1_pro_sharpa_relative_eef_maxinsights": {
651
+ "video": {
652
+ "delta_indices": [
653
+ -30,
654
+ 0
655
+ ],
656
+ "modality_keys": [
657
+ "ego_view_cropratio_res320x240_freq30"
658
+ ],
659
+ "sin_cos_embedding_keys": null,
660
+ "mean_std_embedding_keys": null,
661
+ "action_configs": null
662
+ },
663
+ "state": {
664
+ "delta_indices": [
665
+ 0
666
+ ],
667
+ "modality_keys": [
668
+ "left_wrist_eef",
669
+ "right_wrist_eef",
670
+ "left_hand_joints",
671
+ "right_hand_joints"
672
+ ],
673
+ "sin_cos_embedding_keys": null,
674
+ "mean_std_embedding_keys": null,
675
+ "action_configs": null
676
+ },
677
+ "action": {
678
+ "delta_indices": [
679
+ 0,
680
+ 1,
681
+ 2,
682
+ 3,
683
+ 4,
684
+ 5,
685
+ 6,
686
+ 7,
687
+ 8,
688
+ 9,
689
+ 10,
690
+ 11,
691
+ 12,
692
+ 13,
693
+ 14,
694
+ 15,
695
+ 16,
696
+ 17,
697
+ 18,
698
+ 19,
699
+ 20,
700
+ 21,
701
+ 22,
702
+ 23,
703
+ 24,
704
+ 25,
705
+ 26,
706
+ 27,
707
+ 28,
708
+ 29,
709
+ 30,
710
+ 31,
711
+ 32,
712
+ 33,
713
+ 34,
714
+ 35,
715
+ 36,
716
+ 37,
717
+ 38,
718
+ 39
719
+ ],
720
+ "modality_keys": [
721
+ "left_wrist_eef",
722
+ "right_wrist_eef",
723
+ "left_hand_joints",
724
+ "right_hand_joints"
725
+ ],
726
+ "sin_cos_embedding_keys": null,
727
+ "mean_std_embedding_keys": null,
728
+ "action_configs": [
729
+ {
730
+ "rep": "RELATIVE",
731
+ "type": "EEF",
732
+ "format": "XYZ_ROT6D",
733
+ "state_key": "left_wrist_eef"
734
+ },
735
+ {
736
+ "rep": "RELATIVE",
737
+ "type": "EEF",
738
+ "format": "XYZ_ROT6D",
739
+ "state_key": "right_wrist_eef"
740
+ },
741
+ {
742
+ "rep": "ABSOLUTE",
743
+ "type": "NON_EEF",
744
+ "format": "DEFAULT",
745
+ "state_key": "left_hand_joints"
746
+ },
747
+ {
748
+ "rep": "ABSOLUTE",
749
+ "type": "NON_EEF",
750
+ "format": "DEFAULT",
751
+ "state_key": "right_hand_joints"
752
+ }
753
+ ]
754
+ },
755
+ "language": {
756
+ "delta_indices": [
757
+ 0
758
+ ],
759
+ "modality_keys": [
760
+ "annotation.human.coarse_action"
761
+ ],
762
+ "sin_cos_embedding_keys": null,
763
+ "mean_std_embedding_keys": null,
764
+ "action_configs": null
765
+ }
766
+ },
767
+ "xdof_relative_eef_relative_joint_subtask": {
768
+ "video": {
769
+ "delta_indices": [
770
+ -30,
771
+ 0
772
+ ],
773
+ "modality_keys": [
774
+ "top_camera-images-rgb_320_240",
775
+ "left_camera-images-rgb_320_240",
776
+ "right_camera-images-rgb_320_240"
777
+ ],
778
+ "sin_cos_embedding_keys": null,
779
+ "mean_std_embedding_keys": null,
780
+ "action_configs": null
781
+ },
782
+ "state": {
783
+ "delta_indices": [
784
+ 0
785
+ ],
786
+ "modality_keys": [
787
+ "left_wrist_eef",
788
+ "right_wrist_eef",
789
+ "left_gripper_pos",
790
+ "right_gripper_pos",
791
+ "left_joint_pos",
792
+ "right_joint_pos"
793
+ ],
794
+ "sin_cos_embedding_keys": null,
795
+ "mean_std_embedding_keys": null,
796
+ "action_configs": null
797
+ },
798
+ "action": {
799
+ "delta_indices": [
800
+ 0,
801
+ 1,
802
+ 2,
803
+ 3,
804
+ 4,
805
+ 5,
806
+ 6,
807
+ 7,
808
+ 8,
809
+ 9,
810
+ 10,
811
+ 11,
812
+ 12,
813
+ 13,
814
+ 14,
815
+ 15,
816
+ 16,
817
+ 17,
818
+ 18,
819
+ 19,
820
+ 20,
821
+ 21,
822
+ 22,
823
+ 23,
824
+ 24,
825
+ 25,
826
+ 26,
827
+ 27,
828
+ 28,
829
+ 29,
830
+ 30,
831
+ 31,
832
+ 32,
833
+ 33,
834
+ 34,
835
+ 35,
836
+ 36,
837
+ 37,
838
+ 38,
839
+ 39
840
+ ],
841
+ "modality_keys": [
842
+ "left_wrist_eef",
843
+ "right_wrist_eef",
844
+ "left_gripper_pos",
845
+ "right_gripper_pos",
846
+ "left_joint_pos",
847
+ "right_joint_pos"
848
+ ],
849
+ "sin_cos_embedding_keys": null,
850
+ "mean_std_embedding_keys": null,
851
+ "action_configs": [
852
+ {
853
+ "rep": "RELATIVE",
854
+ "type": "EEF",
855
+ "format": "XYZ_ROT6D",
856
+ "state_key": "left_wrist_eef"
857
+ },
858
+ {
859
+ "rep": "RELATIVE",
860
+ "type": "EEF",
861
+ "format": "XYZ_ROT6D",
862
+ "state_key": "right_wrist_eef"
863
+ },
864
+ {
865
+ "rep": "ABSOLUTE",
866
+ "type": "NON_EEF",
867
+ "format": "DEFAULT",
868
+ "state_key": "left_gripper_pos"
869
+ },
870
+ {
871
+ "rep": "ABSOLUTE",
872
+ "type": "NON_EEF",
873
+ "format": "DEFAULT",
874
+ "state_key": "right_gripper_pos"
875
+ },
876
+ {
877
+ "rep": "RELATIVE",
878
+ "type": "NON_EEF",
879
+ "format": "DEFAULT",
880
+ "state_key": "left_joint_pos"
881
+ },
882
+ {
883
+ "rep": "RELATIVE",
884
+ "type": "NON_EEF",
885
+ "format": "DEFAULT",
886
+ "state_key": "right_joint_pos"
887
+ }
888
+ ]
889
+ },
890
+ "language": {
891
+ "delta_indices": [
892
+ 0
893
+ ],
894
+ "modality_keys": [
895
+ "annotation.sub_task"
896
+ ],
897
+ "sin_cos_embedding_keys": null,
898
+ "mean_std_embedding_keys": null,
899
+ "action_configs": null
900
+ }
901
+ },
902
+ "oxe_droid_relative_eef_relative_joint": {
903
+ "video": {
904
+ "delta_indices": [
905
+ -15,
906
+ 0
907
+ ],
908
+ "modality_keys": [
909
+ "exterior_image_1_left",
910
+ "wrist_image_left"
911
+ ],
912
+ "sin_cos_embedding_keys": null,
913
+ "mean_std_embedding_keys": null,
914
+ "action_configs": null
915
+ },
916
+ "state": {
917
+ "delta_indices": [
918
+ 0
919
+ ],
920
+ "modality_keys": [
921
+ "eef_9d",
922
+ "gripper_position",
923
+ "joint_position"
924
+ ],
925
+ "sin_cos_embedding_keys": null,
926
+ "mean_std_embedding_keys": null,
927
+ "action_configs": null
928
+ },
929
+ "action": {
930
+ "delta_indices": [
931
+ 0,
932
+ 1,
933
+ 2,
934
+ 3,
935
+ 4,
936
+ 5,
937
+ 6,
938
+ 7,
939
+ 8,
940
+ 9,
941
+ 10,
942
+ 11,
943
+ 12,
944
+ 13,
945
+ 14,
946
+ 15,
947
+ 16,
948
+ 17,
949
+ 18,
950
+ 19,
951
+ 20,
952
+ 21,
953
+ 22,
954
+ 23,
955
+ 24,
956
+ 25,
957
+ 26,
958
+ 27,
959
+ 28,
960
+ 29,
961
+ 30,
962
+ 31,
963
+ 32,
964
+ 33,
965
+ 34,
966
+ 35,
967
+ 36,
968
+ 37,
969
+ 38,
970
+ 39
971
+ ],
972
+ "modality_keys": [
973
+ "eef_9d",
974
+ "gripper_position",
975
+ "joint_position"
976
+ ],
977
+ "sin_cos_embedding_keys": null,
978
+ "mean_std_embedding_keys": null,
979
+ "action_configs": [
980
+ {
981
+ "rep": "RELATIVE",
982
+ "type": "EEF",
983
+ "format": "XYZ_ROT6D",
984
+ "state_key": "eef_9d"
985
+ },
986
+ {
987
+ "rep": "ABSOLUTE",
988
+ "type": "NON_EEF",
989
+ "format": "DEFAULT",
990
+ "state_key": "gripper_position"
991
+ },
992
+ {
993
+ "rep": "RELATIVE",
994
+ "type": "NON_EEF",
995
+ "format": "DEFAULT",
996
+ "state_key": "joint_position"
997
+ }
998
+ ]
999
+ },
1000
+ "language": {
1001
+ "delta_indices": [
1002
+ 0
1003
+ ],
1004
+ "modality_keys": [
1005
+ "annotation.language.language_instruction"
1006
+ ],
1007
+ "sin_cos_embedding_keys": null,
1008
+ "mean_std_embedding_keys": null,
1009
+ "action_configs": null
1010
+ }
1011
+ },
1012
+ "new_embodiment": {
1013
+ "video": {
1014
+ "delta_indices": [
1015
+ 0
1016
+ ],
1017
+ "modality_keys": [
1018
+ "top",
1019
+ "side",
1020
+ "wrist"
1021
+ ],
1022
+ "sin_cos_embedding_keys": null,
1023
+ "mean_std_embedding_keys": null,
1024
+ "action_configs": null
1025
+ },
1026
+ "state": {
1027
+ "delta_indices": [
1028
+ 0
1029
+ ],
1030
+ "modality_keys": [
1031
+ "single_arm",
1032
+ "gripper"
1033
+ ],
1034
+ "sin_cos_embedding_keys": null,
1035
+ "mean_std_embedding_keys": null,
1036
+ "action_configs": null
1037
+ },
1038
+ "action": {
1039
+ "delta_indices": [
1040
+ 0,
1041
+ 1,
1042
+ 2,
1043
+ 3,
1044
+ 4,
1045
+ 5,
1046
+ 6,
1047
+ 7,
1048
+ 8,
1049
+ 9,
1050
+ 10,
1051
+ 11,
1052
+ 12,
1053
+ 13,
1054
+ 14,
1055
+ 15
1056
+ ],
1057
+ "modality_keys": [
1058
+ "single_arm",
1059
+ "gripper"
1060
+ ],
1061
+ "sin_cos_embedding_keys": null,
1062
+ "mean_std_embedding_keys": null,
1063
+ "action_configs": [
1064
+ {
1065
+ "rep": "RELATIVE",
1066
+ "type": "NON_EEF",
1067
+ "format": "DEFAULT",
1068
+ "state_key": null
1069
+ },
1070
+ {
1071
+ "rep": "ABSOLUTE",
1072
+ "type": "NON_EEF",
1073
+ "format": "DEFAULT",
1074
+ "state_key": null
1075
+ }
1076
+ ]
1077
+ },
1078
+ "language": {
1079
+ "delta_indices": [
1080
+ 0
1081
+ ],
1082
+ "modality_keys": [
1083
+ "annotation.human.task_description"
1084
+ ],
1085
+ "sin_cos_embedding_keys": null,
1086
+ "mean_std_embedding_keys": null,
1087
+ "action_configs": null
1088
+ }
1089
+ }
1090
+ },
1091
+ "image_crop_size": [
1092
+ 230,
1093
+ 230
1094
+ ],
1095
+ "image_target_size": [
1096
+ 256,
1097
+ 256
1098
+ ],
1099
+ "use_albumentations": true,
1100
+ "random_rotation_angle": 0,
1101
+ "color_jitter_params": {
1102
+ "brightness": 0.3,
1103
+ "contrast": 0.4,
1104
+ "saturation": 0.5,
1105
+ "hue": 0.08
1106
+ },
1107
+ "shortest_image_edge": 256,
1108
+ "crop_fraction": 0.95,
1109
+ "letter_box_transform": false,
1110
+ "model_name": "nvidia/Cosmos-Reason2-2B",
1111
+ "model_type": "qwen",
1112
+ "formalize_language": true,
1113
+ "max_state_dim": 132,
1114
+ "max_action_dim": 132,
1115
+ "max_action_horizon": 40,
1116
+ "use_percentiles": true,
1117
+ "use_mean_std": false,
1118
+ "clip_outliers": true,
1119
+ "apply_sincos_state_encoding": false,
1120
+ "use_relative_action": true,
1121
+ "exclude_state": false,
1122
+ "state_dropout_prob": 0.2
1123
+ }
1124
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9154c5e9ad35634697c0726384af7d8c924ac0c4f2e1497fe3a12c241f9e46
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f42d91efd686c2847e9f7c6381a82591c8707df06f61850f29ee39644815b943
3
+ size 1465
statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [],
12
+ "logging_steps": 10,
13
+ "max_steps": 1,
14
+ "num_input_tokens_seen": 0,
15
+ "num_train_epochs": 9223372036854775807,
16
+ "save_steps": 1,
17
+ "stateful_callbacks": {
18
+ "TrainerControl": {
19
+ "args": {
20
+ "should_epoch_stop": false,
21
+ "should_evaluate": false,
22
+ "should_log": false,
23
+ "should_save": true,
24
+ "should_training_stop": true
25
+ },
26
+ "attributes": {}
27
+ }
28
+ },
29
+ "total_flos": 0.0,
30
+ "train_batch_size": 1,
31
+ "trial_name": null,
32
+ "trial_params": null
33
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c12ff511086145db8b0a574153fb71263f4cca6aa25ea6f78ad1f4ed3b1f7c1
3
+ size 5841
wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d7", "run_id": "so101_from_a100"}