Dongkkka committed on
Commit
68df8e0
·
verified ·
1 Parent(s): 8c0bf3c

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.0,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
embodiment_id.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "oxe_droid": 16,
10
+ "new_embodiment": 10
11
+ }
experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ extra_augmentation_config: null
31
+ formalize_language: true
32
+ apply_sincos_state_encoding: false
33
+ use_relative_action: true
34
+ max_state_dim: 29
35
+ max_action_dim: 29
36
+ action_horizon: 16
37
+ hidden_size: 1024
38
+ input_embedding_dim: 1536
39
+ add_pos_embed: true
40
+ attn_dropout: 0.2
41
+ use_vlln: true
42
+ max_seq_len: 1024
43
+ use_alternate_vl_dit: true
44
+ attend_text_every_n_blocks: 2
45
+ diffusion_model_cfg:
46
+ positional_embeddings: null
47
+ num_layers: 32
48
+ num_attention_heads: 32
49
+ attention_head_dim: 48
50
+ norm_type: ada_norm
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ output_dim: 1024
54
+ interleave_self_attention: true
55
+ num_inference_timesteps: 4
56
+ noise_beta_alpha: 1.5
57
+ noise_beta_beta: 1.0
58
+ noise_s: 0.999
59
+ num_timestep_buckets: 1000
60
+ tune_projector: true
61
+ tune_diffusion_model: true
62
+ tune_vlln: true
63
+ state_dropout_prob: 0.0
64
+ state_additive_noise_scale: 0.0
65
+ max_num_embodiments: 32
66
+ data:
67
+ datasets:
68
+ - dataset_paths:
69
+ - /data/datasets/Dongkkka/PATs_upload_test_lerobot
70
+ embodiment_tag: new_embodiment
71
+ mix_ratio: 1.0
72
+ dataset_type: physical_embodiment
73
+ val_dataset_path: null
74
+ modality_configs:
75
+ new_embodiment:
76
+ video:
77
+ delta_indices:
78
+ - 0
79
+ modality_keys:
80
+ - cam_left_head
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - arm_left
89
+ - arm_right
90
+ - head
91
+ - lift
92
+ - other
93
+ sin_cos_embedding_keys: null
94
+ mean_std_embedding_keys: null
95
+ action_configs: null
96
+ action:
97
+ delta_indices:
98
+ - 0
99
+ - 1
100
+ - 2
101
+ - 3
102
+ - 4
103
+ - 5
104
+ - 6
105
+ - 7
106
+ - 8
107
+ - 9
108
+ - 10
109
+ - 11
110
+ - 12
111
+ - 13
112
+ - 14
113
+ - 15
114
+ modality_keys:
115
+ - arm_left
116
+ - arm_right
117
+ - head
118
+ - lift
119
+ - other
120
+ sin_cos_embedding_keys: null
121
+ mean_std_embedding_keys: null
122
+ action_configs:
123
+ - rep: ABSOLUTE
124
+ type: NON_EEF
125
+ format: DEFAULT
126
+ state_key: null
127
+ - rep: ABSOLUTE
128
+ type: NON_EEF
129
+ format: DEFAULT
130
+ state_key: null
131
+ - rep: ABSOLUTE
132
+ type: NON_EEF
133
+ format: DEFAULT
134
+ state_key: null
135
+ - rep: ABSOLUTE
136
+ type: NON_EEF
137
+ format: DEFAULT
138
+ state_key: null
139
+ - rep: ABSOLUTE
140
+ type: NON_EEF
141
+ format: DEFAULT
142
+ state_key: null
143
+ language:
144
+ delta_indices:
145
+ - 0
146
+ modality_keys:
147
+ - annotation.human.task_description
148
+ sin_cos_embedding_keys: null
149
+ mean_std_embedding_keys: null
150
+ action_configs: null
151
+ download_cache: false
152
+ shard_size: 1024
153
+ episode_sampling_rate: 0.1
154
+ num_shards_per_epoch: 100000
155
+ override_pretraining_statistics: false
156
+ mode: single_turn
157
+ random_chop: 0.0
158
+ mock_dataset_mode: false
159
+ shuffle: true
160
+ seed: 42
161
+ multiprocessing_context: fork
162
+ allow_padding: false
163
+ subsample_ratio: 1.0
164
+ image_crop_size:
165
+ - 244
166
+ - 244
167
+ image_target_size:
168
+ - 224
169
+ - 224
170
+ video_backend: torchcodec
171
+ training:
172
+ output_dir: /data/checkpoints/PATs_upload_test_model
173
+ experiment_name: null
174
+ max_steps: 100
175
+ global_batch_size: 48
176
+ batch_size: null
177
+ gradient_accumulation_steps: 1
178
+ learning_rate: 0.0001
179
+ lr_scheduler_type: cosine
180
+ weight_decay: 1.0e-05
181
+ warmup_ratio: 0.05
182
+ warmup_steps: 0
183
+ max_grad_norm: 1.0
184
+ optim: adamw_torch
185
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
186
+ tf32: true
187
+ fp16: false
188
+ bf16: true
189
+ eval_bf16: true
190
+ logging_steps: 10
191
+ save_steps: 100
192
+ save_total_limit: 10
193
+ save_vl_model: false
194
+ upload_checkpoints: false
195
+ upload_every: 1000
196
+ upload_last_n_checkpoints: 5
197
+ max_concurrent_uploads: 2
198
+ eval_strategy: 'no'
199
+ eval_steps: 500
200
+ eval_set_split_ratio: 0.1
201
+ eval_batch_size: 2
202
+ save_best_eval_metric_name: ''
203
+ save_best_eval_metric_greater_is_better: true
204
+ deepspeed_stage: 2
205
+ gradient_checkpointing: false
206
+ transformers_trust_remote_code: true
207
+ transformers_local_files_only: false
208
+ transformers_cache_dir: null
209
+ transformers_access_token: null
210
+ use_ddp: false
211
+ ddp_bucket_cap_mb: 100
212
+ num_gpus: 1
213
+ dataloader_num_workers: 8
214
+ remove_unused_columns: false
215
+ use_wandb: false
216
+ wandb_project: finetune-gr00t-n1d6
217
+ enable_profiling: false
218
+ max_retries: 3
219
+ assert_loss_less_than: null
220
+ add_rl_callback: false
221
+ enable_open_loop_eval: false
222
+ open_loop_eval_traj_ids:
223
+ - 0
224
+ open_loop_eval_steps_per_traj: 100
225
+ open_loop_eval_plot_indices: null
226
+ max_steps: 100
227
+ save_steps: 100
experiment_cfg/config.yaml ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /data/datasets/Dongkkka/PATs_upload_test_lerobot
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: &id002 !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - absolute
30
+ state_key: null
31
+ type: &id003 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: *id002
36
+ state_key: null
37
+ type: *id003
38
+ - !!python/object:gr00t.data.types.ActionConfig
39
+ format: *id001
40
+ rep: *id002
41
+ state_key: null
42
+ type: *id003
43
+ - !!python/object:gr00t.data.types.ActionConfig
44
+ format: *id001
45
+ rep: *id002
46
+ state_key: null
47
+ type: *id003
48
+ - !!python/object:gr00t.data.types.ActionConfig
49
+ format: *id001
50
+ rep: *id002
51
+ state_key: null
52
+ type: *id003
53
+ delta_indices:
54
+ - 0
55
+ - 1
56
+ - 2
57
+ - 3
58
+ - 4
59
+ - 5
60
+ - 6
61
+ - 7
62
+ - 8
63
+ - 9
64
+ - 10
65
+ - 11
66
+ - 12
67
+ - 13
68
+ - 14
69
+ - 15
70
+ mean_std_embedding_keys: null
71
+ modality_keys:
72
+ - arm_left
73
+ - arm_right
74
+ - head
75
+ - lift
76
+ - other
77
+ sin_cos_embedding_keys: null
78
+ language: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - annotation.human.task_description
85
+ sin_cos_embedding_keys: null
86
+ state: !!python/object:gr00t.data.types.ModalityConfig
87
+ action_configs: null
88
+ delta_indices:
89
+ - 0
90
+ mean_std_embedding_keys: null
91
+ modality_keys:
92
+ - arm_left
93
+ - arm_right
94
+ - head
95
+ - lift
96
+ - other
97
+ sin_cos_embedding_keys: null
98
+ video: !!python/object:gr00t.data.types.ModalityConfig
99
+ action_configs: null
100
+ delta_indices:
101
+ - 0
102
+ mean_std_embedding_keys: null
103
+ modality_keys:
104
+ - cam_left_head
105
+ sin_cos_embedding_keys: null
106
+ mode: single_turn
107
+ multiprocessing_context: fork
108
+ num_shards_per_epoch: 100000
109
+ override_pretraining_statistics: false
110
+ random_chop: 0.0
111
+ seed: 42
112
+ shard_size: 1024
113
+ shuffle: true
114
+ subsample_ratio: 1.0
115
+ video_backend: torchcodec
116
+ load_config_path: null
117
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
118
+ _attn_implementation_autoset: false
119
+ _attn_implementation_internal: null
120
+ _commit_hash: null
121
+ _name_or_path: ''
122
+ add_cross_attention: false
123
+ architectures: null
124
+ backbone_model_type: eagle
125
+ backbone_trainable_params_fp32: true
126
+ bad_words_ids: null
127
+ begin_suppress_tokens: null
128
+ bos_token_id: null
129
+ chunk_size_feed_forward: 0
130
+ color_jitter_params:
131
+ brightness: 0.3
132
+ contrast: 0.4
133
+ hue: 0.08
134
+ saturation: 0.5
135
+ cross_attention_hidden_size: null
136
+ decoder_start_token_id: null
137
+ diffusion_model_cfg:
138
+ attention_head_dim: 48
139
+ dropout: 0.2
140
+ final_dropout: true
141
+ interleave_self_attention: true
142
+ norm_type: ada_norm
143
+ num_attention_heads: 32
144
+ num_layers: 32
145
+ output_dim: 1024
146
+ positional_embeddings: null
147
+ diversity_penalty: 0.0
148
+ do_sample: false
149
+ eagle_collator: true
150
+ early_stopping: false
151
+ encoder_no_repeat_ngram_size: 0
152
+ eos_token_id: null
153
+ exponential_decay_length_penalty: null
154
+ extra_augmentation_config: null
155
+ finetuning_task: null
156
+ forced_bos_token_id: null
157
+ forced_eos_token_id: null
158
+ id2label:
159
+ 0: LABEL_0
160
+ 1: LABEL_1
161
+ is_decoder: false
162
+ is_encoder_decoder: false
163
+ label2id:
164
+ LABEL_0: 0
165
+ LABEL_1: 1
166
+ length_penalty: 1.0
167
+ load_bf16: false
168
+ max_length: 20
169
+ min_length: 0
170
+ model_name: nvidia/Eagle-Block2A-2B-v2
171
+ no_repeat_ngram_size: 0
172
+ num_beam_groups: 1
173
+ num_beams: 1
174
+ num_return_sequences: 1
175
+ output_attentions: false
176
+ output_hidden_states: false
177
+ output_scores: false
178
+ pad_token_id: null
179
+ prefix: null
180
+ problem_type: null
181
+ pruned_heads: {}
182
+ random_rotation_angle: null
183
+ remove_invalid_values: false
184
+ repetition_penalty: 1.0
185
+ reproject_vision: false
186
+ return_dict: true
187
+ return_dict_in_generate: false
188
+ sep_token_id: null
189
+ state_dropout_prob: 0.0
190
+ suppress_tokens: null
191
+ task_specific_params: null
192
+ temperature: 1.0
193
+ tf_legacy_loss: false
194
+ tie_encoder_decoder: false
195
+ tie_word_embeddings: true
196
+ tokenizer_class: null
197
+ top_k: 50
198
+ top_p: 1.0
199
+ torch_dtype: null
200
+ torchscript: false
201
+ transformers_version: null
202
+ tune_diffusion_model: true
203
+ tune_llm: false
204
+ tune_projector: true
205
+ tune_visual: false
206
+ typical_p: 1.0
207
+ use_bfloat16: false
208
+ use_relative_action: true
209
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
210
+ add_rl_callback: false
211
+ assert_loss_less_than: null
212
+ batch_size: null
213
+ bf16: true
214
+ dataloader_num_workers: 8
215
+ ddp_bucket_cap_mb: 100
216
+ deepspeed_stage: 2
217
+ enable_open_loop_eval: false
218
+ enable_profiling: false
219
+ eval_batch_size: 2
220
+ eval_bf16: true
221
+ eval_set_split_ratio: 0.1
222
+ eval_steps: 500
223
+ eval_strategy: 'no'
224
+ experiment_name: null
225
+ fp16: false
226
+ global_batch_size: 48
227
+ gradient_accumulation_steps: 1
228
+ gradient_checkpointing: false
229
+ learning_rate: 0.0001
230
+ logging_steps: 10
231
+ lr_scheduler_type: cosine
232
+ max_concurrent_uploads: 2
233
+ max_grad_norm: 1.0
234
+ max_retries: 3
235
+ max_steps: 100
236
+ num_gpus: 1
237
+ open_loop_eval_plot_indices: null
238
+ open_loop_eval_steps_per_traj: 100
239
+ open_loop_eval_traj_ids:
240
+ - 0
241
+ optim: adamw_torch
242
+ output_dir: /data/checkpoints/PATs_upload_test_model
243
+ remove_unused_columns: false
244
+ save_best_eval_metric_greater_is_better: true
245
+ save_best_eval_metric_name: ''
246
+ save_steps: 100
247
+ save_total_limit: 10
248
+ save_vl_model: false
249
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
250
+ tf32: true
251
+ transformers_access_token: null
252
+ transformers_cache_dir: null
253
+ transformers_local_files_only: false
254
+ transformers_trust_remote_code: true
255
+ upload_checkpoints: false
256
+ upload_every: 1000
257
+ upload_last_n_checkpoints: 5
258
+ use_ddp: false
259
+ use_wandb: false
260
+ wandb_project: finetune-gr00t-n1d6
261
+ warmup_ratio: 0.05
262
+ warmup_steps: 0
263
+ weight_decay: 1.0e-05
experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "arm_left": {
5
+ "min": [
6
+ 0.0261855311691761,
7
+ 0.04987834393978119,
8
+ -0.2664572596549988,
9
+ -0.3596585690975189,
10
+ 0.3398486375808716,
11
+ -0.2888677716255188,
12
+ 0.0007343311444856226,
13
+ 0.1365242898464203
14
+ ],
15
+ "max": [
16
+ 0.14100639522075653,
17
+ 0.09169130772352219,
18
+ -0.2527233362197876,
19
+ -0.2703401446342468,
20
+ 0.3994581699371338,
21
+ -0.18811637163162231,
22
+ 0.07764258980751038,
23
+ 0.2208932340145111
24
+ ],
25
+ "mean": [
26
+ 0.07342984527349472,
27
+ 0.07221090793609619,
28
+ -0.25541064143180847,
29
+ -0.3170183598995209,
30
+ 0.3570297360420227,
31
+ -0.252654492855072,
32
+ 0.047983720898628235,
33
+ 0.16140522062778473
34
+ ],
35
+ "std": [
36
+ 0.02547214739024639,
37
+ 0.007564172148704529,
38
+ 0.0024449343327422116,
39
+ 0.021817393600940704,
40
+ 0.01599450968205918,
41
+ 0.030162282288074493,
42
+ 0.019700631499290466,
43
+ 0.03007184900343418
44
+ ],
45
+ "q01": [
46
+ 0.026755980625748636,
47
+ 0.05169419512152672,
48
+ -0.2658288013935089,
49
+ -0.35693241119384767,
50
+ 0.3405748975276947,
51
+ -0.28847227334976194,
52
+ 0.0020259265135973694,
53
+ 0.1365242898464203
54
+ ],
55
+ "q99": [
56
+ 0.13866083860397338,
57
+ 0.09100484818220138,
58
+ -0.2530445146560669,
59
+ -0.27110089898109435,
60
+ 0.39925684690475466,
61
+ -0.18824484288692475,
62
+ 0.07562038660049437,
63
+ 0.2208932340145111
64
+ ]
65
+ },
66
+ "arm_right": {
67
+ "min": [
68
+ 0.023009711876511574,
69
+ -0.08837167173624039,
70
+ 0.24233300983905792,
71
+ -0.3249882161617279,
72
+ -0.5430291891098022,
73
+ -0.20141886174678802,
74
+ -0.21628637611865997,
75
+ 0.07669904083013535
76
+ ],
77
+ "max": [
78
+ 0.18333467841148376,
79
+ -0.040890175849199295,
80
+ 0.25923076272010803,
81
+ -0.29035380482673645,
82
+ -0.5261434316635132,
83
+ -0.12771588563919067,
84
+ -0.1844722479581833,
85
+ 0.07976700365543365
86
+ ],
87
+ "mean": [
88
+ 0.09076610952615738,
89
+ -0.07737327367067337,
90
+ 0.2503284811973572,
91
+ -0.31020236015319824,
92
+ -0.5314948558807373,
93
+ -0.15467683970928192,
94
+ -0.1939942091703415,
95
+ 0.07828317582607269
96
+ ],
97
+ "std": [
98
+ 0.03285541012883186,
99
+ 0.007657118607312427,
100
+ 0.007399852853268136,
101
+ 0.0069493041373787905,
102
+ 0.004396171309053799,
103
+ 0.017841925844550133,
104
+ 0.011914669536054098,
105
+ 0.0002946005261036449
106
+ ],
107
+ "q01": [
108
+ 0.026532114669680595,
109
+ -0.08812719404697418,
110
+ 0.24233300983905792,
111
+ -0.3234734082221985,
112
+ -0.5429851007461548,
113
+ -0.19559118151664734,
114
+ -0.21628637611865997,
115
+ 0.0782330185174942
116
+ ],
117
+ "q99": [
118
+ 0.17690824568271638,
119
+ -0.04201333686709405,
120
+ 0.25923076272010803,
121
+ -0.2913810956478119,
122
+ -0.5261635589599609,
123
+ -0.12910940766334533,
124
+ -0.18474199175834655,
125
+ 0.07976700365543365
126
+ ]
127
+ },
128
+ "head": {
129
+ "min": [
130
+ -2.0694557179012918e-13,
131
+ -0.0015339808305725455
132
+ ],
133
+ "max": [
134
+ -2.0694557179012918e-13,
135
+ 0.0015339808305725455
136
+ ],
137
+ "mean": [
138
+ -2.0694557179012918e-13,
139
+ 1.2471388799895067e-05
140
+ ],
141
+ "std": [
142
+ 0.0,
143
+ 0.0002109108172589913
144
+ ],
145
+ "q01": [
146
+ -2.0694557179012918e-13,
147
+ -2.0694557179012918e-13
148
+ ],
149
+ "q99": [
150
+ -2.0694557179012918e-13,
151
+ 0.0015339808305725455
152
+ ]
153
+ },
154
+ "lift": {
155
+ "min": [
156
+ 0.0
157
+ ],
158
+ "max": [
159
+ 0.0
160
+ ],
161
+ "mean": [
162
+ 0.0
163
+ ],
164
+ "std": [
165
+ 0.0
166
+ ],
167
+ "q01": [
168
+ 0.0
169
+ ],
170
+ "q99": [
171
+ 0.0
172
+ ]
173
+ },
174
+ "other": {
175
+ "min": [
176
+ -0.0023815890308469534,
177
+ -1.3601596947410144e-05,
178
+ -0.0026437826454639435
179
+ ],
180
+ "max": [
181
+ 0.0023931083269417286,
182
+ 2.1567169824265875e-05,
183
+ 0.004167238250374794
184
+ ],
185
+ "mean": [
186
+ 1.2018665529467398e-06,
187
+ -2.14848228097253e-08,
188
+ -4.104913386981934e-06
189
+ ],
190
+ "std": [
191
+ 0.0004975512274540961,
192
+ 4.607368282449897e-06,
193
+ 0.0008911995682865381
194
+ ],
195
+ "q01": [
196
+ -0.0015624607214704154,
197
+ -1.3387292019615416e-05,
198
+ -0.002579548256471753
199
+ ],
200
+ "q99": [
201
+ 0.0017830528365448096,
202
+ 1.714260106382424e-05,
203
+ 0.003313816646113988
204
+ ]
205
+ }
206
+ },
207
+ "action": {
208
+ "arm_left": {
209
+ "min": [
210
+ 0.02454369328916073,
211
+ 0.0475534051656723,
212
+ -0.26691266894340515,
213
+ -0.3666214048862457,
214
+ 0.33900976181030273,
215
+ -0.28904861211776733,
216
+ 0.0015339808305725455,
217
+ 0.13712233304977417
218
+ ],
219
+ "max": [
220
+ 0.1426602154970169,
221
+ 0.0920388475060463,
222
+ -0.251572847366333,
223
+ -0.2684466242790222,
224
+ 0.40957286953926086,
225
+ -0.1878058910369873,
226
+ 0.0782330185174942,
227
+ 0.221491277217865
228
+ ],
229
+ "mean": [
230
+ 0.07346080243587494,
231
+ 0.07216790318489075,
232
+ -0.2554052174091339,
233
+ -0.3169855773448944,
234
+ 0.35689324140548706,
235
+ -0.2531152665615082,
236
+ 0.048434652388095856,
237
+ 0.16206307709217072
238
+ ],
239
+ "std": [
240
+ 0.02742091380059719,
241
+ 0.007938423193991184,
242
+ 0.0025356656406072945,
243
+ 0.023799823597073482,
244
+ 0.01636500656604767,
245
+ 0.030521018430590574,
246
+ 0.019570723176002502,
247
+ 0.029494687914848328
248
+ ],
249
+ "q01": [
250
+ 0.025586799383163453,
251
+ 0.05166447162628174,
252
+ -0.26691266894340515,
253
+ -0.3609763693809509,
254
+ 0.33900976181030273,
255
+ -0.28904861211776733,
256
+ 0.0015339808305725455,
257
+ 0.13712233304977417
258
+ ],
259
+ "q99": [
260
+ 0.1426602154970169,
261
+ 0.0920388475060463,
262
+ -0.25310683250427246,
263
+ -0.26998060941696167,
264
+ 0.4013507390022278,
265
+ -0.1878058910369873,
266
+ 0.0756559309363365,
267
+ 0.221491277217865
268
+ ]
269
+ },
270
+ "arm_right": {
271
+ "min": [
272
+ 0.019941750913858414,
273
+ -0.09050486981868744,
274
+ 0.24236896634101868,
275
+ -0.3267379105091095,
276
+ -0.5430291891098022,
277
+ -0.20928162336349487,
278
+ -0.21629129350185394,
279
+ 0.07975145429372787
280
+ ],
281
+ "max": [
282
+ 0.1871456503868103,
283
+ -0.03988350182771683,
284
+ 0.2592427432537079,
285
+ -0.2899223566055298,
286
+ -0.526155412197113,
287
+ -0.1264466643333435,
288
+ -0.1840776950120926,
289
+ 0.07975145429372787
290
+ ],
291
+ "mean": [
292
+ 0.09057962149381638,
293
+ -0.07741815596818924,
294
+ 0.25054508447647095,
295
+ -0.31016814708709717,
296
+ -0.5314047336578369,
297
+ -0.1541411280632019,
298
+ -0.19378866255283356,
299
+ 0.07975157350301743
300
+ ],
301
+ "std": [
302
+ 0.03508458286523819,
303
+ 0.007914146408438683,
304
+ 0.00744145084172528,
305
+ 0.0075515802018344255,
306
+ 0.004423194099218413,
307
+ 0.017863281071186066,
308
+ 0.011763404123485052,
309
+ 1.1920928955078125e-07
310
+ ],
311
+ "q01": [
312
+ 0.022518837824463844,
313
+ -0.08897088468074799,
314
+ 0.24236896634101868,
315
+ -0.32520392537117004,
316
+ -0.5430291891098022,
317
+ -0.19590531289577484,
318
+ -0.21629129350185394,
319
+ 0.07975145429372787
320
+ ],
321
+ "q99": [
322
+ 0.18456857025623322,
323
+ -0.03988350182771683,
324
+ 0.2592427432537079,
325
+ -0.2899223566055298,
326
+ -0.526155412197113,
327
+ -0.12853287398815158,
328
+ -0.18512080490589142,
329
+ 0.07975145429372787
330
+ ]
331
+ },
332
+ "head": {
333
+ "min": [
334
+ -2.0694557179012918e-13,
335
+ -2.0694557179012918e-13
336
+ ],
337
+ "max": [
338
+ -2.0694557179012918e-13,
339
+ -2.0694557179012918e-13
340
+ ],
341
+ "mean": [
342
+ -2.0694557179012918e-13,
343
+ -2.0694557179012918e-13
344
+ ],
345
+ "std": [
346
+ 0.0,
347
+ 0.0
348
+ ],
349
+ "q01": [
350
+ -2.0694557179012918e-13,
351
+ -2.0694557179012918e-13
352
+ ],
353
+ "q99": [
354
+ -2.0694557179012918e-13,
355
+ -2.0694557179012918e-13
356
+ ]
357
+ },
358
+ "lift": {
359
+ "min": [
360
+ 0.0
361
+ ],
362
+ "max": [
363
+ 0.0
364
+ ],
365
+ "mean": [
366
+ 0.0
367
+ ],
368
+ "std": [
369
+ 0.0
370
+ ],
371
+ "q01": [
372
+ 0.0
373
+ ],
374
+ "q99": [
375
+ 0.0
376
+ ]
377
+ },
378
+ "other": {
379
+ "min": [
380
+ 0.0,
381
+ 0.0,
382
+ 0.0
383
+ ],
384
+ "max": [
385
+ 0.0,
386
+ 0.0,
387
+ 0.0
388
+ ],
389
+ "mean": [
390
+ 0.0,
391
+ 0.0,
392
+ 0.0
393
+ ],
394
+ "std": [
395
+ 0.0,
396
+ 0.0,
397
+ 0.0
398
+ ],
399
+ "q01": [
400
+ 0.0,
401
+ 0.0,
402
+ 0.0
403
+ ],
404
+ "q99": [
405
+ 0.0,
406
+ 0.0,
407
+ 0.0
408
+ ]
409
+ }
410
+ },
411
+ "relative_action": {}
412
+ }
413
+ }
experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "extra_augmentation_config": null,
19
+ "apply_sincos_state_encoding": true,
20
+ "use_relative_action": true,
21
+ "max_state_dim": 128,
22
+ "max_action_dim": 128,
23
+ "action_horizon": 50,
24
+ "hidden_size": 1024,
25
+ "input_embedding_dim": 1536,
26
+ "add_pos_embed": true,
27
+ "attn_dropout": 0.2,
28
+ "use_vlln": true,
29
+ "max_seq_len": 1024,
30
+ "use_alternate_vl_dit": true,
31
+ "attend_text_every_n_blocks": 2,
32
+ "diffusion_model_cfg": {
33
+ "attention_head_dim": 48,
34
+ "dropout": 0.2,
35
+ "final_dropout": true,
36
+ "interleave_self_attention": true,
37
+ "norm_type": "ada_norm",
38
+ "num_attention_heads": 32,
39
+ "num_layers": 32,
40
+ "output_dim": 1024,
41
+ "positional_embeddings": null
42
+ },
43
+ "num_inference_timesteps": 4,
44
+ "noise_beta_alpha": 1.5,
45
+ "noise_beta_beta": 1.0,
46
+ "noise_s": 0.999,
47
+ "num_timestep_buckets": 1000,
48
+ "tune_projector": true,
49
+ "tune_diffusion_model": true,
50
+ "tune_vlln": true,
51
+ "state_dropout_prob": 0.0,
52
+ "state_additive_noise_scale": 0.0,
53
+ "max_num_embodiments": 32
54
+ }
experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3cccf9061f4d9fc43fccc44c9d1ed1d5b4518fa205138f721504127766b77fe
3
+ size 4990120184
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b3f3127cd667325d6cbcb4a2b7f3dd34b14a7f530748849c3509c6c3b9e3bf
3
+ size 4823190320
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "cam_left_head"
361
+ ],
362
+ "sin_cos_embedding_keys": null,
363
+ "mean_std_embedding_keys": null,
364
+ "action_configs": null
365
+ },
366
+ "state": {
367
+ "delta_indices": [
368
+ 0
369
+ ],
370
+ "modality_keys": [
371
+ "arm_left",
372
+ "arm_right",
373
+ "head",
374
+ "lift",
375
+ "other"
376
+ ],
377
+ "sin_cos_embedding_keys": null,
378
+ "mean_std_embedding_keys": null,
379
+ "action_configs": null
380
+ },
381
+ "action": {
382
+ "delta_indices": [
383
+ 0,
384
+ 1,
385
+ 2,
386
+ 3,
387
+ 4,
388
+ 5,
389
+ 6,
390
+ 7,
391
+ 8,
392
+ 9,
393
+ 10,
394
+ 11,
395
+ 12,
396
+ 13,
397
+ 14,
398
+ 15
399
+ ],
400
+ "modality_keys": [
401
+ "arm_left",
402
+ "arm_right",
403
+ "head",
404
+ "lift",
405
+ "other"
406
+ ],
407
+ "sin_cos_embedding_keys": null,
408
+ "mean_std_embedding_keys": null,
409
+ "action_configs": [
410
+ {
411
+ "rep": "ABSOLUTE",
412
+ "type": "NON_EEF",
413
+ "format": "DEFAULT",
414
+ "state_key": null
415
+ },
416
+ {
417
+ "rep": "ABSOLUTE",
418
+ "type": "NON_EEF",
419
+ "format": "DEFAULT",
420
+ "state_key": null
421
+ },
422
+ {
423
+ "rep": "ABSOLUTE",
424
+ "type": "NON_EEF",
425
+ "format": "DEFAULT",
426
+ "state_key": null
427
+ },
428
+ {
429
+ "rep": "ABSOLUTE",
430
+ "type": "NON_EEF",
431
+ "format": "DEFAULT",
432
+ "state_key": null
433
+ },
434
+ {
435
+ "rep": "ABSOLUTE",
436
+ "type": "NON_EEF",
437
+ "format": "DEFAULT",
438
+ "state_key": null
439
+ }
440
+ ]
441
+ },
442
+ "language": {
443
+ "delta_indices": [
444
+ 0
445
+ ],
446
+ "modality_keys": [
447
+ "annotation.human.task_description"
448
+ ],
449
+ "sin_cos_embedding_keys": null,
450
+ "mean_std_embedding_keys": null,
451
+ "action_configs": null
452
+ }
453
+ }
454
+ },
455
+ "image_crop_size": null,
456
+ "image_target_size": null,
457
+ "use_albumentations": true,
458
+ "random_rotation_angle": null,
459
+ "color_jitter_params": {
460
+ "brightness": 0.3,
461
+ "contrast": 0.4,
462
+ "saturation": 0.5,
463
+ "hue": 0.08
464
+ },
465
+ "shortest_image_edge": 256,
466
+ "crop_fraction": 0.95,
467
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
468
+ "model_type": "eagle",
469
+ "formalize_language": true,
470
+ "max_state_dim": 128,
471
+ "max_action_dim": 128,
472
+ "max_action_horizon": 50,
473
+ "use_percentiles": false,
474
+ "clip_outliers": true,
475
+ "apply_sincos_state_encoding": true,
476
+ "use_relative_action": true
477
+ }
478
+ }
statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "grad_norm": 0.29096701741218567,
14
+ "learning_rate": 9.956320346634876e-05,
15
+ "loss": 1.2375,
16
+ "step": 10
17
+ },
18
+ {
19
+ "grad_norm": 0.16030608117580414,
20
+ "learning_rate": 9.473646649103818e-05,
21
+ "loss": 1.1952,
22
+ "step": 20
23
+ },
24
+ {
25
+ "grad_norm": 0.13103239238262177,
26
+ "learning_rate": 8.506183921362443e-05,
27
+ "loss": 1.2002,
28
+ "step": 30
29
+ },
30
+ {
31
+ "grad_norm": 0.11081837117671967,
32
+ "learning_rate": 7.158771761692464e-05,
33
+ "loss": 1.191,
34
+ "step": 40
35
+ },
36
+ {
37
+ "grad_norm": 0.15927913784980774,
38
+ "learning_rate": 5.577423184847932e-05,
39
+ "loss": 1.194,
40
+ "step": 50
41
+ },
42
+ {
43
+ "grad_norm": 0.2992898225784302,
44
+ "learning_rate": 3.933501846281267e-05,
45
+ "loss": 1.1546,
46
+ "step": 60
47
+ },
48
+ {
49
+ "grad_norm": 0.402879923582077,
50
+ "learning_rate": 2.405152131093926e-05,
51
+ "loss": 1.1042,
52
+ "step": 70
53
+ },
54
+ {
55
+ "grad_norm": 0.3662746846675873,
56
+ "learning_rate": 1.157994445715706e-05,
57
+ "loss": 1.091,
58
+ "step": 80
59
+ },
60
+ {
61
+ "grad_norm": 0.31836211681365967,
62
+ "learning_rate": 3.271776770026963e-06,
63
+ "loss": 1.0746,
64
+ "step": 90
65
+ },
66
+ {
67
+ "grad_norm": 0.2885839641094208,
68
+ "learning_rate": 2.7337132953697554e-08,
69
+ "loss": 1.0781,
70
+ "step": 100
71
+ }
72
+ ],
73
+ "logging_steps": 10,
74
+ "max_steps": 100,
75
+ "num_input_tokens_seen": 0,
76
+ "num_train_epochs": 9223372036854775807,
77
+ "save_steps": 100,
78
+ "stateful_callbacks": {
79
+ "TrainerControl": {
80
+ "args": {
81
+ "should_epoch_stop": false,
82
+ "should_evaluate": false,
83
+ "should_log": false,
84
+ "should_save": true,
85
+ "should_training_stop": true
86
+ },
87
+ "attributes": {}
88
+ }
89
+ },
90
+ "total_flos": 0.0,
91
+ "train_batch_size": 48,
92
+ "trial_name": null,
93
+ "trial_params": null
94
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80455889e43f93dca11660a5a2c7b0b0c42781f793878da53d76a14431b3bb9f
3
+ size 5777
wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "PATs_upload_test_model"}