Dongkkka commited on
Commit
e1cf8af
·
verified ·
1 Parent(s): 4eb8558

Upload folder using huggingface_hub

Browse files
.source_vm ADDED
@@ -0,0 +1 @@
 
 
1
+ Gr00t-train
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.0,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
embodiment_id.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "oxe_droid": 16,
10
+ "new_embodiment": 10
11
+ }
experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ extra_augmentation_config: null
31
+ formalize_language: true
32
+ apply_sincos_state_encoding: false
33
+ use_relative_action: true
34
+ max_state_dim: 29
35
+ max_action_dim: 29
36
+ action_horizon: 16
37
+ hidden_size: 1024
38
+ input_embedding_dim: 1536
39
+ add_pos_embed: true
40
+ attn_dropout: 0.2
41
+ use_vlln: true
42
+ max_seq_len: 1024
43
+ use_alternate_vl_dit: true
44
+ attend_text_every_n_blocks: 2
45
+ diffusion_model_cfg:
46
+ positional_embeddings: null
47
+ num_layers: 32
48
+ num_attention_heads: 32
49
+ attention_head_dim: 48
50
+ norm_type: ada_norm
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ output_dim: 1024
54
+ interleave_self_attention: true
55
+ num_inference_timesteps: 4
56
+ noise_beta_alpha: 1.5
57
+ noise_beta_beta: 1.0
58
+ noise_s: 0.999
59
+ num_timestep_buckets: 1000
60
+ tune_projector: true
61
+ tune_diffusion_model: true
62
+ tune_vlln: true
63
+ state_dropout_prob: 0.0
64
+ state_additive_noise_scale: 0.0
65
+ max_num_embodiments: 32
66
+ data:
67
+ datasets:
68
+ - dataset_paths:
69
+ - /data/datasets/Dongkkka/Test_lerobot
70
+ embodiment_tag: new_embodiment
71
+ mix_ratio: 1.0
72
+ dataset_type: physical_embodiment
73
+ val_dataset_path: null
74
+ modality_configs:
75
+ new_embodiment:
76
+ video:
77
+ delta_indices:
78
+ - 0
79
+ modality_keys:
80
+ - cam_left_head
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - arm_left
89
+ - arm_right
90
+ sin_cos_embedding_keys: null
91
+ mean_std_embedding_keys: null
92
+ action_configs: null
93
+ action:
94
+ delta_indices:
95
+ - 0
96
+ - 1
97
+ - 2
98
+ - 3
99
+ - 4
100
+ - 5
101
+ - 6
102
+ - 7
103
+ - 8
104
+ - 9
105
+ - 10
106
+ - 11
107
+ - 12
108
+ - 13
109
+ - 14
110
+ - 15
111
+ modality_keys:
112
+ - arm_left
113
+ - arm_right
114
+ sin_cos_embedding_keys: null
115
+ mean_std_embedding_keys: null
116
+ action_configs:
117
+ - rep: ABSOLUTE
118
+ type: NON_EEF
119
+ format: DEFAULT
120
+ state_key: null
121
+ - rep: ABSOLUTE
122
+ type: NON_EEF
123
+ format: DEFAULT
124
+ state_key: null
125
+ language:
126
+ delta_indices:
127
+ - 0
128
+ modality_keys:
129
+ - annotation.human.task_description
130
+ sin_cos_embedding_keys: null
131
+ mean_std_embedding_keys: null
132
+ action_configs: null
133
+ download_cache: false
134
+ shard_size: 1024
135
+ episode_sampling_rate: 0.1
136
+ num_shards_per_epoch: 100000
137
+ override_pretraining_statistics: false
138
+ mode: single_turn
139
+ random_chop: 0.0
140
+ mock_dataset_mode: false
141
+ shuffle: true
142
+ seed: 42
143
+ multiprocessing_context: fork
144
+ allow_padding: false
145
+ subsample_ratio: 1.0
146
+ image_crop_size:
147
+ - 244
148
+ - 244
149
+ image_target_size:
150
+ - 224
151
+ - 224
152
+ video_backend: torchcodec
153
+ training:
154
+ output_dir: /data/checkpoints/TestModel4
155
+ experiment_name: null
156
+ max_steps: 200
157
+ global_batch_size: 48
158
+ batch_size: null
159
+ gradient_accumulation_steps: 1
160
+ learning_rate: 0.0001
161
+ lr_scheduler_type: cosine
162
+ weight_decay: 1.0e-05
163
+ warmup_ratio: 0.05
164
+ warmup_steps: 0
165
+ max_grad_norm: 1.0
166
+ optim: adamw_torch
167
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
168
+ tf32: true
169
+ fp16: false
170
+ bf16: true
171
+ eval_bf16: true
172
+ logging_steps: 10
173
+ save_steps: 100
174
+ save_total_limit: 10
175
+ save_vl_model: false
176
+ upload_checkpoints: false
177
+ upload_every: 1000
178
+ upload_last_n_checkpoints: 5
179
+ max_concurrent_uploads: 2
180
+ eval_strategy: 'no'
181
+ eval_steps: 500
182
+ eval_set_split_ratio: 0.1
183
+ eval_batch_size: 2
184
+ save_best_eval_metric_name: ''
185
+ save_best_eval_metric_greater_is_better: true
186
+ deepspeed_stage: 2
187
+ gradient_checkpointing: false
188
+ transformers_trust_remote_code: true
189
+ transformers_local_files_only: false
190
+ transformers_cache_dir: null
191
+ transformers_access_token: null
192
+ use_ddp: false
193
+ ddp_bucket_cap_mb: 100
194
+ num_gpus: 1
195
+ dataloader_num_workers: 8
196
+ remove_unused_columns: false
197
+ use_wandb: false
198
+ wandb_project: finetune-gr00t-n1d6
199
+ enable_profiling: false
200
+ max_retries: 3
201
+ assert_loss_less_than: null
202
+ add_rl_callback: false
203
+ enable_open_loop_eval: false
204
+ open_loop_eval_traj_ids:
205
+ - 0
206
+ open_loop_eval_steps_per_traj: 100
207
+ open_loop_eval_plot_indices: null
208
+ max_steps: 200
209
+ save_steps: 100
experiment_cfg/config.yaml ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /data/datasets/Dongkkka/Test_lerobot
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: &id002 !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - absolute
30
+ state_key: null
31
+ type: &id003 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: *id002
36
+ state_key: null
37
+ type: *id003
38
+ delta_indices:
39
+ - 0
40
+ - 1
41
+ - 2
42
+ - 3
43
+ - 4
44
+ - 5
45
+ - 6
46
+ - 7
47
+ - 8
48
+ - 9
49
+ - 10
50
+ - 11
51
+ - 12
52
+ - 13
53
+ - 14
54
+ - 15
55
+ mean_std_embedding_keys: null
56
+ modality_keys:
57
+ - arm_left
58
+ - arm_right
59
+ sin_cos_embedding_keys: null
60
+ language: !!python/object:gr00t.data.types.ModalityConfig
61
+ action_configs: null
62
+ delta_indices:
63
+ - 0
64
+ mean_std_embedding_keys: null
65
+ modality_keys:
66
+ - annotation.human.task_description
67
+ sin_cos_embedding_keys: null
68
+ state: !!python/object:gr00t.data.types.ModalityConfig
69
+ action_configs: null
70
+ delta_indices:
71
+ - 0
72
+ mean_std_embedding_keys: null
73
+ modality_keys:
74
+ - arm_left
75
+ - arm_right
76
+ sin_cos_embedding_keys: null
77
+ video: !!python/object:gr00t.data.types.ModalityConfig
78
+ action_configs: null
79
+ delta_indices:
80
+ - 0
81
+ mean_std_embedding_keys: null
82
+ modality_keys:
83
+ - cam_left_head
84
+ sin_cos_embedding_keys: null
85
+ mode: single_turn
86
+ multiprocessing_context: fork
87
+ num_shards_per_epoch: 100000
88
+ override_pretraining_statistics: false
89
+ random_chop: 0.0
90
+ seed: 42
91
+ shard_size: 1024
92
+ shuffle: true
93
+ subsample_ratio: 1.0
94
+ video_backend: torchcodec
95
+ load_config_path: null
96
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
97
+ _attn_implementation_autoset: false
98
+ _attn_implementation_internal: null
99
+ _commit_hash: null
100
+ _name_or_path: ''
101
+ add_cross_attention: false
102
+ architectures: null
103
+ backbone_model_type: eagle
104
+ backbone_trainable_params_fp32: true
105
+ bad_words_ids: null
106
+ begin_suppress_tokens: null
107
+ bos_token_id: null
108
+ chunk_size_feed_forward: 0
109
+ color_jitter_params:
110
+ brightness: 0.3
111
+ contrast: 0.4
112
+ hue: 0.08
113
+ saturation: 0.5
114
+ cross_attention_hidden_size: null
115
+ decoder_start_token_id: null
116
+ diffusion_model_cfg:
117
+ attention_head_dim: 48
118
+ dropout: 0.2
119
+ final_dropout: true
120
+ interleave_self_attention: true
121
+ norm_type: ada_norm
122
+ num_attention_heads: 32
123
+ num_layers: 32
124
+ output_dim: 1024
125
+ positional_embeddings: null
126
+ diversity_penalty: 0.0
127
+ do_sample: false
128
+ eagle_collator: true
129
+ early_stopping: false
130
+ encoder_no_repeat_ngram_size: 0
131
+ eos_token_id: null
132
+ exponential_decay_length_penalty: null
133
+ extra_augmentation_config: null
134
+ finetuning_task: null
135
+ forced_bos_token_id: null
136
+ forced_eos_token_id: null
137
+ id2label:
138
+ 0: LABEL_0
139
+ 1: LABEL_1
140
+ is_decoder: false
141
+ is_encoder_decoder: false
142
+ label2id:
143
+ LABEL_0: 0
144
+ LABEL_1: 1
145
+ length_penalty: 1.0
146
+ load_bf16: false
147
+ max_length: 20
148
+ min_length: 0
149
+ model_name: nvidia/Eagle-Block2A-2B-v2
150
+ no_repeat_ngram_size: 0
151
+ num_beam_groups: 1
152
+ num_beams: 1
153
+ num_return_sequences: 1
154
+ output_attentions: false
155
+ output_hidden_states: false
156
+ output_scores: false
157
+ pad_token_id: null
158
+ prefix: null
159
+ problem_type: null
160
+ pruned_heads: {}
161
+ random_rotation_angle: null
162
+ remove_invalid_values: false
163
+ repetition_penalty: 1.0
164
+ reproject_vision: false
165
+ return_dict: true
166
+ return_dict_in_generate: false
167
+ sep_token_id: null
168
+ state_dropout_prob: 0.0
169
+ suppress_tokens: null
170
+ task_specific_params: null
171
+ temperature: 1.0
172
+ tf_legacy_loss: false
173
+ tie_encoder_decoder: false
174
+ tie_word_embeddings: true
175
+ tokenizer_class: null
176
+ top_k: 50
177
+ top_p: 1.0
178
+ torch_dtype: null
179
+ torchscript: false
180
+ transformers_version: null
181
+ tune_diffusion_model: true
182
+ tune_llm: false
183
+ tune_projector: true
184
+ tune_visual: false
185
+ typical_p: 1.0
186
+ use_bfloat16: false
187
+ use_relative_action: true
188
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
189
+ add_rl_callback: false
190
+ assert_loss_less_than: null
191
+ batch_size: null
192
+ bf16: true
193
+ dataloader_num_workers: 8
194
+ ddp_bucket_cap_mb: 100
195
+ deepspeed_stage: 2
196
+ enable_open_loop_eval: false
197
+ enable_profiling: false
198
+ eval_batch_size: 2
199
+ eval_bf16: true
200
+ eval_set_split_ratio: 0.1
201
+ eval_steps: 500
202
+ eval_strategy: 'no'
203
+ experiment_name: null
204
+ fp16: false
205
+ global_batch_size: 48
206
+ gradient_accumulation_steps: 1
207
+ gradient_checkpointing: false
208
+ learning_rate: 0.0001
209
+ logging_steps: 10
210
+ lr_scheduler_type: cosine
211
+ max_concurrent_uploads: 2
212
+ max_grad_norm: 1.0
213
+ max_retries: 3
214
+ max_steps: 200
215
+ num_gpus: 1
216
+ open_loop_eval_plot_indices: null
217
+ open_loop_eval_steps_per_traj: 100
218
+ open_loop_eval_traj_ids:
219
+ - 0
220
+ optim: adamw_torch
221
+ output_dir: /data/checkpoints/TestModel4
222
+ remove_unused_columns: false
223
+ save_best_eval_metric_greater_is_better: true
224
+ save_best_eval_metric_name: ''
225
+ save_steps: 100
226
+ save_total_limit: 10
227
+ save_vl_model: false
228
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
229
+ tf32: true
230
+ transformers_access_token: null
231
+ transformers_cache_dir: null
232
+ transformers_local_files_only: false
233
+ transformers_trust_remote_code: true
234
+ upload_checkpoints: false
235
+ upload_every: 1000
236
+ upload_last_n_checkpoints: 5
237
+ use_ddp: false
238
+ use_wandb: false
239
+ wandb_project: finetune-gr00t-n1d6
240
+ warmup_ratio: 0.05
241
+ warmup_steps: 0
242
+ weight_decay: 1.0e-05
experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "arm_left": {
5
+ "min": [
6
+ 0.5357068181037903,
7
+ 0.09819874167442322,
8
+ -0.005884254351258278,
9
+ -1.9916343688964844,
10
+ 0.171865776181221,
11
+ -0.7192093133926392,
12
+ -0.2347273975610733,
13
+ 0.1835404485464096
14
+ ],
15
+ "max": [
16
+ 0.8060949444770813,
17
+ 0.14419420063495636,
18
+ 0.1393405795097351,
19
+ -1.6996147632598877,
20
+ 0.36047351360321045,
21
+ -0.3933342397212982,
22
+ -0.17479148507118225,
23
+ 0.1919773519039154
24
+ ],
25
+ "mean": [
26
+ 0.647490382194519,
27
+ 0.11591889709234238,
28
+ 0.05527754873037338,
29
+ -1.8612351417541504,
30
+ 0.2748093605041504,
31
+ -0.5980395078659058,
32
+ -0.20959335565567017,
33
+ 0.1872807741165161
34
+ ],
35
+ "std": [
36
+ 0.07161965221166611,
37
+ 0.01286551449447867,
38
+ 0.02708311937749386,
39
+ 0.07693036645650828,
40
+ 0.04468376561999321,
41
+ 0.08223182708024979,
42
+ 0.020106619223952207,
43
+ 0.0019493200816217977
44
+ ],
45
+ "q01": [
46
+ 0.542970244884491,
47
+ 0.09969677031040192,
48
+ 0.0053154832031577825,
49
+ -1.9704224967956543,
50
+ 0.21393687039613724,
51
+ -0.7124262452125549,
52
+ -0.23469635844230652,
53
+ 0.18635275959968567
54
+ ],
55
+ "q99": [
56
+ 0.7976319086551666,
57
+ 0.1417374312877655,
58
+ 0.1274196347594261,
59
+ -1.710426688194275,
60
+ 0.35903324604034426,
61
+ -0.396414190530777,
62
+ -0.17485354840755463,
63
+ 0.1919773519039154
64
+ ]
65
+ },
66
+ "arm_right": {
67
+ "min": [
68
+ -0.12207131832838058,
69
+ -0.35657861828804016,
70
+ -0.4954638183116913,
71
+ -2.450582265853882,
72
+ 0.8439650535583496,
73
+ -0.33980071544647217,
74
+ -0.9373581409454346,
75
+ 0.18072815239429474
76
+ ],
77
+ "max": [
78
+ 0.31729432940483093,
79
+ -0.019150791689753532,
80
+ 0.5513582229614258,
81
+ -2.1091995239257812,
82
+ 1.5962507724761963,
83
+ 0.3736201822757721,
84
+ -0.6251019239425659,
85
+ 0.20322653651237488
86
+ ],
87
+ "mean": [
88
+ 0.07138513773679733,
89
+ -0.16919372975826263,
90
+ 0.0536830797791481,
91
+ -2.2932255268096924,
92
+ 1.2343531847000122,
93
+ 0.08430171012878418,
94
+ -0.763832688331604,
95
+ 0.19362585246562958
96
+ ],
97
+ "std": [
98
+ 0.06878731399774551,
99
+ 0.06808101385831833,
100
+ 0.23403118550777435,
101
+ 0.0615885853767395,
102
+ 0.17844322323799133,
103
+ 0.10313137620687485,
104
+ 0.05072656273841858,
105
+ 0.005280985496938146
106
+ ],
107
+ "q01": [
108
+ -0.07233462154865265,
109
+ -0.31898770451545716,
110
+ -0.37645135819911957,
111
+ -2.429511785507202,
112
+ 0.9102486312389374,
113
+ -0.21467964828014374,
114
+ -0.899279260635376,
115
+ 0.18072815239429474
116
+ ],
117
+ "q99": [
118
+ 0.22330133676528924,
119
+ -0.05408480763435364,
120
+ 0.48604661107063274,
121
+ -2.1352156257629393,
122
+ 1.5619227027893066,
123
+ 0.2997923380136489,
124
+ -0.6622847545146943,
125
+ 0.20041424036026
126
+ ]
127
+ }
128
+ },
129
+ "action": {
130
+ "arm_left": {
131
+ "min": [
132
+ 0.5353593230247498,
133
+ 0.09817477315664291,
134
+ -0.006135923322290182,
135
+ -1.9926410913467407,
136
+ 0.16260196268558502,
137
+ -0.7185632586479187,
138
+ -0.2346990555524826,
139
+ 0.12024854868650436
140
+ ],
141
+ "max": [
142
+ 0.8068739175796509,
143
+ 0.14419420063495636,
144
+ 0.139592245221138,
145
+ -1.699650764465332,
146
+ 0.3604854941368103,
147
+ -0.39335930347442627,
148
+ -0.17487381398677826,
149
+ 0.1303728222846985
150
+ ],
151
+ "mean": [
152
+ 0.647443413734436,
153
+ 0.11592874675989151,
154
+ 0.05530872195959091,
155
+ -1.861175298690796,
156
+ 0.274822860956192,
157
+ -0.5980128645896912,
158
+ -0.20963473618030548,
159
+ 0.12473082542419434
160
+ ],
161
+ "std": [
162
+ 0.07161091268062592,
163
+ 0.012873547151684761,
164
+ 0.02710757777094841,
165
+ 0.07696112245321238,
166
+ 0.04471345245838165,
167
+ 0.08225028216838837,
168
+ 0.020105436444282532,
169
+ 0.0023378378245981287
170
+ ],
171
+ "q01": [
172
+ 0.5430291891098022,
173
+ 0.09970875084400177,
174
+ 0.004601942375302315,
175
+ -1.9711652994155884,
176
+ 0.21322332322597504,
177
+ -0.7124273180961609,
178
+ -0.2346990555524826,
179
+ 0.1236233040690422
180
+ ],
181
+ "q99": [
182
+ 0.7976700067520142,
183
+ 0.14112623035907745,
184
+ 0.12732040882110596,
185
+ -1.7103885412216187,
186
+ 0.35895150899887085,
187
+ -0.3964272737503052,
188
+ -0.17487381398677826,
189
+ 0.1303728222846985
190
+ ]
191
+ },
192
+ "arm_right": {
193
+ "min": [
194
+ -0.1257864236831665,
195
+ -0.3604854941368103,
196
+ -0.5062136650085449,
197
+ -2.451301336288452,
198
+ 0.8375535011291504,
199
+ -0.3473398983478546,
200
+ -0.9418641924858093,
201
+ 0.11687378585338593
202
+ ],
203
+ "max": [
204
+ 0.3206019699573517,
205
+ -0.016873788088560104,
206
+ 0.558368980884552,
207
+ -2.104621648788452,
208
+ 1.6214176416397095,
209
+ 0.3766990303993225,
210
+ -0.6258641481399536,
211
+ 0.14387184381484985
212
+ ],
213
+ "mean": [
214
+ 0.0714845135807991,
215
+ -0.16912876069545746,
216
+ 0.053440161049366,
217
+ -2.2931737899780273,
218
+ 1.2342159748077393,
219
+ 0.08426135778427124,
220
+ -0.7638657689094543,
221
+ 0.1323314756155014
222
+ ],
223
+ "std": [
224
+ 0.06979027390480042,
225
+ 0.07009217888116837,
226
+ 0.24079644680023193,
227
+ 0.062028586864471436,
228
+ 0.18407252430915833,
229
+ 0.10534369945526123,
230
+ 0.05093063414096832,
231
+ 0.006346254609525135
232
+ ],
233
+ "q01": [
234
+ -0.0746128237247467,
235
+ -0.32032585799694063,
236
+ -0.3850291669368744,
237
+ -2.4310834121704104,
238
+ 0.9007228302955628,
239
+ -0.20928162336349487,
240
+ -0.901704580783844,
241
+ 0.11687378585338593
242
+ ],
243
+ "q99": [
244
+ 0.22702915966510773,
245
+ -0.05062136426568031,
246
+ 0.49519968152046157,
247
+ -2.1337673664093018,
248
+ 1.5677284002304077,
249
+ 0.3055836880207053,
250
+ -0.6626796722412109,
251
+ 0.140497088432312
252
+ ]
253
+ }
254
+ },
255
+ "relative_action": {}
256
+ }
257
+ }
experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "extra_augmentation_config": null,
19
+ "apply_sincos_state_encoding": true,
20
+ "use_relative_action": true,
21
+ "max_state_dim": 128,
22
+ "max_action_dim": 128,
23
+ "action_horizon": 50,
24
+ "hidden_size": 1024,
25
+ "input_embedding_dim": 1536,
26
+ "add_pos_embed": true,
27
+ "attn_dropout": 0.2,
28
+ "use_vlln": true,
29
+ "max_seq_len": 1024,
30
+ "use_alternate_vl_dit": true,
31
+ "attend_text_every_n_blocks": 2,
32
+ "diffusion_model_cfg": {
33
+ "attention_head_dim": 48,
34
+ "dropout": 0.2,
35
+ "final_dropout": true,
36
+ "interleave_self_attention": true,
37
+ "norm_type": "ada_norm",
38
+ "num_attention_heads": 32,
39
+ "num_layers": 32,
40
+ "output_dim": 1024,
41
+ "positional_embeddings": null
42
+ },
43
+ "num_inference_timesteps": 4,
44
+ "noise_beta_alpha": 1.5,
45
+ "noise_beta_beta": 1.0,
46
+ "noise_s": 0.999,
47
+ "num_timestep_buckets": 1000,
48
+ "tune_projector": true,
49
+ "tune_diffusion_model": true,
50
+ "tune_vlln": true,
51
+ "state_dropout_prob": 0.0,
52
+ "state_additive_noise_scale": 0.0,
53
+ "max_num_embodiments": 32
54
+ }
experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c72b22bcc7e7bf23d62d89f589c6dde1b3d3e2b78bdebe42815c856bcc236d
3
+ size 4990120184
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edc430476f0dfe396a7097de20e031448bac1c379a3b0ecda50cf0b831d8db54
3
+ size 4823190320
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "cam_left_head"
361
+ ],
362
+ "sin_cos_embedding_keys": null,
363
+ "mean_std_embedding_keys": null,
364
+ "action_configs": null
365
+ },
366
+ "state": {
367
+ "delta_indices": [
368
+ 0
369
+ ],
370
+ "modality_keys": [
371
+ "arm_left",
372
+ "arm_right"
373
+ ],
374
+ "sin_cos_embedding_keys": null,
375
+ "mean_std_embedding_keys": null,
376
+ "action_configs": null
377
+ },
378
+ "action": {
379
+ "delta_indices": [
380
+ 0,
381
+ 1,
382
+ 2,
383
+ 3,
384
+ 4,
385
+ 5,
386
+ 6,
387
+ 7,
388
+ 8,
389
+ 9,
390
+ 10,
391
+ 11,
392
+ 12,
393
+ 13,
394
+ 14,
395
+ 15
396
+ ],
397
+ "modality_keys": [
398
+ "arm_left",
399
+ "arm_right"
400
+ ],
401
+ "sin_cos_embedding_keys": null,
402
+ "mean_std_embedding_keys": null,
403
+ "action_configs": [
404
+ {
405
+ "rep": "ABSOLUTE",
406
+ "type": "NON_EEF",
407
+ "format": "DEFAULT",
408
+ "state_key": null
409
+ },
410
+ {
411
+ "rep": "ABSOLUTE",
412
+ "type": "NON_EEF",
413
+ "format": "DEFAULT",
414
+ "state_key": null
415
+ }
416
+ ]
417
+ },
418
+ "language": {
419
+ "delta_indices": [
420
+ 0
421
+ ],
422
+ "modality_keys": [
423
+ "annotation.human.task_description"
424
+ ],
425
+ "sin_cos_embedding_keys": null,
426
+ "mean_std_embedding_keys": null,
427
+ "action_configs": null
428
+ }
429
+ }
430
+ },
431
+ "image_crop_size": null,
432
+ "image_target_size": null,
433
+ "use_albumentations": true,
434
+ "random_rotation_angle": null,
435
+ "color_jitter_params": {
436
+ "brightness": 0.3,
437
+ "contrast": 0.4,
438
+ "saturation": 0.5,
439
+ "hue": 0.08
440
+ },
441
+ "shortest_image_edge": 256,
442
+ "crop_fraction": 0.95,
443
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
444
+ "model_type": "eagle",
445
+ "formalize_language": true,
446
+ "max_state_dim": 128,
447
+ "max_action_dim": 128,
448
+ "max_action_horizon": 50,
449
+ "use_percentiles": false,
450
+ "clip_outliers": true,
451
+ "apply_sincos_state_encoding": true,
452
+ "use_relative_action": true
453
+ }
454
+ }
statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.5,
6
+ "eval_steps": 500,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "grad_norm": 0.38351285457611084,
14
+ "learning_rate": 9.956320346634876e-05,
15
+ "loss": 1.2186,
16
+ "step": 10
17
+ },
18
+ {
19
+ "grad_norm": 0.453412801027298,
20
+ "learning_rate": 9.473646649103818e-05,
21
+ "loss": 1.176,
22
+ "step": 20
23
+ },
24
+ {
25
+ "grad_norm": 0.3962445557117462,
26
+ "learning_rate": 8.506183921362443e-05,
27
+ "loss": 1.1279,
28
+ "step": 30
29
+ },
30
+ {
31
+ "grad_norm": 0.33287128806114197,
32
+ "learning_rate": 7.158771761692464e-05,
33
+ "loss": 1.0833,
34
+ "step": 40
35
+ },
36
+ {
37
+ "grad_norm": 0.34996676445007324,
38
+ "learning_rate": 5.577423184847932e-05,
39
+ "loss": 1.0655,
40
+ "step": 50
41
+ },
42
+ {
43
+ "grad_norm": 0.33573195338249207,
44
+ "learning_rate": 3.933501846281267e-05,
45
+ "loss": 1.0523,
46
+ "step": 60
47
+ },
48
+ {
49
+ "grad_norm": 0.36666247248649597,
50
+ "learning_rate": 2.405152131093926e-05,
51
+ "loss": 1.0321,
52
+ "step": 70
53
+ },
54
+ {
55
+ "grad_norm": 0.39881908893585205,
56
+ "learning_rate": 1.157994445715706e-05,
57
+ "loss": 1.0118,
58
+ "step": 80
59
+ },
60
+ {
61
+ "grad_norm": 0.40295055508613586,
62
+ "learning_rate": 3.271776770026963e-06,
63
+ "loss": 1.0024,
64
+ "step": 90
65
+ },
66
+ {
67
+ "grad_norm": 0.4325471818447113,
68
+ "learning_rate": 2.7337132953697554e-08,
69
+ "loss": 1.0113,
70
+ "step": 100
71
+ },
72
+ {
73
+ "grad_norm": 0.39502644538879395,
74
+ "learning_rate": 4.669547078371504e-05,
75
+ "loss": 1.0233,
76
+ "step": 110
77
+ },
78
+ {
79
+ "grad_norm": 0.9289461970329285,
80
+ "learning_rate": 3.852880399766243e-05,
81
+ "loss": 0.9895,
82
+ "step": 120
83
+ },
84
+ {
85
+ "grad_norm": 0.8774065375328064,
86
+ "learning_rate": 3.0675041535377405e-05,
87
+ "loss": 0.9202,
88
+ "step": 130
89
+ },
90
+ {
91
+ "grad_norm": 1.0103175640106201,
92
+ "learning_rate": 2.3348413563600325e-05,
93
+ "loss": 0.8666,
94
+ "step": 140
95
+ },
96
+ {
97
+ "grad_norm": 1.0156214237213135,
98
+ "learning_rate": 1.6748771394307585e-05,
99
+ "loss": 0.7999,
100
+ "step": 150
101
+ },
102
+ {
103
+ "grad_norm": 0.923975944519043,
104
+ "learning_rate": 1.1056136061894384e-05,
105
+ "loss": 0.7333,
106
+ "step": 160
107
+ },
108
+ {
109
+ "grad_norm": 1.110439658164978,
110
+ "learning_rate": 6.425787818636131e-06,
111
+ "loss": 0.6969,
112
+ "step": 170
113
+ },
114
+ {
115
+ "grad_norm": 0.8177175521850586,
116
+ "learning_rate": 2.9840304941919415e-06,
117
+ "loss": 0.6798,
118
+ "step": 180
119
+ },
120
+ {
121
+ "grad_norm": 0.762315571308136,
122
+ "learning_rate": 8.247462563808817e-07,
123
+ "loss": 0.6679,
124
+ "step": 190
125
+ },
126
+ {
127
+ "grad_norm": 0.8166346549987793,
128
+ "learning_rate": 6.834750376549792e-09,
129
+ "loss": 0.6646,
130
+ "step": 200
131
+ }
132
+ ],
133
+ "logging_steps": 10,
134
+ "max_steps": 200,
135
+ "num_input_tokens_seen": 0,
136
+ "num_train_epochs": 9223372036854775807,
137
+ "save_steps": 100,
138
+ "stateful_callbacks": {
139
+ "TrainerControl": {
140
+ "args": {
141
+ "should_epoch_stop": false,
142
+ "should_evaluate": false,
143
+ "should_log": false,
144
+ "should_save": true,
145
+ "should_training_stop": true
146
+ },
147
+ "attributes": {}
148
+ }
149
+ },
150
+ "total_flos": 0.0,
151
+ "train_batch_size": 48,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e468f995e72c66264a30c3d2737be2853d368278cba7aeb88c6f4203412707ae
3
+ size 5713
wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "TestModel4"}