azazdeaz commited on
Commit
20de9e0
·
verified ·
1 Parent(s): 772bc14

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. checkpoint-1000/config.json +70 -0
  2. checkpoint-1000/embodiment_id.json +10 -0
  3. checkpoint-1000/experiment_cfg/conf.yaml +209 -0
  4. checkpoint-1000/experiment_cfg/config.yaml +243 -0
  5. checkpoint-1000/experiment_cfg/dataset_statistics.json +824 -0
  6. checkpoint-1000/experiment_cfg/final_model_config.json +53 -0
  7. checkpoint-1000/experiment_cfg/final_processor_config.json +0 -0
  8. checkpoint-1000/model-00001-of-00002.safetensors +3 -0
  9. checkpoint-1000/model-00002-of-00002.safetensors +3 -0
  10. checkpoint-1000/model.safetensors.index.json +0 -0
  11. checkpoint-1000/optimizer.pt +3 -0
  12. checkpoint-1000/processor_config.json +455 -0
  13. checkpoint-1000/rng_state.pth +3 -0
  14. checkpoint-1000/scheduler.pt +3 -0
  15. checkpoint-1000/statistics.json +0 -0
  16. checkpoint-1000/trainer_state.json +634 -0
  17. checkpoint-1000/training_args.bin +3 -0
  18. checkpoint-1000/wandb_config.json +1 -0
  19. checkpoint-2000/config.json +70 -0
  20. checkpoint-2000/embodiment_id.json +10 -0
  21. checkpoint-2000/experiment_cfg/conf.yaml +209 -0
  22. checkpoint-2000/experiment_cfg/config.yaml +243 -0
  23. checkpoint-2000/experiment_cfg/dataset_statistics.json +824 -0
  24. checkpoint-2000/experiment_cfg/final_model_config.json +53 -0
  25. checkpoint-2000/experiment_cfg/final_processor_config.json +0 -0
  26. checkpoint-2000/model-00001-of-00002.safetensors +3 -0
  27. checkpoint-2000/model-00002-of-00002.safetensors +3 -0
  28. checkpoint-2000/model.safetensors.index.json +0 -0
  29. checkpoint-2000/optimizer.pt +3 -0
  30. checkpoint-2000/processor_config.json +455 -0
  31. checkpoint-2000/rng_state.pth +3 -0
  32. checkpoint-2000/scheduler.pt +3 -0
  33. checkpoint-2000/statistics.json +0 -0
  34. checkpoint-2000/trainer_state.json +1234 -0
  35. checkpoint-2000/training_args.bin +3 -0
  36. checkpoint-2000/wandb_config.json +1 -0
  37. experiment_cfg/conf.yaml +209 -0
  38. experiment_cfg/config.yaml +243 -0
  39. experiment_cfg/dataset_statistics.json +824 -0
  40. experiment_cfg/final_model_config.json +53 -0
  41. experiment_cfg/final_processor_config.json +0 -0
  42. processor/embodiment_id.json +10 -0
  43. processor/processor_config.json +455 -0
  44. processor/statistics.json +0 -0
  45. wandb_config.json +1 -0
checkpoint-1000/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.0,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
checkpoint-1000/embodiment_id.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "new_embodiment": 10
10
+ }
checkpoint-1000/experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ formalize_language: true
31
+ apply_sincos_state_encoding: false
32
+ use_relative_action: true
33
+ max_state_dim: 29
34
+ max_action_dim: 29
35
+ action_horizon: 16
36
+ hidden_size: 1024
37
+ input_embedding_dim: 1536
38
+ add_pos_embed: true
39
+ attn_dropout: 0.2
40
+ use_vlln: true
41
+ max_seq_len: 1024
42
+ use_alternate_vl_dit: true
43
+ attend_text_every_n_blocks: 2
44
+ diffusion_model_cfg:
45
+ positional_embeddings: null
46
+ num_layers: 32
47
+ num_attention_heads: 32
48
+ attention_head_dim: 48
49
+ norm_type: ada_norm
50
+ dropout: 0.2
51
+ final_dropout: true
52
+ output_dim: 1024
53
+ interleave_self_attention: true
54
+ num_inference_timesteps: 4
55
+ noise_beta_alpha: 1.5
56
+ noise_beta_beta: 1.0
57
+ noise_s: 0.999
58
+ num_timestep_buckets: 1000
59
+ tune_projector: true
60
+ tune_diffusion_model: true
61
+ tune_vlln: true
62
+ state_dropout_prob: 0.0
63
+ state_additive_noise_scale: 0.0
64
+ max_num_embodiments: 32
65
+ data:
66
+ datasets:
67
+ - dataset_paths:
68
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
69
+ embodiment_tag: new_embodiment
70
+ mix_ratio: 1.0
71
+ dataset_type: physical_embodiment
72
+ val_dataset_path: null
73
+ modality_configs:
74
+ new_embodiment:
75
+ video:
76
+ delta_indices:
77
+ - 0
78
+ modality_keys:
79
+ - front
80
+ - wrist
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - single_arm
89
+ - gripper
90
+ sin_cos_embedding_keys: null
91
+ mean_std_embedding_keys: null
92
+ action_configs: null
93
+ action:
94
+ delta_indices:
95
+ - 0
96
+ - 1
97
+ - 2
98
+ - 3
99
+ - 4
100
+ - 5
101
+ - 6
102
+ - 7
103
+ - 8
104
+ - 9
105
+ - 10
106
+ - 11
107
+ - 12
108
+ - 13
109
+ - 14
110
+ - 15
111
+ modality_keys:
112
+ - single_arm
113
+ - gripper
114
+ sin_cos_embedding_keys: null
115
+ mean_std_embedding_keys: null
116
+ action_configs:
117
+ - rep: RELATIVE
118
+ type: NON_EEF
119
+ format: DEFAULT
120
+ state_key: null
121
+ - rep: ABSOLUTE
122
+ type: NON_EEF
123
+ format: DEFAULT
124
+ state_key: null
125
+ language:
126
+ delta_indices:
127
+ - 0
128
+ modality_keys:
129
+ - annotation.human.task_description
130
+ sin_cos_embedding_keys: null
131
+ mean_std_embedding_keys: null
132
+ action_configs: null
133
+ download_cache: false
134
+ shard_size: 1024
135
+ episode_sampling_rate: 0.1
136
+ num_shards_per_epoch: 100000
137
+ override_pretraining_statistics: false
138
+ mode: single_turn
139
+ random_chop: 0.0
140
+ mock_dataset_mode: false
141
+ shuffle: true
142
+ seed: 42
143
+ multiprocessing_context: fork
144
+ allow_padding: false
145
+ subsample_ratio: 1.0
146
+ image_crop_size:
147
+ - 244
148
+ - 244
149
+ image_target_size:
150
+ - 224
151
+ - 224
152
+ video_backend: torchcodec
153
+ training:
154
+ output_dir: /content/so100_finetune
155
+ experiment_name: null
156
+ max_steps: 10000
157
+ global_batch_size: 36
158
+ batch_size: null
159
+ gradient_accumulation_steps: 1
160
+ learning_rate: 0.0001
161
+ lr_scheduler_type: cosine
162
+ weight_decay: 1.0e-05
163
+ warmup_ratio: 0.05
164
+ warmup_steps: 0
165
+ max_grad_norm: 1.0
166
+ optim: adamw_torch
167
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
168
+ tf32: true
169
+ fp16: false
170
+ bf16: true
171
+ eval_bf16: true
172
+ logging_steps: 10
173
+ save_steps: 1000
174
+ save_total_limit: 5
175
+ save_vl_model: false
176
+ upload_checkpoints: false
177
+ upload_every: 1000
178
+ upload_last_n_checkpoints: 5
179
+ max_concurrent_uploads: 2
180
+ eval_strategy: 'no'
181
+ eval_steps: 500
182
+ eval_set_split_ratio: 0.1
183
+ eval_batch_size: 2
184
+ save_best_eval_metric_name: ''
185
+ save_best_eval_metric_greater_is_better: true
186
+ deepspeed_stage: 2
187
+ gradient_checkpointing: false
188
+ transformers_trust_remote_code: true
189
+ transformers_local_files_only: false
190
+ transformers_cache_dir: null
191
+ transformers_access_token: null
192
+ use_ddp: false
193
+ ddp_bucket_cap_mb: 100
194
+ num_gpus: 1
195
+ dataloader_num_workers: 4
196
+ remove_unused_columns: false
197
+ use_wandb: true
198
+ wandb_project: finetune-gr00t-n1d6
199
+ enable_profiling: false
200
+ max_retries: 3
201
+ assert_loss_less_than: null
202
+ add_rl_callback: false
203
+ enable_open_loop_eval: false
204
+ open_loop_eval_traj_ids:
205
+ - 0
206
+ open_loop_eval_steps_per_traj: 100
207
+ open_loop_eval_plot_indices: null
208
+ max_steps: 10000
209
+ save_steps: 1000
checkpoint-1000/experiment_cfg/config.yaml ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - relative
30
+ state_key: null
31
+ type: &id002 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
36
+ - absolute
37
+ state_key: null
38
+ type: *id002
39
+ delta_indices:
40
+ - 0
41
+ - 1
42
+ - 2
43
+ - 3
44
+ - 4
45
+ - 5
46
+ - 6
47
+ - 7
48
+ - 8
49
+ - 9
50
+ - 10
51
+ - 11
52
+ - 12
53
+ - 13
54
+ - 14
55
+ - 15
56
+ mean_std_embedding_keys: null
57
+ modality_keys:
58
+ - single_arm
59
+ - gripper
60
+ sin_cos_embedding_keys: null
61
+ language: !!python/object:gr00t.data.types.ModalityConfig
62
+ action_configs: null
63
+ delta_indices:
64
+ - 0
65
+ mean_std_embedding_keys: null
66
+ modality_keys:
67
+ - annotation.human.task_description
68
+ sin_cos_embedding_keys: null
69
+ state: !!python/object:gr00t.data.types.ModalityConfig
70
+ action_configs: null
71
+ delta_indices:
72
+ - 0
73
+ mean_std_embedding_keys: null
74
+ modality_keys:
75
+ - single_arm
76
+ - gripper
77
+ sin_cos_embedding_keys: null
78
+ video: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - front
85
+ - wrist
86
+ sin_cos_embedding_keys: null
87
+ mode: single_turn
88
+ multiprocessing_context: fork
89
+ num_shards_per_epoch: 100000
90
+ override_pretraining_statistics: false
91
+ random_chop: 0.0
92
+ seed: 42
93
+ shard_size: 1024
94
+ shuffle: true
95
+ subsample_ratio: 1.0
96
+ video_backend: torchcodec
97
+ load_config_path: null
98
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
99
+ _attn_implementation_autoset: false
100
+ _attn_implementation_internal: null
101
+ _commit_hash: null
102
+ _name_or_path: ''
103
+ add_cross_attention: false
104
+ architectures: null
105
+ backbone_model_type: eagle
106
+ backbone_trainable_params_fp32: true
107
+ bad_words_ids: null
108
+ begin_suppress_tokens: null
109
+ bos_token_id: null
110
+ chunk_size_feed_forward: 0
111
+ color_jitter_params:
112
+ brightness: 0.3
113
+ contrast: 0.4
114
+ hue: 0.08
115
+ saturation: 0.5
116
+ cross_attention_hidden_size: null
117
+ decoder_start_token_id: null
118
+ diffusion_model_cfg:
119
+ attention_head_dim: 48
120
+ dropout: 0.2
121
+ final_dropout: true
122
+ interleave_self_attention: true
123
+ norm_type: ada_norm
124
+ num_attention_heads: 32
125
+ num_layers: 32
126
+ output_dim: 1024
127
+ positional_embeddings: null
128
+ diversity_penalty: 0.0
129
+ do_sample: false
130
+ eagle_collator: true
131
+ early_stopping: false
132
+ encoder_no_repeat_ngram_size: 0
133
+ eos_token_id: null
134
+ exponential_decay_length_penalty: null
135
+ finetuning_task: null
136
+ forced_bos_token_id: null
137
+ forced_eos_token_id: null
138
+ id2label:
139
+ 0: LABEL_0
140
+ 1: LABEL_1
141
+ is_decoder: false
142
+ is_encoder_decoder: false
143
+ label2id:
144
+ LABEL_0: 0
145
+ LABEL_1: 1
146
+ length_penalty: 1.0
147
+ load_bf16: false
148
+ max_length: 20
149
+ min_length: 0
150
+ model_name: nvidia/Eagle-Block2A-2B-v2
151
+ no_repeat_ngram_size: 0
152
+ num_beam_groups: 1
153
+ num_beams: 1
154
+ num_return_sequences: 1
155
+ output_attentions: false
156
+ output_hidden_states: false
157
+ output_scores: false
158
+ pad_token_id: null
159
+ prefix: null
160
+ problem_type: null
161
+ pruned_heads: {}
162
+ random_rotation_angle: null
163
+ remove_invalid_values: false
164
+ repetition_penalty: 1.0
165
+ reproject_vision: false
166
+ return_dict: true
167
+ return_dict_in_generate: false
168
+ sep_token_id: null
169
+ state_dropout_prob: 0.0
170
+ suppress_tokens: null
171
+ task_specific_params: null
172
+ temperature: 1.0
173
+ tf_legacy_loss: false
174
+ tie_encoder_decoder: false
175
+ tie_word_embeddings: true
176
+ tokenizer_class: null
177
+ top_k: 50
178
+ top_p: 1.0
179
+ torch_dtype: null
180
+ torchscript: false
181
+ transformers_version: null
182
+ tune_diffusion_model: true
183
+ tune_llm: false
184
+ tune_projector: true
185
+ tune_visual: false
186
+ typical_p: 1.0
187
+ use_bfloat16: false
188
+ use_relative_action: true
189
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
190
+ add_rl_callback: false
191
+ assert_loss_less_than: null
192
+ batch_size: null
193
+ bf16: true
194
+ dataloader_num_workers: 4
195
+ ddp_bucket_cap_mb: 100
196
+ deepspeed_stage: 2
197
+ enable_open_loop_eval: false
198
+ enable_profiling: false
199
+ eval_batch_size: 2
200
+ eval_bf16: true
201
+ eval_set_split_ratio: 0.1
202
+ eval_steps: 500
203
+ eval_strategy: 'no'
204
+ experiment_name: null
205
+ fp16: false
206
+ global_batch_size: 36
207
+ gradient_accumulation_steps: 1
208
+ gradient_checkpointing: false
209
+ learning_rate: 0.0001
210
+ logging_steps: 10
211
+ lr_scheduler_type: cosine
212
+ max_concurrent_uploads: 2
213
+ max_grad_norm: 1.0
214
+ max_retries: 3
215
+ max_steps: 10000
216
+ num_gpus: 1
217
+ open_loop_eval_plot_indices: null
218
+ open_loop_eval_steps_per_traj: 100
219
+ open_loop_eval_traj_ids:
220
+ - 0
221
+ optim: adamw_torch
222
+ output_dir: /content/so100_finetune
223
+ remove_unused_columns: false
224
+ save_best_eval_metric_greater_is_better: true
225
+ save_best_eval_metric_name: ''
226
+ save_steps: 1000
227
+ save_total_limit: 5
228
+ save_vl_model: false
229
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
230
+ tf32: true
231
+ transformers_access_token: null
232
+ transformers_cache_dir: null
233
+ transformers_local_files_only: false
234
+ transformers_trust_remote_code: true
235
+ upload_checkpoints: false
236
+ upload_every: 1000
237
+ upload_last_n_checkpoints: 5
238
+ use_ddp: false
239
+ use_wandb: true
240
+ wandb_project: finetune-gr00t-n1d6
241
+ warmup_ratio: 0.05
242
+ warmup_steps: 0
243
+ weight_decay: 1.0e-05
checkpoint-1000/experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "single_arm": {
5
+ "min": [
6
+ -60.105262756347656,
7
+ -99.31827545166016,
8
+ -43.24324417114258,
9
+ 44.688323974609375,
10
+ -80.68241119384766
11
+ ],
12
+ "max": [
13
+ 34.31578826904297,
14
+ 45.80315399169922,
15
+ 99.62721252441406,
16
+ 97.62950134277344,
17
+ -4.8293962478637695
18
+ ],
19
+ "mean": [
20
+ -3.5325422842834118,
21
+ -39.55833784368612,
22
+ 47.61425589641663,
23
+ 68.3154743394685,
24
+ -46.70357059936142
25
+ ],
26
+ "std": [
27
+ 27.459571708144434,
28
+ 41.074676874488844,
29
+ 34.075122244859095,
30
+ 13.351843152336091,
31
+ 19.483248111140814
32
+ ],
33
+ "q01": [
34
+ -43.89320641903556,
35
+ -99.31827545166016,
36
+ 2.96704117550838,
37
+ 52.128458407208086,
38
+ -64.9047852897644
39
+ ],
40
+ "q99": [
41
+ 23.91888349814585,
42
+ 10.271718478515513,
43
+ 99.48718710059721,
44
+ 91.53274853632244,
45
+ -26.5701696578577
46
+ ]
47
+ },
48
+ "gripper": {
49
+ "min": [
50
+ 0.9803921580314636
51
+ ],
52
+ "max": [
53
+ 35.9943962097168
54
+ ],
55
+ "mean": [
56
+ 20.298597213926165
57
+ ],
58
+ "std": [
59
+ 9.780378321306017
60
+ ],
61
+ "q01": [
62
+ 6.1409783557640525
63
+ ],
64
+ "q99": [
65
+ 29.83243577963609
66
+ ]
67
+ }
68
+ },
69
+ "action": {
70
+ "single_arm": {
71
+ "min": [
72
+ -60.843040466308594,
73
+ -100.0,
74
+ -47.45454406738281,
75
+ 44.675209045410156,
76
+ -81.04986572265625
77
+ ],
78
+ "max": [
79
+ 34.99722671508789,
80
+ 45.694801330566406,
81
+ 100.0,
82
+ 97.96730041503906,
83
+ -4.566929340362549
84
+ ],
85
+ "mean": [
86
+ -3.4051861822555503,
87
+ -40.21667093678776,
88
+ 46.219495035627304,
89
+ 68.11321517887163,
90
+ -46.72253879798044
91
+ ],
92
+ "std": [
93
+ 27.604988373472494,
94
+ 40.826917813818575,
95
+ 34.82453909741334,
96
+ 13.439408862929378,
97
+ 19.518525610689558
98
+ ],
99
+ "q01": [
100
+ -44.29914832538184,
101
+ -99.9431293020161,
102
+ -0.6294070457600729,
103
+ 51.77255040025036,
104
+ -65.3110059408939
105
+ ],
106
+ "q99": [
107
+ 24.517798066878782,
108
+ 10.287138016986633,
109
+ 99.9985524069893,
110
+ 91.79085298874445,
111
+ -26.382156973884847
112
+ ]
113
+ },
114
+ "gripper": {
115
+ "min": [
116
+ 0.25553661584854126
117
+ ],
118
+ "max": [
119
+ 36.28620147705078
120
+ ],
121
+ "mean": [
122
+ 19.60203928355869
123
+ ],
124
+ "std": [
125
+ 10.896771214917207
126
+ ],
127
+ "q01": [
128
+ 2.708348265017052
129
+ ],
130
+ "q99": [
131
+ 30.222431877944597
132
+ ]
133
+ }
134
+ },
135
+ "relative_action": {
136
+ "single_arm": {
137
+ "min": [
138
+ [
139
+ -9.055549621582031,
140
+ -13.95650863647461,
141
+ -14.800384521484375,
142
+ -13.61447525024414,
143
+ -7.874013900756836
144
+ ],
145
+ [
146
+ -11.160816192626953,
147
+ -16.68364715576172,
148
+ -17.25493621826172,
149
+ -16.177448272705078,
150
+ -9.501310348510742
151
+ ],
152
+ [
153
+ -13.476604461669922,
154
+ -19.667449951171875,
155
+ -19.89129638671875,
156
+ -18.372356414794922,
157
+ -11.23359489440918
158
+ ],
159
+ [
160
+ -15.26607894897461,
161
+ -22.73650360107422,
162
+ -22.314407348632812,
163
+ -20.567264556884766,
164
+ -12.755905151367188
165
+ ],
166
+ [
167
+ -16.950286865234375,
168
+ -25.550708770751953,
169
+ -24.457931518554688,
170
+ -22.246448516845703,
171
+ -14.120733261108398
172
+ ],
173
+ [
174
+ -18.84502410888672,
175
+ -28.619762420654297,
176
+ -26.764127731323242,
177
+ -23.572124481201172,
178
+ -15.538057327270508
179
+ ],
180
+ [
181
+ -20.50891876220703,
182
+ -31.262561798095703,
183
+ -29.18724250793457,
184
+ -24.625682830810547,
185
+ -16.79789924621582
186
+ ],
187
+ [
188
+ -21.950958251953125,
189
+ -33.90535354614258,
190
+ -31.703550338745117,
191
+ -25.421085357666016,
192
+ -17.9002628326416
193
+ ],
194
+ [
195
+ -23.351024627685547,
196
+ -36.545902252197266,
197
+ -33.976280212402344,
198
+ -25.947864532470703,
199
+ -18.89763641357422
200
+ ],
201
+ [
202
+ -24.79306411743164,
203
+ -39.18869400024414,
204
+ -36.27018737792969,
205
+ -26.389755249023438,
206
+ -19.737533569335938
207
+ ],
208
+ [
209
+ -26.056222915649414,
210
+ -41.40523910522461,
211
+ -38.54291534423828,
212
+ -26.566513061523438,
213
+ -20.57742691040039
214
+ ],
215
+ [
216
+ -27.2141170501709,
217
+ -43.45127487182617,
218
+ -40.633819580078125,
219
+ -26.654312133789062,
220
+ -21.732280731201172
221
+ ],
222
+ [
223
+ -28.372011184692383,
224
+ -45.497310638427734,
225
+ -42.90654754638672,
226
+ -26.654312133789062,
227
+ -22.834644317626953
228
+ ],
229
+ [
230
+ -29.5299072265625,
231
+ -47.542476654052734,
232
+ -44.9827995300293,
233
+ -26.654312133789062,
234
+ -23.674541473388672
235
+ ],
236
+ [
237
+ -30.687801361083984,
238
+ -49.50242614746094,
239
+ -47.19833755493164,
240
+ -26.654312133789062,
241
+ -24.304462432861328
242
+ ],
243
+ [
244
+ -31.84569549560547,
245
+ -51.29353713989258,
246
+ -49.528255462646484,
247
+ -26.654312133789062,
248
+ -25.406824111938477
249
+ ]
250
+ ],
251
+ "max": [
252
+ [
253
+ 11.800273895263672,
254
+ 13.390182495117188,
255
+ 11.2484130859375,
256
+ 12.953540802001953,
257
+ 8.97637939453125
258
+ ],
259
+ [
260
+ 14.462503433227539,
261
+ 16.288734436035156,
262
+ 13.857921600341797,
263
+ 15.763023376464844,
264
+ 10.813648223876953
265
+ ],
266
+ [
267
+ 16.902881622314453,
268
+ 19.443038940429688,
269
+ 16.2484130859375,
270
+ 18.39691162109375,
271
+ 12.703411102294922
272
+ ],
273
+ [
274
+ 19.40987205505371,
275
+ 22.16992950439453,
276
+ 18.857921600341797,
277
+ 20.783126831054688,
278
+ 14.645668029785156
279
+ ],
280
+ [
281
+ 21.850250244140625,
282
+ 24.812728881835938,
283
+ 21.221553802490234,
284
+ 22.815826416015625,
285
+ 16.482940673828125
286
+ ],
287
+ [
288
+ 24.290626525878906,
289
+ 27.455524444580078,
290
+ 23.551467895507812,
291
+ 24.671768188476562,
292
+ 18.530181884765625
293
+ ],
294
+ [
295
+ 26.50115203857422,
296
+ 30.011974334716797,
297
+ 25.857921600341797,
298
+ 26.515491485595703,
299
+ 20.262466430664062
300
+ ],
301
+ [
302
+ 28.71167755126953,
303
+ 32.31278610229492,
304
+ 28.187835693359375,
305
+ 27.920230865478516,
306
+ 22.099735260009766
307
+ ],
308
+ [
309
+ 31.132732391357422,
310
+ 34.52932357788086,
311
+ 30.585193634033203,
312
+ 29.34360122680664,
313
+ 23.832019805908203
314
+ ],
315
+ [
316
+ 33.24032974243164,
317
+ 36.490108489990234,
318
+ 32.91510772705078,
319
+ 30.757648468017578,
320
+ 25.56430435180664
321
+ ],
322
+ [
323
+ 35.237003326416016,
324
+ 38.621395111083984,
325
+ 35.187835693359375,
326
+ 32.26008224487305,
327
+ 27.296588897705078
328
+ ],
329
+ [
330
+ 37.0264778137207,
331
+ 40.411678314208984,
332
+ 37.27873992919922,
333
+ 33.66482162475586,
334
+ 28.87139129638672
335
+ ],
336
+ [
337
+ 38.815948486328125,
338
+ 42.372459411621094,
339
+ 39.32906723022461,
340
+ 34.99049758911133,
341
+ 30.498687744140625
342
+ ],
343
+ [
344
+ 40.147064208984375,
345
+ 43.99224090576172,
346
+ 41.238162994384766,
347
+ 35.6975212097168,
348
+ 31.9160099029541
349
+ ],
350
+ [
351
+ 41.552791595458984,
352
+ 45.61201858520508,
353
+ 43.13301467895508,
354
+ 36.0510368347168,
355
+ 33.33333206176758
356
+ ],
357
+ [
358
+ 43.0264778137207,
359
+ 47.23110580444336,
360
+ 45.22392654418945,
361
+ 36.402225494384766,
362
+ 34.75065612792969
363
+ ]
364
+ ],
365
+ "mean": [
366
+ [
367
+ 0.11750347912311554,
368
+ -0.6653266549110413,
369
+ -1.4583468437194824,
370
+ -0.2053278237581253,
371
+ -0.01863335259258747
372
+ ],
373
+ [
374
+ 0.12236570566892624,
375
+ -0.6657301187515259,
376
+ -1.4583476781845093,
377
+ -0.20791052281856537,
378
+ -0.02185939997434616
379
+ ],
380
+ [
381
+ 0.12741880118846893,
382
+ -0.6660965085029602,
383
+ -1.4583467245101929,
384
+ -0.21049359440803528,
385
+ -0.025085171684622765
386
+ ],
387
+ [
388
+ 0.13261470198631287,
389
+ -0.6664630174636841,
390
+ -1.4583468437194824,
391
+ -0.21307627856731415,
392
+ -0.02831093594431877
393
+ ],
394
+ [
395
+ 0.13785837590694427,
396
+ -0.6668296456336975,
397
+ -1.4583486318588257,
398
+ -0.21565881371498108,
399
+ -0.03153714910149574
400
+ ],
401
+ [
402
+ 0.14319713413715363,
403
+ -0.6671591401100159,
404
+ -1.4583512544631958,
405
+ -0.21824198961257935,
406
+ -0.03474012389779091
407
+ ],
408
+ [
409
+ 0.14853604137897491,
410
+ -0.6674894094467163,
411
+ -1.458350419998169,
412
+ -0.22082529962062836,
413
+ -0.03794342279434204
414
+ ],
415
+ [
416
+ 0.15382729470729828,
417
+ -0.6678190231323242,
418
+ -1.4583524465560913,
419
+ -0.22340813279151917,
420
+ -0.041146621108055115
421
+ ],
422
+ [
423
+ 0.15907101333141327,
424
+ -0.6681490540504456,
425
+ -1.458353042602539,
426
+ -0.2259913831949234,
427
+ -0.04434990882873535
428
+ ],
429
+ [
430
+ 0.16426703333854675,
431
+ -0.668441653251648,
432
+ -1.458351492881775,
433
+ -0.22857406735420227,
434
+ -0.04755344241857529
435
+ ],
436
+ [
437
+ 0.1694149672985077,
438
+ -0.668734610080719,
439
+ -1.4583535194396973,
440
+ -0.23115694522857666,
441
+ -0.05075618624687195
442
+ ],
443
+ [
444
+ 0.17465919256210327,
445
+ -0.6690278649330139,
446
+ -1.458351731300354,
447
+ -0.2337394654750824,
448
+ -0.05395958200097084
449
+ ],
450
+ [
451
+ 0.17990276217460632,
452
+ -0.6693212985992432,
453
+ -1.4583524465560913,
454
+ -0.23632188141345978,
455
+ -0.05716199427843094
456
+ ],
457
+ [
458
+ 0.18505056202411652,
459
+ -0.6696141958236694,
460
+ -1.4583513736724854,
461
+ -0.2389044612646103,
462
+ -0.060365449637174606
463
+ ],
464
+ [
465
+ 0.1901988983154297,
466
+ -0.6699072122573853,
467
+ -1.4583531618118286,
468
+ -0.24148696660995483,
469
+ -0.06356889009475708
470
+ ],
471
+ [
472
+ 0.19529956579208374,
473
+ -0.6702005863189697,
474
+ -1.4583516120910645,
475
+ -0.24407006800174713,
476
+ -0.0667722150683403
477
+ ]
478
+ ],
479
+ "std": [
480
+ [
481
+ 2.756209135055542,
482
+ 4.048781394958496,
483
+ 4.834221839904785,
484
+ 2.4045844078063965,
485
+ 1.831181526184082
486
+ ],
487
+ [
488
+ 3.375519037246704,
489
+ 4.940067768096924,
490
+ 5.704192638397217,
491
+ 2.936358690261841,
492
+ 2.2514567375183105
493
+ ],
494
+ [
495
+ 3.9847497940063477,
496
+ 5.8236775398254395,
497
+ 6.570866107940674,
498
+ 3.443351984024048,
499
+ 2.663818359375
500
+ ],
501
+ [
502
+ 4.582452774047852,
503
+ 6.696892738342285,
504
+ 7.430098056793213,
505
+ 3.9259438514709473,
506
+ 3.067777633666992
507
+ ],
508
+ [
509
+ 5.167651653289795,
510
+ 7.5579352378845215,
511
+ 8.279448509216309,
512
+ 4.385776996612549,
513
+ 3.462792158126831
514
+ ],
515
+ [
516
+ 5.740141868591309,
517
+ 8.406184196472168,
518
+ 9.11788558959961,
519
+ 4.825246810913086,
520
+ 3.848816156387329
521
+ ],
522
+ [
523
+ 6.2996673583984375,
524
+ 9.240605354309082,
525
+ 9.944042205810547,
526
+ 5.246663570404053,
527
+ 4.225630283355713
528
+ ],
529
+ [
530
+ 6.846652507781982,
531
+ 10.06134033203125,
532
+ 10.75770092010498,
533
+ 5.652442932128906,
534
+ 4.593677043914795
535
+ ],
536
+ [
537
+ 7.3811469078063965,
538
+ 10.86816120147705,
539
+ 11.558343887329102,
540
+ 6.044579982757568,
541
+ 4.9531779289245605
542
+ ],
543
+ [
544
+ 7.903446197509766,
545
+ 11.661355972290039,
546
+ 12.34595012664795,
547
+ 6.424712657928467,
548
+ 5.304342269897461
549
+ ],
550
+ [
551
+ 8.414060592651367,
552
+ 12.44128131866455,
553
+ 13.120593070983887,
554
+ 6.794016361236572,
555
+ 5.647256851196289
556
+ ],
557
+ [
558
+ 8.913477897644043,
559
+ 13.208165168762207,
560
+ 13.88218879699707,
561
+ 7.153115272521973,
562
+ 5.98213529586792
563
+ ],
564
+ [
565
+ 9.402256965637207,
566
+ 13.962782859802246,
567
+ 14.631062507629395,
568
+ 7.5023603439331055,
569
+ 6.309183597564697
570
+ ],
571
+ [
572
+ 9.880831718444824,
573
+ 14.70537281036377,
574
+ 15.367119789123535,
575
+ 7.841666221618652,
576
+ 6.628418445587158
577
+ ],
578
+ [
579
+ 10.349787712097168,
580
+ 15.43662166595459,
581
+ 16.090578079223633,
582
+ 8.171104431152344,
583
+ 6.940072536468506
584
+ ],
585
+ [
586
+ 10.809670448303223,
587
+ 16.15700340270996,
588
+ 16.801616668701172,
589
+ 8.490680694580078,
590
+ 7.244339466094971
591
+ ]
592
+ ],
593
+ "q01": [
594
+ [
595
+ -7.149912223815918,
596
+ -12.172602081298828,
597
+ -13.191639633178712,
598
+ -7.083407516479492,
599
+ -5.301837921142578
600
+ ],
601
+ [
602
+ -8.969949054718018,
603
+ -14.76386520385742,
604
+ -15.673493194580079,
605
+ -8.597449951171875,
606
+ -6.4955373382568355
607
+ ],
608
+ [
609
+ -10.634601535797119,
610
+ -17.415445404052733,
611
+ -18.04468208312988,
612
+ -9.786780090332032,
613
+ -7.755381164550781
614
+ ],
615
+ [
616
+ -12.332097396850585,
617
+ -19.81734550476074,
618
+ -20.49539176940918,
619
+ -10.772034606933593,
620
+ -8.81890007019043
621
+ ],
622
+ [
623
+ -14.054951286315918,
624
+ -22.659150772094726,
625
+ -22.971768417358398,
626
+ -11.810748291015624,
627
+ -10.076640815734862
628
+ ],
629
+ [
630
+ -15.467545299530029,
631
+ -24.854450073242187,
632
+ -25.42631248474121,
633
+ -12.89973159790039,
634
+ -11.391075839996338
635
+ ],
636
+ [
637
+ -16.85467258453369,
638
+ -27.482084159851073,
639
+ -27.82916431427002,
640
+ -13.832246704101562,
641
+ -12.532282905578613
642
+ ],
643
+ [
644
+ -18.258717765808104,
645
+ -29.645261459350586,
646
+ -29.891822814941406,
647
+ -14.627758026123047,
648
+ -13.543306350708008
649
+ ],
650
+ [
651
+ -19.36220642089844,
652
+ -31.862525711059572,
653
+ -32.179530944824215,
654
+ -15.569841918945311,
655
+ -14.78740104675293
656
+ ],
657
+ [
658
+ -20.866409225463865,
659
+ -34.33618782043457,
660
+ -34.25658073425293,
661
+ -16.842924346923827,
662
+ -15.576902465820313
663
+ ],
664
+ [
665
+ -21.95573440551758,
666
+ -36.528687438964845,
667
+ -36.38519187927246,
668
+ -17.423948669433592,
669
+ -16.82309829711914
670
+ ],
671
+ [
672
+ -23.461913070678712,
673
+ -38.73653938293457,
674
+ -38.30317817687988,
675
+ -17.647796630859375,
676
+ -17.742782592773438
677
+ ],
678
+ [
679
+ -24.53435989379883,
680
+ -40.874423828124996,
681
+ -40.44069320678711,
682
+ -18.515239715576172,
683
+ -18.879788017272947
684
+ ],
685
+ [
686
+ -25.933171768188476,
687
+ -43.23724739074707,
688
+ -42.29842636108398,
689
+ -19.954179382324217,
690
+ -19.801573486328124
691
+ ],
692
+ [
693
+ -27.245162963867188,
694
+ -45.308877029418944,
695
+ -43.92444366455078,
696
+ -20.61133575439453,
697
+ -20.9133874130249
698
+ ],
699
+ [
700
+ -28.540471458435057,
701
+ -47.20507698059082,
702
+ -45.95519866943359,
703
+ -20.804307861328123,
704
+ -21.69133777618408
705
+ ]
706
+ ],
707
+ "q99": [
708
+ [
709
+ 8.69469722747799,
710
+ 10.299373474121074,
711
+ 9.785606613159175,
712
+ 9.179881286621086,
713
+ 6.271916503906227
714
+ ],
715
+ [
716
+ 10.615129890441846,
717
+ 12.666508712768518,
718
+ 12.091986007690421,
719
+ 11.210918426513642,
720
+ 7.833070821762051
721
+ ],
722
+ [
723
+ 12.4686942863464,
724
+ 14.899181365966779,
725
+ 14.446693725585934,
726
+ 13.182954406738268,
727
+ 9.380576934814396
728
+ ],
729
+ [
730
+ 14.271173782348615,
731
+ 17.33157432556147,
732
+ 16.80153594970703,
733
+ 15.230424041748023,
734
+ 10.432546234130848
735
+ ],
736
+ [
737
+ 16.08265884399413,
738
+ 19.270514526367187,
739
+ 19.092461013793933,
740
+ 17.081269989013663,
741
+ 12.112335987091052
742
+ ],
743
+ [
744
+ 18.145391998291,
745
+ 21.44050186157223,
746
+ 21.280475997924803,
747
+ 18.68309165954589,
748
+ 13.620999450683572
749
+ ],
750
+ [
751
+ 19.996333007812442,
752
+ 23.52975692749016,
753
+ 23.596975326538082,
754
+ 20.103778152465818,
755
+ 14.602626724243084
756
+ ],
757
+ [
758
+ 21.60820228576658,
759
+ 25.1919183349609,
760
+ 25.76568862915039,
761
+ 21.541843643188393,
762
+ 16.10183769226073
763
+ ],
764
+ [
765
+ 23.362480049133293,
766
+ 27.08537643432608,
767
+ 28.15810386657714,
768
+ 22.8827821350097,
769
+ 17.433070678710767
770
+ ],
771
+ [
772
+ 25.225577430725092,
773
+ 28.52370849609371,
774
+ 30.372146911621076,
775
+ 23.82381240844726,
776
+ 18.712861175537086
777
+ ],
778
+ [
779
+ 26.625544967651358,
780
+ 30.16027656555174,
781
+ 32.49975234985349,
782
+ 24.50854393005371,
783
+ 19.443568878173714
784
+ ],
785
+ [
786
+ 28.116129837036127,
787
+ 31.295492095947264,
788
+ 34.53191467285156,
789
+ 25.490360641479374,
790
+ 20.561679687499943
791
+ ],
792
+ [
793
+ 29.457272415161103,
794
+ 32.63257324218748,
795
+ 36.42262039184569,
796
+ 26.227766571044913,
797
+ 21.088712310791003
798
+ ],
799
+ [
800
+ 30.8263483428955,
801
+ 33.570856246948225,
802
+ 38.26254928588863,
803
+ 26.711691284179675,
804
+ 22.396849746704
805
+ ],
806
+ [
807
+ 32.008242797851516,
808
+ 34.67565246582029,
809
+ 40.05519142150879,
810
+ 27.399809188842735,
811
+ 22.793699951171842
812
+ ],
813
+ [
814
+ 33.28292778015133,
815
+ 35.806464462280275,
816
+ 41.85474884033201,
817
+ 27.812029418945265,
818
+ 23.47191642761218
819
+ ]
820
+ ]
821
+ }
822
+ }
823
+ }
824
+ }
checkpoint-1000/experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "apply_sincos_state_encoding": true,
19
+ "use_relative_action": true,
20
+ "max_state_dim": 128,
21
+ "max_action_dim": 128,
22
+ "action_horizon": 50,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "add_pos_embed": true,
26
+ "attn_dropout": 0.2,
27
+ "use_vlln": true,
28
+ "max_seq_len": 1024,
29
+ "use_alternate_vl_dit": true,
30
+ "attend_text_every_n_blocks": 2,
31
+ "diffusion_model_cfg": {
32
+ "attention_head_dim": 48,
33
+ "dropout": 0.2,
34
+ "final_dropout": true,
35
+ "interleave_self_attention": true,
36
+ "norm_type": "ada_norm",
37
+ "num_attention_heads": 32,
38
+ "num_layers": 32,
39
+ "output_dim": 1024,
40
+ "positional_embeddings": null
41
+ },
42
+ "num_inference_timesteps": 4,
43
+ "noise_beta_alpha": 1.5,
44
+ "noise_beta_beta": 1.0,
45
+ "noise_s": 0.999,
46
+ "num_timestep_buckets": 1000,
47
+ "tune_projector": true,
48
+ "tune_diffusion_model": true,
49
+ "tune_vlln": true,
50
+ "state_dropout_prob": 0.0,
51
+ "state_additive_noise_scale": 0.0,
52
+ "max_num_embodiments": 32
53
+ }
checkpoint-1000/experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dc2fbc659733405270834529d9192d68993cb85b7997499c872ce83696ea0d3
3
+ size 4990120184
checkpoint-1000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d23cc8c9d0cca6f623ac055e00eaeff1b050c2af1d5366cf478fd2e2c411bdf
3
+ size 4823190320
checkpoint-1000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d37f434807d08f03597bf76c2277fc55bcc5109261b04921191f1cc8c0311ed
3
+ size 12960193762
checkpoint-1000/processor_config.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "front",
361
+ "wrist"
362
+ ],
363
+ "sin_cos_embedding_keys": null,
364
+ "mean_std_embedding_keys": null,
365
+ "action_configs": null
366
+ },
367
+ "state": {
368
+ "delta_indices": [
369
+ 0
370
+ ],
371
+ "modality_keys": [
372
+ "single_arm",
373
+ "gripper"
374
+ ],
375
+ "sin_cos_embedding_keys": null,
376
+ "mean_std_embedding_keys": null,
377
+ "action_configs": null
378
+ },
379
+ "action": {
380
+ "delta_indices": [
381
+ 0,
382
+ 1,
383
+ 2,
384
+ 3,
385
+ 4,
386
+ 5,
387
+ 6,
388
+ 7,
389
+ 8,
390
+ 9,
391
+ 10,
392
+ 11,
393
+ 12,
394
+ 13,
395
+ 14,
396
+ 15
397
+ ],
398
+ "modality_keys": [
399
+ "single_arm",
400
+ "gripper"
401
+ ],
402
+ "sin_cos_embedding_keys": null,
403
+ "mean_std_embedding_keys": null,
404
+ "action_configs": [
405
+ {
406
+ "rep": "RELATIVE",
407
+ "type": "NON_EEF",
408
+ "format": "DEFAULT",
409
+ "state_key": null
410
+ },
411
+ {
412
+ "rep": "ABSOLUTE",
413
+ "type": "NON_EEF",
414
+ "format": "DEFAULT",
415
+ "state_key": null
416
+ }
417
+ ]
418
+ },
419
+ "language": {
420
+ "delta_indices": [
421
+ 0
422
+ ],
423
+ "modality_keys": [
424
+ "annotation.human.task_description"
425
+ ],
426
+ "sin_cos_embedding_keys": null,
427
+ "mean_std_embedding_keys": null,
428
+ "action_configs": null
429
+ }
430
+ }
431
+ },
432
+ "image_crop_size": null,
433
+ "image_target_size": null,
434
+ "use_albumentations": true,
435
+ "random_rotation_angle": null,
436
+ "color_jitter_params": {
437
+ "brightness": 0.3,
438
+ "contrast": 0.4,
439
+ "saturation": 0.5,
440
+ "hue": 0.08
441
+ },
442
+ "shortest_image_edge": 256,
443
+ "crop_fraction": 0.95,
444
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
445
+ "model_type": "eagle",
446
+ "formalize_language": true,
447
+ "max_state_dim": 128,
448
+ "max_action_dim": 128,
449
+ "max_action_horizon": 50,
450
+ "use_percentiles": false,
451
+ "clip_outliers": true,
452
+ "apply_sincos_state_encoding": true,
453
+ "use_relative_action": true
454
+ }
455
+ }
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c8cd95b87e2c74d2b8eeb87e84e4bdde690b9a799e7bfed5ac657fbe0263a2
3
+ size 14645
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5dcdc121951b51745034e467890f8a7b69b505c12b1a668d378806746fee59
3
+ size 1465
checkpoint-1000/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.1,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "grad_norm": 2.416853189468384,
14
+ "learning_rate": 1.8e-06,
15
+ "loss": 1.1685,
16
+ "step": 10
17
+ },
18
+ {
19
+ "grad_norm": 1.0608258247375488,
20
+ "learning_rate": 3.8e-06,
21
+ "loss": 1.1645,
22
+ "step": 20
23
+ },
24
+ {
25
+ "grad_norm": 0.6068993210792542,
26
+ "learning_rate": 5.8e-06,
27
+ "loss": 1.1434,
28
+ "step": 30
29
+ },
30
+ {
31
+ "grad_norm": 0.4165553152561188,
32
+ "learning_rate": 7.8e-06,
33
+ "loss": 1.1309,
34
+ "step": 40
35
+ },
36
+ {
37
+ "grad_norm": 0.5539636015892029,
38
+ "learning_rate": 9.800000000000001e-06,
39
+ "loss": 1.1056,
40
+ "step": 50
41
+ },
42
+ {
43
+ "grad_norm": 0.48063579201698303,
44
+ "learning_rate": 1.18e-05,
45
+ "loss": 1.1079,
46
+ "step": 60
47
+ },
48
+ {
49
+ "grad_norm": 0.4390954077243805,
50
+ "learning_rate": 1.3800000000000002e-05,
51
+ "loss": 1.0909,
52
+ "step": 70
53
+ },
54
+ {
55
+ "grad_norm": 0.39306357502937317,
56
+ "learning_rate": 1.58e-05,
57
+ "loss": 1.0837,
58
+ "step": 80
59
+ },
60
+ {
61
+ "grad_norm": 0.5071999430656433,
62
+ "learning_rate": 1.78e-05,
63
+ "loss": 1.0625,
64
+ "step": 90
65
+ },
66
+ {
67
+ "grad_norm": 0.38793864846229553,
68
+ "learning_rate": 1.9800000000000004e-05,
69
+ "loss": 1.0615,
70
+ "step": 100
71
+ },
72
+ {
73
+ "grad_norm": 0.6121100187301636,
74
+ "learning_rate": 2.18e-05,
75
+ "loss": 1.0406,
76
+ "step": 110
77
+ },
78
+ {
79
+ "grad_norm": 0.6436424851417542,
80
+ "learning_rate": 2.38e-05,
81
+ "loss": 1.0538,
82
+ "step": 120
83
+ },
84
+ {
85
+ "grad_norm": 3.180732011795044,
86
+ "learning_rate": 2.58e-05,
87
+ "loss": 0.9922,
88
+ "step": 130
89
+ },
90
+ {
91
+ "grad_norm": 0.8403401374816895,
92
+ "learning_rate": 2.7800000000000005e-05,
93
+ "loss": 0.9564,
94
+ "step": 140
95
+ },
96
+ {
97
+ "grad_norm": 1.5241442918777466,
98
+ "learning_rate": 2.98e-05,
99
+ "loss": 0.8884,
100
+ "step": 150
101
+ },
102
+ {
103
+ "grad_norm": 1.140098214149475,
104
+ "learning_rate": 3.18e-05,
105
+ "loss": 0.8313,
106
+ "step": 160
107
+ },
108
+ {
109
+ "grad_norm": 1.406286597251892,
110
+ "learning_rate": 3.38e-05,
111
+ "loss": 0.7616,
112
+ "step": 170
113
+ },
114
+ {
115
+ "grad_norm": 2.2860851287841797,
116
+ "learning_rate": 3.58e-05,
117
+ "loss": 0.6981,
118
+ "step": 180
119
+ },
120
+ {
121
+ "grad_norm": 2.2754695415496826,
122
+ "learning_rate": 3.7800000000000004e-05,
123
+ "loss": 0.6285,
124
+ "step": 190
125
+ },
126
+ {
127
+ "grad_norm": 2.531214714050293,
128
+ "learning_rate": 3.9800000000000005e-05,
129
+ "loss": 0.4895,
130
+ "step": 200
131
+ },
132
+ {
133
+ "grad_norm": 2.7056477069854736,
134
+ "learning_rate": 4.18e-05,
135
+ "loss": 0.3478,
136
+ "step": 210
137
+ },
138
+ {
139
+ "grad_norm": 1.7513630390167236,
140
+ "learning_rate": 4.38e-05,
141
+ "loss": 0.2509,
142
+ "step": 220
143
+ },
144
+ {
145
+ "grad_norm": 1.6927777528762817,
146
+ "learning_rate": 4.58e-05,
147
+ "loss": 0.1911,
148
+ "step": 230
149
+ },
150
+ {
151
+ "grad_norm": 2.264531135559082,
152
+ "learning_rate": 4.78e-05,
153
+ "loss": 0.2006,
154
+ "step": 240
155
+ },
156
+ {
157
+ "grad_norm": 1.3290256261825562,
158
+ "learning_rate": 4.9800000000000004e-05,
159
+ "loss": 0.1654,
160
+ "step": 250
161
+ },
162
+ {
163
+ "grad_norm": 1.5439292192459106,
164
+ "learning_rate": 5.1800000000000005e-05,
165
+ "loss": 0.172,
166
+ "step": 260
167
+ },
168
+ {
169
+ "grad_norm": 1.5491207838058472,
170
+ "learning_rate": 5.380000000000001e-05,
171
+ "loss": 0.1494,
172
+ "step": 270
173
+ },
174
+ {
175
+ "grad_norm": 1.1139971017837524,
176
+ "learning_rate": 5.580000000000001e-05,
177
+ "loss": 0.143,
178
+ "step": 280
179
+ },
180
+ {
181
+ "grad_norm": 1.2239474058151245,
182
+ "learning_rate": 5.7799999999999995e-05,
183
+ "loss": 0.1281,
184
+ "step": 290
185
+ },
186
+ {
187
+ "grad_norm": 1.093898057937622,
188
+ "learning_rate": 5.9800000000000003e-05,
189
+ "loss": 0.1234,
190
+ "step": 300
191
+ },
192
+ {
193
+ "grad_norm": 1.1576344966888428,
194
+ "learning_rate": 6.18e-05,
195
+ "loss": 0.1147,
196
+ "step": 310
197
+ },
198
+ {
199
+ "grad_norm": 1.2395023107528687,
200
+ "learning_rate": 6.38e-05,
201
+ "loss": 0.1143,
202
+ "step": 320
203
+ },
204
+ {
205
+ "grad_norm": 1.2169476747512817,
206
+ "learning_rate": 6.58e-05,
207
+ "loss": 0.1144,
208
+ "step": 330
209
+ },
210
+ {
211
+ "grad_norm": 0.9942442774772644,
212
+ "learning_rate": 6.780000000000001e-05,
213
+ "loss": 0.1152,
214
+ "step": 340
215
+ },
216
+ {
217
+ "grad_norm": 1.0080304145812988,
218
+ "learning_rate": 6.98e-05,
219
+ "loss": 0.1081,
220
+ "step": 350
221
+ },
222
+ {
223
+ "grad_norm": 1.1731500625610352,
224
+ "learning_rate": 7.18e-05,
225
+ "loss": 0.1146,
226
+ "step": 360
227
+ },
228
+ {
229
+ "grad_norm": 0.8233430981636047,
230
+ "learning_rate": 7.38e-05,
231
+ "loss": 0.1135,
232
+ "step": 370
233
+ },
234
+ {
235
+ "grad_norm": 1.0143144130706787,
236
+ "learning_rate": 7.58e-05,
237
+ "loss": 0.0979,
238
+ "step": 380
239
+ },
240
+ {
241
+ "grad_norm": 0.8134552240371704,
242
+ "learning_rate": 7.780000000000001e-05,
243
+ "loss": 0.1075,
244
+ "step": 390
245
+ },
246
+ {
247
+ "grad_norm": 0.736839234828949,
248
+ "learning_rate": 7.98e-05,
249
+ "loss": 0.1023,
250
+ "step": 400
251
+ },
252
+ {
253
+ "grad_norm": 0.8744317889213562,
254
+ "learning_rate": 8.18e-05,
255
+ "loss": 0.1093,
256
+ "step": 410
257
+ },
258
+ {
259
+ "grad_norm": 0.7313347458839417,
260
+ "learning_rate": 8.38e-05,
261
+ "loss": 0.1022,
262
+ "step": 420
263
+ },
264
+ {
265
+ "grad_norm": 0.8588263988494873,
266
+ "learning_rate": 8.58e-05,
267
+ "loss": 0.0957,
268
+ "step": 430
269
+ },
270
+ {
271
+ "grad_norm": 0.8712506890296936,
272
+ "learning_rate": 8.78e-05,
273
+ "loss": 0.0986,
274
+ "step": 440
275
+ },
276
+ {
277
+ "grad_norm": 0.5078502893447876,
278
+ "learning_rate": 8.98e-05,
279
+ "loss": 0.0941,
280
+ "step": 450
281
+ },
282
+ {
283
+ "grad_norm": 0.8700484037399292,
284
+ "learning_rate": 9.180000000000001e-05,
285
+ "loss": 0.0968,
286
+ "step": 460
287
+ },
288
+ {
289
+ "grad_norm": 0.8209111094474792,
290
+ "learning_rate": 9.38e-05,
291
+ "loss": 0.1025,
292
+ "step": 470
293
+ },
294
+ {
295
+ "grad_norm": 0.7114784717559814,
296
+ "learning_rate": 9.58e-05,
297
+ "loss": 0.0907,
298
+ "step": 480
299
+ },
300
+ {
301
+ "grad_norm": 0.8576576709747314,
302
+ "learning_rate": 9.78e-05,
303
+ "loss": 0.085,
304
+ "step": 490
305
+ },
306
+ {
307
+ "grad_norm": 0.7132489085197449,
308
+ "learning_rate": 9.98e-05,
309
+ "loss": 0.0826,
310
+ "step": 500
311
+ },
312
+ {
313
+ "grad_norm": 0.5286405682563782,
314
+ "learning_rate": 9.9999778549206e-05,
315
+ "loss": 0.0773,
316
+ "step": 510
317
+ },
318
+ {
319
+ "grad_norm": 0.6641912460327148,
320
+ "learning_rate": 9.999901304280685e-05,
321
+ "loss": 0.0832,
322
+ "step": 520
323
+ },
324
+ {
325
+ "grad_norm": 0.6113771200180054,
326
+ "learning_rate": 9.999770075521164e-05,
327
+ "loss": 0.0818,
328
+ "step": 530
329
+ },
330
+ {
331
+ "grad_norm": 0.6849881410598755,
332
+ "learning_rate": 9.99958417007713e-05,
333
+ "loss": 0.0844,
334
+ "step": 540
335
+ },
336
+ {
337
+ "grad_norm": 0.510468602180481,
338
+ "learning_rate": 9.999343589981615e-05,
339
+ "loss": 0.0712,
340
+ "step": 550
341
+ },
342
+ {
343
+ "grad_norm": 0.4960484802722931,
344
+ "learning_rate": 9.999048337865568e-05,
345
+ "loss": 0.0677,
346
+ "step": 560
347
+ },
348
+ {
349
+ "grad_norm": 0.6222489476203918,
350
+ "learning_rate": 9.998698416957815e-05,
351
+ "loss": 0.0677,
352
+ "step": 570
353
+ },
354
+ {
355
+ "grad_norm": 0.6377440690994263,
356
+ "learning_rate": 9.998293831085037e-05,
357
+ "loss": 0.0757,
358
+ "step": 580
359
+ },
360
+ {
361
+ "grad_norm": 0.6644724011421204,
362
+ "learning_rate": 9.997834584671719e-05,
363
+ "loss": 0.0721,
364
+ "step": 590
365
+ },
366
+ {
367
+ "grad_norm": 0.42810145020484924,
368
+ "learning_rate": 9.997320682740107e-05,
369
+ "loss": 0.0741,
370
+ "step": 600
371
+ },
372
+ {
373
+ "grad_norm": 0.47897064685821533,
374
+ "learning_rate": 9.996752130910149e-05,
375
+ "loss": 0.0723,
376
+ "step": 610
377
+ },
378
+ {
379
+ "grad_norm": 0.512750506401062,
380
+ "learning_rate": 9.99612893539944e-05,
381
+ "loss": 0.0773,
382
+ "step": 620
383
+ },
384
+ {
385
+ "grad_norm": 0.5588595867156982,
386
+ "learning_rate": 9.995451103023144e-05,
387
+ "loss": 0.0772,
388
+ "step": 630
389
+ },
390
+ {
391
+ "grad_norm": 0.6482793092727661,
392
+ "learning_rate": 9.994718641193928e-05,
393
+ "loss": 0.071,
394
+ "step": 640
395
+ },
396
+ {
397
+ "grad_norm": 0.4272823631763458,
398
+ "learning_rate": 9.993931557921874e-05,
399
+ "loss": 0.0695,
400
+ "step": 650
401
+ },
402
+ {
403
+ "grad_norm": 0.5539937019348145,
404
+ "learning_rate": 9.993089861814402e-05,
405
+ "loss": 0.0548,
406
+ "step": 660
407
+ },
408
+ {
409
+ "grad_norm": 0.5200349688529968,
410
+ "learning_rate": 9.992193562076166e-05,
411
+ "loss": 0.0649,
412
+ "step": 670
413
+ },
414
+ {
415
+ "grad_norm": 0.35027024149894714,
416
+ "learning_rate": 9.991242668508954e-05,
417
+ "loss": 0.0599,
418
+ "step": 680
419
+ },
420
+ {
421
+ "grad_norm": 0.4623376429080963,
422
+ "learning_rate": 9.990237191511587e-05,
423
+ "loss": 0.0593,
424
+ "step": 690
425
+ },
426
+ {
427
+ "grad_norm": 0.42851006984710693,
428
+ "learning_rate": 9.989177142079802e-05,
429
+ "loss": 0.0518,
430
+ "step": 700
431
+ },
432
+ {
433
+ "grad_norm": 0.5397882461547852,
434
+ "learning_rate": 9.988062531806126e-05,
435
+ "loss": 0.0614,
436
+ "step": 710
437
+ },
438
+ {
439
+ "grad_norm": 0.298032283782959,
440
+ "learning_rate": 9.986893372879762e-05,
441
+ "loss": 0.0663,
442
+ "step": 720
443
+ },
444
+ {
445
+ "grad_norm": 0.6170604228973389,
446
+ "learning_rate": 9.985669678086443e-05,
447
+ "loss": 0.0594,
448
+ "step": 730
449
+ },
450
+ {
451
+ "grad_norm": 0.49378132820129395,
452
+ "learning_rate": 9.984391460808298e-05,
453
+ "loss": 0.0707,
454
+ "step": 740
455
+ },
456
+ {
457
+ "grad_norm": 0.5525211095809937,
458
+ "learning_rate": 9.983058735023709e-05,
459
+ "loss": 0.0643,
460
+ "step": 750
461
+ },
462
+ {
463
+ "grad_norm": 0.4275728166103363,
464
+ "learning_rate": 9.98167151530715e-05,
465
+ "loss": 0.0632,
466
+ "step": 760
467
+ },
468
+ {
469
+ "grad_norm": 0.3775525391101837,
470
+ "learning_rate": 9.980229816829034e-05,
471
+ "loss": 0.0595,
472
+ "step": 770
473
+ },
474
+ {
475
+ "grad_norm": 0.42739713191986084,
476
+ "learning_rate": 9.978733655355544e-05,
477
+ "loss": 0.0568,
478
+ "step": 780
479
+ },
480
+ {
481
+ "grad_norm": 0.4654758870601654,
482
+ "learning_rate": 9.977183047248464e-05,
483
+ "loss": 0.0516,
484
+ "step": 790
485
+ },
486
+ {
487
+ "grad_norm": 0.4031694233417511,
488
+ "learning_rate": 9.975578009464992e-05,
489
+ "loss": 0.0581,
490
+ "step": 800
491
+ },
492
+ {
493
+ "grad_norm": 0.34200575947761536,
494
+ "learning_rate": 9.97391855955757e-05,
495
+ "loss": 0.0457,
496
+ "step": 810
497
+ },
498
+ {
499
+ "grad_norm": 0.3750764727592468,
500
+ "learning_rate": 9.972204715673669e-05,
501
+ "loss": 0.0509,
502
+ "step": 820
503
+ },
504
+ {
505
+ "grad_norm": 0.5093910694122314,
506
+ "learning_rate": 9.970436496555617e-05,
507
+ "loss": 0.0489,
508
+ "step": 830
509
+ },
510
+ {
511
+ "grad_norm": 0.6330306529998779,
512
+ "learning_rate": 9.968613921540373e-05,
513
+ "loss": 0.0531,
514
+ "step": 840
515
+ },
516
+ {
517
+ "grad_norm": 0.44549983739852905,
518
+ "learning_rate": 9.966737010559326e-05,
519
+ "loss": 0.0569,
520
+ "step": 850
521
+ },
522
+ {
523
+ "grad_norm": 0.3454737961292267,
524
+ "learning_rate": 9.964805784138072e-05,
525
+ "loss": 0.0497,
526
+ "step": 860
527
+ },
528
+ {
529
+ "grad_norm": 0.29266422986984253,
530
+ "learning_rate": 9.962820263396195e-05,
531
+ "loss": 0.0472,
532
+ "step": 870
533
+ },
534
+ {
535
+ "grad_norm": 0.38099658489227295,
536
+ "learning_rate": 9.960780470047033e-05,
537
+ "loss": 0.0556,
538
+ "step": 880
539
+ },
540
+ {
541
+ "grad_norm": 0.37069812417030334,
542
+ "learning_rate": 9.958686426397437e-05,
543
+ "loss": 0.0464,
544
+ "step": 890
545
+ },
546
+ {
547
+ "grad_norm": 0.5007408261299133,
548
+ "learning_rate": 9.956538155347534e-05,
549
+ "loss": 0.0507,
550
+ "step": 900
551
+ },
552
+ {
553
+ "grad_norm": 0.45427390933036804,
554
+ "learning_rate": 9.95433568039047e-05,
555
+ "loss": 0.048,
556
+ "step": 910
557
+ },
558
+ {
559
+ "grad_norm": 0.38306254148483276,
560
+ "learning_rate": 9.952079025612162e-05,
561
+ "loss": 0.0535,
562
+ "step": 920
563
+ },
564
+ {
565
+ "grad_norm": 0.6907458901405334,
566
+ "learning_rate": 9.949768215691022e-05,
567
+ "loss": 0.0505,
568
+ "step": 930
569
+ },
570
+ {
571
+ "grad_norm": 0.5054745674133301,
572
+ "learning_rate": 9.9474032758977e-05,
573
+ "loss": 0.0551,
574
+ "step": 940
575
+ },
576
+ {
577
+ "grad_norm": 0.38975587487220764,
578
+ "learning_rate": 9.944984232094794e-05,
579
+ "loss": 0.0571,
580
+ "step": 950
581
+ },
582
+ {
583
+ "grad_norm": 0.4025571644306183,
584
+ "learning_rate": 9.942511110736584e-05,
585
+ "loss": 0.0486,
586
+ "step": 960
587
+ },
588
+ {
589
+ "grad_norm": 0.3891156017780304,
590
+ "learning_rate": 9.939983938868726e-05,
591
+ "loss": 0.0408,
592
+ "step": 970
593
+ },
594
+ {
595
+ "grad_norm": 0.38383904099464417,
596
+ "learning_rate": 9.93740274412797e-05,
597
+ "loss": 0.0468,
598
+ "step": 980
599
+ },
600
+ {
601
+ "grad_norm": 0.45907771587371826,
602
+ "learning_rate": 9.934767554741846e-05,
603
+ "loss": 0.0532,
604
+ "step": 990
605
+ },
606
+ {
607
+ "grad_norm": 0.28416934609413147,
608
+ "learning_rate": 9.932078399528361e-05,
609
+ "loss": 0.0454,
610
+ "step": 1000
611
+ }
612
+ ],
613
+ "logging_steps": 10,
614
+ "max_steps": 10000,
615
+ "num_input_tokens_seen": 0,
616
+ "num_train_epochs": 9223372036854775807,
617
+ "save_steps": 1000,
618
+ "stateful_callbacks": {
619
+ "TrainerControl": {
620
+ "args": {
621
+ "should_epoch_stop": false,
622
+ "should_evaluate": false,
623
+ "should_log": false,
624
+ "should_save": true,
625
+ "should_training_stop": false
626
+ },
627
+ "attributes": {}
628
+ }
629
+ },
630
+ "total_flos": 0.0,
631
+ "train_batch_size": 36,
632
+ "trial_name": null,
633
+ "trial_params": null
634
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4524ac143a9f2191e97d0434f3b2f8c78c2ac53e3116f1e79b416f15557f3fa0
3
+ size 5713
checkpoint-1000/wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "so100_finetune"}
checkpoint-2000/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.0,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
checkpoint-2000/embodiment_id.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "new_embodiment": 10
10
+ }
checkpoint-2000/experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ formalize_language: true
31
+ apply_sincos_state_encoding: false
32
+ use_relative_action: true
33
+ max_state_dim: 29
34
+ max_action_dim: 29
35
+ action_horizon: 16
36
+ hidden_size: 1024
37
+ input_embedding_dim: 1536
38
+ add_pos_embed: true
39
+ attn_dropout: 0.2
40
+ use_vlln: true
41
+ max_seq_len: 1024
42
+ use_alternate_vl_dit: true
43
+ attend_text_every_n_blocks: 2
44
+ diffusion_model_cfg:
45
+ positional_embeddings: null
46
+ num_layers: 32
47
+ num_attention_heads: 32
48
+ attention_head_dim: 48
49
+ norm_type: ada_norm
50
+ dropout: 0.2
51
+ final_dropout: true
52
+ output_dim: 1024
53
+ interleave_self_attention: true
54
+ num_inference_timesteps: 4
55
+ noise_beta_alpha: 1.5
56
+ noise_beta_beta: 1.0
57
+ noise_s: 0.999
58
+ num_timestep_buckets: 1000
59
+ tune_projector: true
60
+ tune_diffusion_model: true
61
+ tune_vlln: true
62
+ state_dropout_prob: 0.0
63
+ state_additive_noise_scale: 0.0
64
+ max_num_embodiments: 32
65
+ data:
66
+ datasets:
67
+ - dataset_paths:
68
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
69
+ embodiment_tag: new_embodiment
70
+ mix_ratio: 1.0
71
+ dataset_type: physical_embodiment
72
+ val_dataset_path: null
73
+ modality_configs:
74
+ new_embodiment:
75
+ video:
76
+ delta_indices:
77
+ - 0
78
+ modality_keys:
79
+ - front
80
+ - wrist
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - single_arm
89
+ - gripper
90
+ sin_cos_embedding_keys: null
91
+ mean_std_embedding_keys: null
92
+ action_configs: null
93
+ action:
94
+ delta_indices:
95
+ - 0
96
+ - 1
97
+ - 2
98
+ - 3
99
+ - 4
100
+ - 5
101
+ - 6
102
+ - 7
103
+ - 8
104
+ - 9
105
+ - 10
106
+ - 11
107
+ - 12
108
+ - 13
109
+ - 14
110
+ - 15
111
+ modality_keys:
112
+ - single_arm
113
+ - gripper
114
+ sin_cos_embedding_keys: null
115
+ mean_std_embedding_keys: null
116
+ action_configs:
117
+ - rep: RELATIVE
118
+ type: NON_EEF
119
+ format: DEFAULT
120
+ state_key: null
121
+ - rep: ABSOLUTE
122
+ type: NON_EEF
123
+ format: DEFAULT
124
+ state_key: null
125
+ language:
126
+ delta_indices:
127
+ - 0
128
+ modality_keys:
129
+ - annotation.human.task_description
130
+ sin_cos_embedding_keys: null
131
+ mean_std_embedding_keys: null
132
+ action_configs: null
133
+ download_cache: false
134
+ shard_size: 1024
135
+ episode_sampling_rate: 0.1
136
+ num_shards_per_epoch: 100000
137
+ override_pretraining_statistics: false
138
+ mode: single_turn
139
+ random_chop: 0.0
140
+ mock_dataset_mode: false
141
+ shuffle: true
142
+ seed: 42
143
+ multiprocessing_context: fork
144
+ allow_padding: false
145
+ subsample_ratio: 1.0
146
+ image_crop_size:
147
+ - 244
148
+ - 244
149
+ image_target_size:
150
+ - 224
151
+ - 224
152
+ video_backend: torchcodec
153
+ training:
154
+ output_dir: /content/so100_finetune
155
+ experiment_name: null
156
+ max_steps: 10000
157
+ global_batch_size: 36
158
+ batch_size: null
159
+ gradient_accumulation_steps: 1
160
+ learning_rate: 0.0001
161
+ lr_scheduler_type: cosine
162
+ weight_decay: 1.0e-05
163
+ warmup_ratio: 0.05
164
+ warmup_steps: 0
165
+ max_grad_norm: 1.0
166
+ optim: adamw_torch
167
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
168
+ tf32: true
169
+ fp16: false
170
+ bf16: true
171
+ eval_bf16: true
172
+ logging_steps: 10
173
+ save_steps: 1000
174
+ save_total_limit: 5
175
+ save_vl_model: false
176
+ upload_checkpoints: false
177
+ upload_every: 1000
178
+ upload_last_n_checkpoints: 5
179
+ max_concurrent_uploads: 2
180
+ eval_strategy: 'no'
181
+ eval_steps: 500
182
+ eval_set_split_ratio: 0.1
183
+ eval_batch_size: 2
184
+ save_best_eval_metric_name: ''
185
+ save_best_eval_metric_greater_is_better: true
186
+ deepspeed_stage: 2
187
+ gradient_checkpointing: false
188
+ transformers_trust_remote_code: true
189
+ transformers_local_files_only: false
190
+ transformers_cache_dir: null
191
+ transformers_access_token: null
192
+ use_ddp: false
193
+ ddp_bucket_cap_mb: 100
194
+ num_gpus: 1
195
+ dataloader_num_workers: 4
196
+ remove_unused_columns: false
197
+ use_wandb: true
198
+ wandb_project: finetune-gr00t-n1d6
199
+ enable_profiling: false
200
+ max_retries: 3
201
+ assert_loss_less_than: null
202
+ add_rl_callback: false
203
+ enable_open_loop_eval: false
204
+ open_loop_eval_traj_ids:
205
+ - 0
206
+ open_loop_eval_steps_per_traj: 100
207
+ open_loop_eval_plot_indices: null
208
+ max_steps: 10000
209
+ save_steps: 1000
checkpoint-2000/experiment_cfg/config.yaml ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - relative
30
+ state_key: null
31
+ type: &id002 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
36
+ - absolute
37
+ state_key: null
38
+ type: *id002
39
+ delta_indices:
40
+ - 0
41
+ - 1
42
+ - 2
43
+ - 3
44
+ - 4
45
+ - 5
46
+ - 6
47
+ - 7
48
+ - 8
49
+ - 9
50
+ - 10
51
+ - 11
52
+ - 12
53
+ - 13
54
+ - 14
55
+ - 15
56
+ mean_std_embedding_keys: null
57
+ modality_keys:
58
+ - single_arm
59
+ - gripper
60
+ sin_cos_embedding_keys: null
61
+ language: !!python/object:gr00t.data.types.ModalityConfig
62
+ action_configs: null
63
+ delta_indices:
64
+ - 0
65
+ mean_std_embedding_keys: null
66
+ modality_keys:
67
+ - annotation.human.task_description
68
+ sin_cos_embedding_keys: null
69
+ state: !!python/object:gr00t.data.types.ModalityConfig
70
+ action_configs: null
71
+ delta_indices:
72
+ - 0
73
+ mean_std_embedding_keys: null
74
+ modality_keys:
75
+ - single_arm
76
+ - gripper
77
+ sin_cos_embedding_keys: null
78
+ video: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - front
85
+ - wrist
86
+ sin_cos_embedding_keys: null
87
+ mode: single_turn
88
+ multiprocessing_context: fork
89
+ num_shards_per_epoch: 100000
90
+ override_pretraining_statistics: false
91
+ random_chop: 0.0
92
+ seed: 42
93
+ shard_size: 1024
94
+ shuffle: true
95
+ subsample_ratio: 1.0
96
+ video_backend: torchcodec
97
+ load_config_path: null
98
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
99
+ _attn_implementation_autoset: false
100
+ _attn_implementation_internal: null
101
+ _commit_hash: null
102
+ _name_or_path: ''
103
+ add_cross_attention: false
104
+ architectures: null
105
+ backbone_model_type: eagle
106
+ backbone_trainable_params_fp32: true
107
+ bad_words_ids: null
108
+ begin_suppress_tokens: null
109
+ bos_token_id: null
110
+ chunk_size_feed_forward: 0
111
+ color_jitter_params:
112
+ brightness: 0.3
113
+ contrast: 0.4
114
+ hue: 0.08
115
+ saturation: 0.5
116
+ cross_attention_hidden_size: null
117
+ decoder_start_token_id: null
118
+ diffusion_model_cfg:
119
+ attention_head_dim: 48
120
+ dropout: 0.2
121
+ final_dropout: true
122
+ interleave_self_attention: true
123
+ norm_type: ada_norm
124
+ num_attention_heads: 32
125
+ num_layers: 32
126
+ output_dim: 1024
127
+ positional_embeddings: null
128
+ diversity_penalty: 0.0
129
+ do_sample: false
130
+ eagle_collator: true
131
+ early_stopping: false
132
+ encoder_no_repeat_ngram_size: 0
133
+ eos_token_id: null
134
+ exponential_decay_length_penalty: null
135
+ finetuning_task: null
136
+ forced_bos_token_id: null
137
+ forced_eos_token_id: null
138
+ id2label:
139
+ 0: LABEL_0
140
+ 1: LABEL_1
141
+ is_decoder: false
142
+ is_encoder_decoder: false
143
+ label2id:
144
+ LABEL_0: 0
145
+ LABEL_1: 1
146
+ length_penalty: 1.0
147
+ load_bf16: false
148
+ max_length: 20
149
+ min_length: 0
150
+ model_name: nvidia/Eagle-Block2A-2B-v2
151
+ no_repeat_ngram_size: 0
152
+ num_beam_groups: 1
153
+ num_beams: 1
154
+ num_return_sequences: 1
155
+ output_attentions: false
156
+ output_hidden_states: false
157
+ output_scores: false
158
+ pad_token_id: null
159
+ prefix: null
160
+ problem_type: null
161
+ pruned_heads: {}
162
+ random_rotation_angle: null
163
+ remove_invalid_values: false
164
+ repetition_penalty: 1.0
165
+ reproject_vision: false
166
+ return_dict: true
167
+ return_dict_in_generate: false
168
+ sep_token_id: null
169
+ state_dropout_prob: 0.0
170
+ suppress_tokens: null
171
+ task_specific_params: null
172
+ temperature: 1.0
173
+ tf_legacy_loss: false
174
+ tie_encoder_decoder: false
175
+ tie_word_embeddings: true
176
+ tokenizer_class: null
177
+ top_k: 50
178
+ top_p: 1.0
179
+ torch_dtype: null
180
+ torchscript: false
181
+ transformers_version: null
182
+ tune_diffusion_model: true
183
+ tune_llm: false
184
+ tune_projector: true
185
+ tune_visual: false
186
+ typical_p: 1.0
187
+ use_bfloat16: false
188
+ use_relative_action: true
189
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
190
+ add_rl_callback: false
191
+ assert_loss_less_than: null
192
+ batch_size: null
193
+ bf16: true
194
+ dataloader_num_workers: 4
195
+ ddp_bucket_cap_mb: 100
196
+ deepspeed_stage: 2
197
+ enable_open_loop_eval: false
198
+ enable_profiling: false
199
+ eval_batch_size: 2
200
+ eval_bf16: true
201
+ eval_set_split_ratio: 0.1
202
+ eval_steps: 500
203
+ eval_strategy: 'no'
204
+ experiment_name: null
205
+ fp16: false
206
+ global_batch_size: 36
207
+ gradient_accumulation_steps: 1
208
+ gradient_checkpointing: false
209
+ learning_rate: 0.0001
210
+ logging_steps: 10
211
+ lr_scheduler_type: cosine
212
+ max_concurrent_uploads: 2
213
+ max_grad_norm: 1.0
214
+ max_retries: 3
215
+ max_steps: 10000
216
+ num_gpus: 1
217
+ open_loop_eval_plot_indices: null
218
+ open_loop_eval_steps_per_traj: 100
219
+ open_loop_eval_traj_ids:
220
+ - 0
221
+ optim: adamw_torch
222
+ output_dir: /content/so100_finetune
223
+ remove_unused_columns: false
224
+ save_best_eval_metric_greater_is_better: true
225
+ save_best_eval_metric_name: ''
226
+ save_steps: 1000
227
+ save_total_limit: 5
228
+ save_vl_model: false
229
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
230
+ tf32: true
231
+ transformers_access_token: null
232
+ transformers_cache_dir: null
233
+ transformers_local_files_only: false
234
+ transformers_trust_remote_code: true
235
+ upload_checkpoints: false
236
+ upload_every: 1000
237
+ upload_last_n_checkpoints: 5
238
+ use_ddp: false
239
+ use_wandb: true
240
+ wandb_project: finetune-gr00t-n1d6
241
+ warmup_ratio: 0.05
242
+ warmup_steps: 0
243
+ weight_decay: 1.0e-05
checkpoint-2000/experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "single_arm": {
5
+ "min": [
6
+ -60.105262756347656,
7
+ -99.31827545166016,
8
+ -43.24324417114258,
9
+ 44.688323974609375,
10
+ -80.68241119384766
11
+ ],
12
+ "max": [
13
+ 34.31578826904297,
14
+ 45.80315399169922,
15
+ 99.62721252441406,
16
+ 97.62950134277344,
17
+ -4.8293962478637695
18
+ ],
19
+ "mean": [
20
+ -3.5325422842834118,
21
+ -39.55833784368612,
22
+ 47.61425589641663,
23
+ 68.3154743394685,
24
+ -46.70357059936142
25
+ ],
26
+ "std": [
27
+ 27.459571708144434,
28
+ 41.074676874488844,
29
+ 34.075122244859095,
30
+ 13.351843152336091,
31
+ 19.483248111140814
32
+ ],
33
+ "q01": [
34
+ -43.89320641903556,
35
+ -99.31827545166016,
36
+ 2.96704117550838,
37
+ 52.128458407208086,
38
+ -64.9047852897644
39
+ ],
40
+ "q99": [
41
+ 23.91888349814585,
42
+ 10.271718478515513,
43
+ 99.48718710059721,
44
+ 91.53274853632244,
45
+ -26.5701696578577
46
+ ]
47
+ },
48
+ "gripper": {
49
+ "min": [
50
+ 0.9803921580314636
51
+ ],
52
+ "max": [
53
+ 35.9943962097168
54
+ ],
55
+ "mean": [
56
+ 20.298597213926165
57
+ ],
58
+ "std": [
59
+ 9.780378321306017
60
+ ],
61
+ "q01": [
62
+ 6.1409783557640525
63
+ ],
64
+ "q99": [
65
+ 29.83243577963609
66
+ ]
67
+ }
68
+ },
69
+ "action": {
70
+ "single_arm": {
71
+ "min": [
72
+ -60.843040466308594,
73
+ -100.0,
74
+ -47.45454406738281,
75
+ 44.675209045410156,
76
+ -81.04986572265625
77
+ ],
78
+ "max": [
79
+ 34.99722671508789,
80
+ 45.694801330566406,
81
+ 100.0,
82
+ 97.96730041503906,
83
+ -4.566929340362549
84
+ ],
85
+ "mean": [
86
+ -3.4051861822555503,
87
+ -40.21667093678776,
88
+ 46.219495035627304,
89
+ 68.11321517887163,
90
+ -46.72253879798044
91
+ ],
92
+ "std": [
93
+ 27.604988373472494,
94
+ 40.826917813818575,
95
+ 34.82453909741334,
96
+ 13.439408862929378,
97
+ 19.518525610689558
98
+ ],
99
+ "q01": [
100
+ -44.29914832538184,
101
+ -99.9431293020161,
102
+ -0.6294070457600729,
103
+ 51.77255040025036,
104
+ -65.3110059408939
105
+ ],
106
+ "q99": [
107
+ 24.517798066878782,
108
+ 10.287138016986633,
109
+ 99.9985524069893,
110
+ 91.79085298874445,
111
+ -26.382156973884847
112
+ ]
113
+ },
114
+ "gripper": {
115
+ "min": [
116
+ 0.25553661584854126
117
+ ],
118
+ "max": [
119
+ 36.28620147705078
120
+ ],
121
+ "mean": [
122
+ 19.60203928355869
123
+ ],
124
+ "std": [
125
+ 10.896771214917207
126
+ ],
127
+ "q01": [
128
+ 2.708348265017052
129
+ ],
130
+ "q99": [
131
+ 30.222431877944597
132
+ ]
133
+ }
134
+ },
135
+ "relative_action": {
136
+ "single_arm": {
137
+ "min": [
138
+ [
139
+ -9.055549621582031,
140
+ -13.95650863647461,
141
+ -14.800384521484375,
142
+ -13.61447525024414,
143
+ -7.874013900756836
144
+ ],
145
+ [
146
+ -11.160816192626953,
147
+ -16.68364715576172,
148
+ -17.25493621826172,
149
+ -16.177448272705078,
150
+ -9.501310348510742
151
+ ],
152
+ [
153
+ -13.476604461669922,
154
+ -19.667449951171875,
155
+ -19.89129638671875,
156
+ -18.372356414794922,
157
+ -11.23359489440918
158
+ ],
159
+ [
160
+ -15.26607894897461,
161
+ -22.73650360107422,
162
+ -22.314407348632812,
163
+ -20.567264556884766,
164
+ -12.755905151367188
165
+ ],
166
+ [
167
+ -16.950286865234375,
168
+ -25.550708770751953,
169
+ -24.457931518554688,
170
+ -22.246448516845703,
171
+ -14.120733261108398
172
+ ],
173
+ [
174
+ -18.84502410888672,
175
+ -28.619762420654297,
176
+ -26.764127731323242,
177
+ -23.572124481201172,
178
+ -15.538057327270508
179
+ ],
180
+ [
181
+ -20.50891876220703,
182
+ -31.262561798095703,
183
+ -29.18724250793457,
184
+ -24.625682830810547,
185
+ -16.79789924621582
186
+ ],
187
+ [
188
+ -21.950958251953125,
189
+ -33.90535354614258,
190
+ -31.703550338745117,
191
+ -25.421085357666016,
192
+ -17.9002628326416
193
+ ],
194
+ [
195
+ -23.351024627685547,
196
+ -36.545902252197266,
197
+ -33.976280212402344,
198
+ -25.947864532470703,
199
+ -18.89763641357422
200
+ ],
201
+ [
202
+ -24.79306411743164,
203
+ -39.18869400024414,
204
+ -36.27018737792969,
205
+ -26.389755249023438,
206
+ -19.737533569335938
207
+ ],
208
+ [
209
+ -26.056222915649414,
210
+ -41.40523910522461,
211
+ -38.54291534423828,
212
+ -26.566513061523438,
213
+ -20.57742691040039
214
+ ],
215
+ [
216
+ -27.2141170501709,
217
+ -43.45127487182617,
218
+ -40.633819580078125,
219
+ -26.654312133789062,
220
+ -21.732280731201172
221
+ ],
222
+ [
223
+ -28.372011184692383,
224
+ -45.497310638427734,
225
+ -42.90654754638672,
226
+ -26.654312133789062,
227
+ -22.834644317626953
228
+ ],
229
+ [
230
+ -29.5299072265625,
231
+ -47.542476654052734,
232
+ -44.9827995300293,
233
+ -26.654312133789062,
234
+ -23.674541473388672
235
+ ],
236
+ [
237
+ -30.687801361083984,
238
+ -49.50242614746094,
239
+ -47.19833755493164,
240
+ -26.654312133789062,
241
+ -24.304462432861328
242
+ ],
243
+ [
244
+ -31.84569549560547,
245
+ -51.29353713989258,
246
+ -49.528255462646484,
247
+ -26.654312133789062,
248
+ -25.406824111938477
249
+ ]
250
+ ],
251
+ "max": [
252
+ [
253
+ 11.800273895263672,
254
+ 13.390182495117188,
255
+ 11.2484130859375,
256
+ 12.953540802001953,
257
+ 8.97637939453125
258
+ ],
259
+ [
260
+ 14.462503433227539,
261
+ 16.288734436035156,
262
+ 13.857921600341797,
263
+ 15.763023376464844,
264
+ 10.813648223876953
265
+ ],
266
+ [
267
+ 16.902881622314453,
268
+ 19.443038940429688,
269
+ 16.2484130859375,
270
+ 18.39691162109375,
271
+ 12.703411102294922
272
+ ],
273
+ [
274
+ 19.40987205505371,
275
+ 22.16992950439453,
276
+ 18.857921600341797,
277
+ 20.783126831054688,
278
+ 14.645668029785156
279
+ ],
280
+ [
281
+ 21.850250244140625,
282
+ 24.812728881835938,
283
+ 21.221553802490234,
284
+ 22.815826416015625,
285
+ 16.482940673828125
286
+ ],
287
+ [
288
+ 24.290626525878906,
289
+ 27.455524444580078,
290
+ 23.551467895507812,
291
+ 24.671768188476562,
292
+ 18.530181884765625
293
+ ],
294
+ [
295
+ 26.50115203857422,
296
+ 30.011974334716797,
297
+ 25.857921600341797,
298
+ 26.515491485595703,
299
+ 20.262466430664062
300
+ ],
301
+ [
302
+ 28.71167755126953,
303
+ 32.31278610229492,
304
+ 28.187835693359375,
305
+ 27.920230865478516,
306
+ 22.099735260009766
307
+ ],
308
+ [
309
+ 31.132732391357422,
310
+ 34.52932357788086,
311
+ 30.585193634033203,
312
+ 29.34360122680664,
313
+ 23.832019805908203
314
+ ],
315
+ [
316
+ 33.24032974243164,
317
+ 36.490108489990234,
318
+ 32.91510772705078,
319
+ 30.757648468017578,
320
+ 25.56430435180664
321
+ ],
322
+ [
323
+ 35.237003326416016,
324
+ 38.621395111083984,
325
+ 35.187835693359375,
326
+ 32.26008224487305,
327
+ 27.296588897705078
328
+ ],
329
+ [
330
+ 37.0264778137207,
331
+ 40.411678314208984,
332
+ 37.27873992919922,
333
+ 33.66482162475586,
334
+ 28.87139129638672
335
+ ],
336
+ [
337
+ 38.815948486328125,
338
+ 42.372459411621094,
339
+ 39.32906723022461,
340
+ 34.99049758911133,
341
+ 30.498687744140625
342
+ ],
343
+ [
344
+ 40.147064208984375,
345
+ 43.99224090576172,
346
+ 41.238162994384766,
347
+ 35.6975212097168,
348
+ 31.9160099029541
349
+ ],
350
+ [
351
+ 41.552791595458984,
352
+ 45.61201858520508,
353
+ 43.13301467895508,
354
+ 36.0510368347168,
355
+ 33.33333206176758
356
+ ],
357
+ [
358
+ 43.0264778137207,
359
+ 47.23110580444336,
360
+ 45.22392654418945,
361
+ 36.402225494384766,
362
+ 34.75065612792969
363
+ ]
364
+ ],
365
+ "mean": [
366
+ [
367
+ 0.11750347912311554,
368
+ -0.6653266549110413,
369
+ -1.4583468437194824,
370
+ -0.2053278237581253,
371
+ -0.01863335259258747
372
+ ],
373
+ [
374
+ 0.12236570566892624,
375
+ -0.6657301187515259,
376
+ -1.4583476781845093,
377
+ -0.20791052281856537,
378
+ -0.02185939997434616
379
+ ],
380
+ [
381
+ 0.12741880118846893,
382
+ -0.6660965085029602,
383
+ -1.4583467245101929,
384
+ -0.21049359440803528,
385
+ -0.025085171684622765
386
+ ],
387
+ [
388
+ 0.13261470198631287,
389
+ -0.6664630174636841,
390
+ -1.4583468437194824,
391
+ -0.21307627856731415,
392
+ -0.02831093594431877
393
+ ],
394
+ [
395
+ 0.13785837590694427,
396
+ -0.6668296456336975,
397
+ -1.4583486318588257,
398
+ -0.21565881371498108,
399
+ -0.03153714910149574
400
+ ],
401
+ [
402
+ 0.14319713413715363,
403
+ -0.6671591401100159,
404
+ -1.4583512544631958,
405
+ -0.21824198961257935,
406
+ -0.03474012389779091
407
+ ],
408
+ [
409
+ 0.14853604137897491,
410
+ -0.6674894094467163,
411
+ -1.458350419998169,
412
+ -0.22082529962062836,
413
+ -0.03794342279434204
414
+ ],
415
+ [
416
+ 0.15382729470729828,
417
+ -0.6678190231323242,
418
+ -1.4583524465560913,
419
+ -0.22340813279151917,
420
+ -0.041146621108055115
421
+ ],
422
+ [
423
+ 0.15907101333141327,
424
+ -0.6681490540504456,
425
+ -1.458353042602539,
426
+ -0.2259913831949234,
427
+ -0.04434990882873535
428
+ ],
429
+ [
430
+ 0.16426703333854675,
431
+ -0.668441653251648,
432
+ -1.458351492881775,
433
+ -0.22857406735420227,
434
+ -0.04755344241857529
435
+ ],
436
+ [
437
+ 0.1694149672985077,
438
+ -0.668734610080719,
439
+ -1.4583535194396973,
440
+ -0.23115694522857666,
441
+ -0.05075618624687195
442
+ ],
443
+ [
444
+ 0.17465919256210327,
445
+ -0.6690278649330139,
446
+ -1.458351731300354,
447
+ -0.2337394654750824,
448
+ -0.05395958200097084
449
+ ],
450
+ [
451
+ 0.17990276217460632,
452
+ -0.6693212985992432,
453
+ -1.4583524465560913,
454
+ -0.23632188141345978,
455
+ -0.05716199427843094
456
+ ],
457
+ [
458
+ 0.18505056202411652,
459
+ -0.6696141958236694,
460
+ -1.4583513736724854,
461
+ -0.2389044612646103,
462
+ -0.060365449637174606
463
+ ],
464
+ [
465
+ 0.1901988983154297,
466
+ -0.6699072122573853,
467
+ -1.4583531618118286,
468
+ -0.24148696660995483,
469
+ -0.06356889009475708
470
+ ],
471
+ [
472
+ 0.19529956579208374,
473
+ -0.6702005863189697,
474
+ -1.4583516120910645,
475
+ -0.24407006800174713,
476
+ -0.0667722150683403
477
+ ]
478
+ ],
479
+ "std": [
480
+ [
481
+ 2.756209135055542,
482
+ 4.048781394958496,
483
+ 4.834221839904785,
484
+ 2.4045844078063965,
485
+ 1.831181526184082
486
+ ],
487
+ [
488
+ 3.375519037246704,
489
+ 4.940067768096924,
490
+ 5.704192638397217,
491
+ 2.936358690261841,
492
+ 2.2514567375183105
493
+ ],
494
+ [
495
+ 3.9847497940063477,
496
+ 5.8236775398254395,
497
+ 6.570866107940674,
498
+ 3.443351984024048,
499
+ 2.663818359375
500
+ ],
501
+ [
502
+ 4.582452774047852,
503
+ 6.696892738342285,
504
+ 7.430098056793213,
505
+ 3.9259438514709473,
506
+ 3.067777633666992
507
+ ],
508
+ [
509
+ 5.167651653289795,
510
+ 7.5579352378845215,
511
+ 8.279448509216309,
512
+ 4.385776996612549,
513
+ 3.462792158126831
514
+ ],
515
+ [
516
+ 5.740141868591309,
517
+ 8.406184196472168,
518
+ 9.11788558959961,
519
+ 4.825246810913086,
520
+ 3.848816156387329
521
+ ],
522
+ [
523
+ 6.2996673583984375,
524
+ 9.240605354309082,
525
+ 9.944042205810547,
526
+ 5.246663570404053,
527
+ 4.225630283355713
528
+ ],
529
+ [
530
+ 6.846652507781982,
531
+ 10.06134033203125,
532
+ 10.75770092010498,
533
+ 5.652442932128906,
534
+ 4.593677043914795
535
+ ],
536
+ [
537
+ 7.3811469078063965,
538
+ 10.86816120147705,
539
+ 11.558343887329102,
540
+ 6.044579982757568,
541
+ 4.9531779289245605
542
+ ],
543
+ [
544
+ 7.903446197509766,
545
+ 11.661355972290039,
546
+ 12.34595012664795,
547
+ 6.424712657928467,
548
+ 5.304342269897461
549
+ ],
550
+ [
551
+ 8.414060592651367,
552
+ 12.44128131866455,
553
+ 13.120593070983887,
554
+ 6.794016361236572,
555
+ 5.647256851196289
556
+ ],
557
+ [
558
+ 8.913477897644043,
559
+ 13.208165168762207,
560
+ 13.88218879699707,
561
+ 7.153115272521973,
562
+ 5.98213529586792
563
+ ],
564
+ [
565
+ 9.402256965637207,
566
+ 13.962782859802246,
567
+ 14.631062507629395,
568
+ 7.5023603439331055,
569
+ 6.309183597564697
570
+ ],
571
+ [
572
+ 9.880831718444824,
573
+ 14.70537281036377,
574
+ 15.367119789123535,
575
+ 7.841666221618652,
576
+ 6.628418445587158
577
+ ],
578
+ [
579
+ 10.349787712097168,
580
+ 15.43662166595459,
581
+ 16.090578079223633,
582
+ 8.171104431152344,
583
+ 6.940072536468506
584
+ ],
585
+ [
586
+ 10.809670448303223,
587
+ 16.15700340270996,
588
+ 16.801616668701172,
589
+ 8.490680694580078,
590
+ 7.244339466094971
591
+ ]
592
+ ],
593
+ "q01": [
594
+ [
595
+ -7.149912223815918,
596
+ -12.172602081298828,
597
+ -13.191639633178712,
598
+ -7.083407516479492,
599
+ -5.301837921142578
600
+ ],
601
+ [
602
+ -8.969949054718018,
603
+ -14.76386520385742,
604
+ -15.673493194580079,
605
+ -8.597449951171875,
606
+ -6.4955373382568355
607
+ ],
608
+ [
609
+ -10.634601535797119,
610
+ -17.415445404052733,
611
+ -18.04468208312988,
612
+ -9.786780090332032,
613
+ -7.755381164550781
614
+ ],
615
+ [
616
+ -12.332097396850585,
617
+ -19.81734550476074,
618
+ -20.49539176940918,
619
+ -10.772034606933593,
620
+ -8.81890007019043
621
+ ],
622
+ [
623
+ -14.054951286315918,
624
+ -22.659150772094726,
625
+ -22.971768417358398,
626
+ -11.810748291015624,
627
+ -10.076640815734862
628
+ ],
629
+ [
630
+ -15.467545299530029,
631
+ -24.854450073242187,
632
+ -25.42631248474121,
633
+ -12.89973159790039,
634
+ -11.391075839996338
635
+ ],
636
+ [
637
+ -16.85467258453369,
638
+ -27.482084159851073,
639
+ -27.82916431427002,
640
+ -13.832246704101562,
641
+ -12.532282905578613
642
+ ],
643
+ [
644
+ -18.258717765808104,
645
+ -29.645261459350586,
646
+ -29.891822814941406,
647
+ -14.627758026123047,
648
+ -13.543306350708008
649
+ ],
650
+ [
651
+ -19.36220642089844,
652
+ -31.862525711059572,
653
+ -32.179530944824215,
654
+ -15.569841918945311,
655
+ -14.78740104675293
656
+ ],
657
+ [
658
+ -20.866409225463865,
659
+ -34.33618782043457,
660
+ -34.25658073425293,
661
+ -16.842924346923827,
662
+ -15.576902465820313
663
+ ],
664
+ [
665
+ -21.95573440551758,
666
+ -36.528687438964845,
667
+ -36.38519187927246,
668
+ -17.423948669433592,
669
+ -16.82309829711914
670
+ ],
671
+ [
672
+ -23.461913070678712,
673
+ -38.73653938293457,
674
+ -38.30317817687988,
675
+ -17.647796630859375,
676
+ -17.742782592773438
677
+ ],
678
+ [
679
+ -24.53435989379883,
680
+ -40.874423828124996,
681
+ -40.44069320678711,
682
+ -18.515239715576172,
683
+ -18.879788017272947
684
+ ],
685
+ [
686
+ -25.933171768188476,
687
+ -43.23724739074707,
688
+ -42.29842636108398,
689
+ -19.954179382324217,
690
+ -19.801573486328124
691
+ ],
692
+ [
693
+ -27.245162963867188,
694
+ -45.308877029418944,
695
+ -43.92444366455078,
696
+ -20.61133575439453,
697
+ -20.9133874130249
698
+ ],
699
+ [
700
+ -28.540471458435057,
701
+ -47.20507698059082,
702
+ -45.95519866943359,
703
+ -20.804307861328123,
704
+ -21.69133777618408
705
+ ]
706
+ ],
707
+ "q99": [
708
+ [
709
+ 8.69469722747799,
710
+ 10.299373474121074,
711
+ 9.785606613159175,
712
+ 9.179881286621086,
713
+ 6.271916503906227
714
+ ],
715
+ [
716
+ 10.615129890441846,
717
+ 12.666508712768518,
718
+ 12.091986007690421,
719
+ 11.210918426513642,
720
+ 7.833070821762051
721
+ ],
722
+ [
723
+ 12.4686942863464,
724
+ 14.899181365966779,
725
+ 14.446693725585934,
726
+ 13.182954406738268,
727
+ 9.380576934814396
728
+ ],
729
+ [
730
+ 14.271173782348615,
731
+ 17.33157432556147,
732
+ 16.80153594970703,
733
+ 15.230424041748023,
734
+ 10.432546234130848
735
+ ],
736
+ [
737
+ 16.08265884399413,
738
+ 19.270514526367187,
739
+ 19.092461013793933,
740
+ 17.081269989013663,
741
+ 12.112335987091052
742
+ ],
743
+ [
744
+ 18.145391998291,
745
+ 21.44050186157223,
746
+ 21.280475997924803,
747
+ 18.68309165954589,
748
+ 13.620999450683572
749
+ ],
750
+ [
751
+ 19.996333007812442,
752
+ 23.52975692749016,
753
+ 23.596975326538082,
754
+ 20.103778152465818,
755
+ 14.602626724243084
756
+ ],
757
+ [
758
+ 21.60820228576658,
759
+ 25.1919183349609,
760
+ 25.76568862915039,
761
+ 21.541843643188393,
762
+ 16.10183769226073
763
+ ],
764
+ [
765
+ 23.362480049133293,
766
+ 27.08537643432608,
767
+ 28.15810386657714,
768
+ 22.8827821350097,
769
+ 17.433070678710767
770
+ ],
771
+ [
772
+ 25.225577430725092,
773
+ 28.52370849609371,
774
+ 30.372146911621076,
775
+ 23.82381240844726,
776
+ 18.712861175537086
777
+ ],
778
+ [
779
+ 26.625544967651358,
780
+ 30.16027656555174,
781
+ 32.49975234985349,
782
+ 24.50854393005371,
783
+ 19.443568878173714
784
+ ],
785
+ [
786
+ 28.116129837036127,
787
+ 31.295492095947264,
788
+ 34.53191467285156,
789
+ 25.490360641479374,
790
+ 20.561679687499943
791
+ ],
792
+ [
793
+ 29.457272415161103,
794
+ 32.63257324218748,
795
+ 36.42262039184569,
796
+ 26.227766571044913,
797
+ 21.088712310791003
798
+ ],
799
+ [
800
+ 30.8263483428955,
801
+ 33.570856246948225,
802
+ 38.26254928588863,
803
+ 26.711691284179675,
804
+ 22.396849746704
805
+ ],
806
+ [
807
+ 32.008242797851516,
808
+ 34.67565246582029,
809
+ 40.05519142150879,
810
+ 27.399809188842735,
811
+ 22.793699951171842
812
+ ],
813
+ [
814
+ 33.28292778015133,
815
+ 35.806464462280275,
816
+ 41.85474884033201,
817
+ 27.812029418945265,
818
+ 23.47191642761218
819
+ ]
820
+ ]
821
+ }
822
+ }
823
+ }
824
+ }
checkpoint-2000/experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "apply_sincos_state_encoding": true,
19
+ "use_relative_action": true,
20
+ "max_state_dim": 128,
21
+ "max_action_dim": 128,
22
+ "action_horizon": 50,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "add_pos_embed": true,
26
+ "attn_dropout": 0.2,
27
+ "use_vlln": true,
28
+ "max_seq_len": 1024,
29
+ "use_alternate_vl_dit": true,
30
+ "attend_text_every_n_blocks": 2,
31
+ "diffusion_model_cfg": {
32
+ "attention_head_dim": 48,
33
+ "dropout": 0.2,
34
+ "final_dropout": true,
35
+ "interleave_self_attention": true,
36
+ "norm_type": "ada_norm",
37
+ "num_attention_heads": 32,
38
+ "num_layers": 32,
39
+ "output_dim": 1024,
40
+ "positional_embeddings": null
41
+ },
42
+ "num_inference_timesteps": 4,
43
+ "noise_beta_alpha": 1.5,
44
+ "noise_beta_beta": 1.0,
45
+ "noise_s": 0.999,
46
+ "num_timestep_buckets": 1000,
47
+ "tune_projector": true,
48
+ "tune_diffusion_model": true,
49
+ "tune_vlln": true,
50
+ "state_dropout_prob": 0.0,
51
+ "state_additive_noise_scale": 0.0,
52
+ "max_num_embodiments": 32
53
+ }
checkpoint-2000/experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb7418ffa7dcabd9858be0df3f27ccbff8cb2615abe8b53e7786c26f698be4c
3
+ size 4990120184
checkpoint-2000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db2ed5815745a9febc81afe19fbc32c5f03de7a9c65d582d41ba874a7302e729
3
+ size 4823190320
checkpoint-2000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3299fa1359c27c77bb5210e4b402861412b721a9bf5c7e5e96a969c7034e3a03
3
+ size 12960193762
checkpoint-2000/processor_config.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "front",
361
+ "wrist"
362
+ ],
363
+ "sin_cos_embedding_keys": null,
364
+ "mean_std_embedding_keys": null,
365
+ "action_configs": null
366
+ },
367
+ "state": {
368
+ "delta_indices": [
369
+ 0
370
+ ],
371
+ "modality_keys": [
372
+ "single_arm",
373
+ "gripper"
374
+ ],
375
+ "sin_cos_embedding_keys": null,
376
+ "mean_std_embedding_keys": null,
377
+ "action_configs": null
378
+ },
379
+ "action": {
380
+ "delta_indices": [
381
+ 0,
382
+ 1,
383
+ 2,
384
+ 3,
385
+ 4,
386
+ 5,
387
+ 6,
388
+ 7,
389
+ 8,
390
+ 9,
391
+ 10,
392
+ 11,
393
+ 12,
394
+ 13,
395
+ 14,
396
+ 15
397
+ ],
398
+ "modality_keys": [
399
+ "single_arm",
400
+ "gripper"
401
+ ],
402
+ "sin_cos_embedding_keys": null,
403
+ "mean_std_embedding_keys": null,
404
+ "action_configs": [
405
+ {
406
+ "rep": "RELATIVE",
407
+ "type": "NON_EEF",
408
+ "format": "DEFAULT",
409
+ "state_key": null
410
+ },
411
+ {
412
+ "rep": "ABSOLUTE",
413
+ "type": "NON_EEF",
414
+ "format": "DEFAULT",
415
+ "state_key": null
416
+ }
417
+ ]
418
+ },
419
+ "language": {
420
+ "delta_indices": [
421
+ 0
422
+ ],
423
+ "modality_keys": [
424
+ "annotation.human.task_description"
425
+ ],
426
+ "sin_cos_embedding_keys": null,
427
+ "mean_std_embedding_keys": null,
428
+ "action_configs": null
429
+ }
430
+ }
431
+ },
432
+ "image_crop_size": null,
433
+ "image_target_size": null,
434
+ "use_albumentations": true,
435
+ "random_rotation_angle": null,
436
+ "color_jitter_params": {
437
+ "brightness": 0.3,
438
+ "contrast": 0.4,
439
+ "saturation": 0.5,
440
+ "hue": 0.08
441
+ },
442
+ "shortest_image_edge": 256,
443
+ "crop_fraction": 0.95,
444
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
445
+ "model_type": "eagle",
446
+ "formalize_language": true,
447
+ "max_state_dim": 128,
448
+ "max_action_dim": 128,
449
+ "max_action_horizon": 50,
450
+ "use_percentiles": false,
451
+ "clip_outliers": true,
452
+ "apply_sincos_state_encoding": true,
453
+ "use_relative_action": true
454
+ }
455
+ }
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5efd8171a93a38e78841ea34d2929fad62621f2e3b4db564fe3f9ead855ddf85
3
+ size 14645
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7bf0ea49bd9803cab03463db93f95b3262938087679ec5f7931fc0cbfeb1b1
3
+ size 1465
checkpoint-2000/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,1234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.2,
6
+ "eval_steps": 500,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "grad_norm": 2.416853189468384,
14
+ "learning_rate": 1.8e-06,
15
+ "loss": 1.1685,
16
+ "step": 10
17
+ },
18
+ {
19
+ "grad_norm": 1.0608258247375488,
20
+ "learning_rate": 3.8e-06,
21
+ "loss": 1.1645,
22
+ "step": 20
23
+ },
24
+ {
25
+ "grad_norm": 0.6068993210792542,
26
+ "learning_rate": 5.8e-06,
27
+ "loss": 1.1434,
28
+ "step": 30
29
+ },
30
+ {
31
+ "grad_norm": 0.4165553152561188,
32
+ "learning_rate": 7.8e-06,
33
+ "loss": 1.1309,
34
+ "step": 40
35
+ },
36
+ {
37
+ "grad_norm": 0.5539636015892029,
38
+ "learning_rate": 9.800000000000001e-06,
39
+ "loss": 1.1056,
40
+ "step": 50
41
+ },
42
+ {
43
+ "grad_norm": 0.48063579201698303,
44
+ "learning_rate": 1.18e-05,
45
+ "loss": 1.1079,
46
+ "step": 60
47
+ },
48
+ {
49
+ "grad_norm": 0.4390954077243805,
50
+ "learning_rate": 1.3800000000000002e-05,
51
+ "loss": 1.0909,
52
+ "step": 70
53
+ },
54
+ {
55
+ "grad_norm": 0.39306357502937317,
56
+ "learning_rate": 1.58e-05,
57
+ "loss": 1.0837,
58
+ "step": 80
59
+ },
60
+ {
61
+ "grad_norm": 0.5071999430656433,
62
+ "learning_rate": 1.78e-05,
63
+ "loss": 1.0625,
64
+ "step": 90
65
+ },
66
+ {
67
+ "grad_norm": 0.38793864846229553,
68
+ "learning_rate": 1.9800000000000004e-05,
69
+ "loss": 1.0615,
70
+ "step": 100
71
+ },
72
+ {
73
+ "grad_norm": 0.6121100187301636,
74
+ "learning_rate": 2.18e-05,
75
+ "loss": 1.0406,
76
+ "step": 110
77
+ },
78
+ {
79
+ "grad_norm": 0.6436424851417542,
80
+ "learning_rate": 2.38e-05,
81
+ "loss": 1.0538,
82
+ "step": 120
83
+ },
84
+ {
85
+ "grad_norm": 3.180732011795044,
86
+ "learning_rate": 2.58e-05,
87
+ "loss": 0.9922,
88
+ "step": 130
89
+ },
90
+ {
91
+ "grad_norm": 0.8403401374816895,
92
+ "learning_rate": 2.7800000000000005e-05,
93
+ "loss": 0.9564,
94
+ "step": 140
95
+ },
96
+ {
97
+ "grad_norm": 1.5241442918777466,
98
+ "learning_rate": 2.98e-05,
99
+ "loss": 0.8884,
100
+ "step": 150
101
+ },
102
+ {
103
+ "grad_norm": 1.140098214149475,
104
+ "learning_rate": 3.18e-05,
105
+ "loss": 0.8313,
106
+ "step": 160
107
+ },
108
+ {
109
+ "grad_norm": 1.406286597251892,
110
+ "learning_rate": 3.38e-05,
111
+ "loss": 0.7616,
112
+ "step": 170
113
+ },
114
+ {
115
+ "grad_norm": 2.2860851287841797,
116
+ "learning_rate": 3.58e-05,
117
+ "loss": 0.6981,
118
+ "step": 180
119
+ },
120
+ {
121
+ "grad_norm": 2.2754695415496826,
122
+ "learning_rate": 3.7800000000000004e-05,
123
+ "loss": 0.6285,
124
+ "step": 190
125
+ },
126
+ {
127
+ "grad_norm": 2.531214714050293,
128
+ "learning_rate": 3.9800000000000005e-05,
129
+ "loss": 0.4895,
130
+ "step": 200
131
+ },
132
+ {
133
+ "grad_norm": 2.7056477069854736,
134
+ "learning_rate": 4.18e-05,
135
+ "loss": 0.3478,
136
+ "step": 210
137
+ },
138
+ {
139
+ "grad_norm": 1.7513630390167236,
140
+ "learning_rate": 4.38e-05,
141
+ "loss": 0.2509,
142
+ "step": 220
143
+ },
144
+ {
145
+ "grad_norm": 1.6927777528762817,
146
+ "learning_rate": 4.58e-05,
147
+ "loss": 0.1911,
148
+ "step": 230
149
+ },
150
+ {
151
+ "grad_norm": 2.264531135559082,
152
+ "learning_rate": 4.78e-05,
153
+ "loss": 0.2006,
154
+ "step": 240
155
+ },
156
+ {
157
+ "grad_norm": 1.3290256261825562,
158
+ "learning_rate": 4.9800000000000004e-05,
159
+ "loss": 0.1654,
160
+ "step": 250
161
+ },
162
+ {
163
+ "grad_norm": 1.5439292192459106,
164
+ "learning_rate": 5.1800000000000005e-05,
165
+ "loss": 0.172,
166
+ "step": 260
167
+ },
168
+ {
169
+ "grad_norm": 1.5491207838058472,
170
+ "learning_rate": 5.380000000000001e-05,
171
+ "loss": 0.1494,
172
+ "step": 270
173
+ },
174
+ {
175
+ "grad_norm": 1.1139971017837524,
176
+ "learning_rate": 5.580000000000001e-05,
177
+ "loss": 0.143,
178
+ "step": 280
179
+ },
180
+ {
181
+ "grad_norm": 1.2239474058151245,
182
+ "learning_rate": 5.7799999999999995e-05,
183
+ "loss": 0.1281,
184
+ "step": 290
185
+ },
186
+ {
187
+ "grad_norm": 1.093898057937622,
188
+ "learning_rate": 5.9800000000000003e-05,
189
+ "loss": 0.1234,
190
+ "step": 300
191
+ },
192
+ {
193
+ "grad_norm": 1.1576344966888428,
194
+ "learning_rate": 6.18e-05,
195
+ "loss": 0.1147,
196
+ "step": 310
197
+ },
198
+ {
199
+ "grad_norm": 1.2395023107528687,
200
+ "learning_rate": 6.38e-05,
201
+ "loss": 0.1143,
202
+ "step": 320
203
+ },
204
+ {
205
+ "grad_norm": 1.2169476747512817,
206
+ "learning_rate": 6.58e-05,
207
+ "loss": 0.1144,
208
+ "step": 330
209
+ },
210
+ {
211
+ "grad_norm": 0.9942442774772644,
212
+ "learning_rate": 6.780000000000001e-05,
213
+ "loss": 0.1152,
214
+ "step": 340
215
+ },
216
+ {
217
+ "grad_norm": 1.0080304145812988,
218
+ "learning_rate": 6.98e-05,
219
+ "loss": 0.1081,
220
+ "step": 350
221
+ },
222
+ {
223
+ "grad_norm": 1.1731500625610352,
224
+ "learning_rate": 7.18e-05,
225
+ "loss": 0.1146,
226
+ "step": 360
227
+ },
228
+ {
229
+ "grad_norm": 0.8233430981636047,
230
+ "learning_rate": 7.38e-05,
231
+ "loss": 0.1135,
232
+ "step": 370
233
+ },
234
+ {
235
+ "grad_norm": 1.0143144130706787,
236
+ "learning_rate": 7.58e-05,
237
+ "loss": 0.0979,
238
+ "step": 380
239
+ },
240
+ {
241
+ "grad_norm": 0.8134552240371704,
242
+ "learning_rate": 7.780000000000001e-05,
243
+ "loss": 0.1075,
244
+ "step": 390
245
+ },
246
+ {
247
+ "grad_norm": 0.736839234828949,
248
+ "learning_rate": 7.98e-05,
249
+ "loss": 0.1023,
250
+ "step": 400
251
+ },
252
+ {
253
+ "grad_norm": 0.8744317889213562,
254
+ "learning_rate": 8.18e-05,
255
+ "loss": 0.1093,
256
+ "step": 410
257
+ },
258
+ {
259
+ "grad_norm": 0.7313347458839417,
260
+ "learning_rate": 8.38e-05,
261
+ "loss": 0.1022,
262
+ "step": 420
263
+ },
264
+ {
265
+ "grad_norm": 0.8588263988494873,
266
+ "learning_rate": 8.58e-05,
267
+ "loss": 0.0957,
268
+ "step": 430
269
+ },
270
+ {
271
+ "grad_norm": 0.8712506890296936,
272
+ "learning_rate": 8.78e-05,
273
+ "loss": 0.0986,
274
+ "step": 440
275
+ },
276
+ {
277
+ "grad_norm": 0.5078502893447876,
278
+ "learning_rate": 8.98e-05,
279
+ "loss": 0.0941,
280
+ "step": 450
281
+ },
282
+ {
283
+ "grad_norm": 0.8700484037399292,
284
+ "learning_rate": 9.180000000000001e-05,
285
+ "loss": 0.0968,
286
+ "step": 460
287
+ },
288
+ {
289
+ "grad_norm": 0.8209111094474792,
290
+ "learning_rate": 9.38e-05,
291
+ "loss": 0.1025,
292
+ "step": 470
293
+ },
294
+ {
295
+ "grad_norm": 0.7114784717559814,
296
+ "learning_rate": 9.58e-05,
297
+ "loss": 0.0907,
298
+ "step": 480
299
+ },
300
+ {
301
+ "grad_norm": 0.8576576709747314,
302
+ "learning_rate": 9.78e-05,
303
+ "loss": 0.085,
304
+ "step": 490
305
+ },
306
+ {
307
+ "grad_norm": 0.7132489085197449,
308
+ "learning_rate": 9.98e-05,
309
+ "loss": 0.0826,
310
+ "step": 500
311
+ },
312
+ {
313
+ "grad_norm": 0.5286405682563782,
314
+ "learning_rate": 9.9999778549206e-05,
315
+ "loss": 0.0773,
316
+ "step": 510
317
+ },
318
+ {
319
+ "grad_norm": 0.6641912460327148,
320
+ "learning_rate": 9.999901304280685e-05,
321
+ "loss": 0.0832,
322
+ "step": 520
323
+ },
324
+ {
325
+ "grad_norm": 0.6113771200180054,
326
+ "learning_rate": 9.999770075521164e-05,
327
+ "loss": 0.0818,
328
+ "step": 530
329
+ },
330
+ {
331
+ "grad_norm": 0.6849881410598755,
332
+ "learning_rate": 9.99958417007713e-05,
333
+ "loss": 0.0844,
334
+ "step": 540
335
+ },
336
+ {
337
+ "grad_norm": 0.510468602180481,
338
+ "learning_rate": 9.999343589981615e-05,
339
+ "loss": 0.0712,
340
+ "step": 550
341
+ },
342
+ {
343
+ "grad_norm": 0.4960484802722931,
344
+ "learning_rate": 9.999048337865568e-05,
345
+ "loss": 0.0677,
346
+ "step": 560
347
+ },
348
+ {
349
+ "grad_norm": 0.6222489476203918,
350
+ "learning_rate": 9.998698416957815e-05,
351
+ "loss": 0.0677,
352
+ "step": 570
353
+ },
354
+ {
355
+ "grad_norm": 0.6377440690994263,
356
+ "learning_rate": 9.998293831085037e-05,
357
+ "loss": 0.0757,
358
+ "step": 580
359
+ },
360
+ {
361
+ "grad_norm": 0.6644724011421204,
362
+ "learning_rate": 9.997834584671719e-05,
363
+ "loss": 0.0721,
364
+ "step": 590
365
+ },
366
+ {
367
+ "grad_norm": 0.42810145020484924,
368
+ "learning_rate": 9.997320682740107e-05,
369
+ "loss": 0.0741,
370
+ "step": 600
371
+ },
372
+ {
373
+ "grad_norm": 0.47897064685821533,
374
+ "learning_rate": 9.996752130910149e-05,
375
+ "loss": 0.0723,
376
+ "step": 610
377
+ },
378
+ {
379
+ "grad_norm": 0.512750506401062,
380
+ "learning_rate": 9.99612893539944e-05,
381
+ "loss": 0.0773,
382
+ "step": 620
383
+ },
384
+ {
385
+ "grad_norm": 0.5588595867156982,
386
+ "learning_rate": 9.995451103023144e-05,
387
+ "loss": 0.0772,
388
+ "step": 630
389
+ },
390
+ {
391
+ "grad_norm": 0.6482793092727661,
392
+ "learning_rate": 9.994718641193928e-05,
393
+ "loss": 0.071,
394
+ "step": 640
395
+ },
396
+ {
397
+ "grad_norm": 0.4272823631763458,
398
+ "learning_rate": 9.993931557921874e-05,
399
+ "loss": 0.0695,
400
+ "step": 650
401
+ },
402
+ {
403
+ "grad_norm": 0.5539937019348145,
404
+ "learning_rate": 9.993089861814402e-05,
405
+ "loss": 0.0548,
406
+ "step": 660
407
+ },
408
+ {
409
+ "grad_norm": 0.5200349688529968,
410
+ "learning_rate": 9.992193562076166e-05,
411
+ "loss": 0.0649,
412
+ "step": 670
413
+ },
414
+ {
415
+ "grad_norm": 0.35027024149894714,
416
+ "learning_rate": 9.991242668508954e-05,
417
+ "loss": 0.0599,
418
+ "step": 680
419
+ },
420
+ {
421
+ "grad_norm": 0.4623376429080963,
422
+ "learning_rate": 9.990237191511587e-05,
423
+ "loss": 0.0593,
424
+ "step": 690
425
+ },
426
+ {
427
+ "grad_norm": 0.42851006984710693,
428
+ "learning_rate": 9.989177142079802e-05,
429
+ "loss": 0.0518,
430
+ "step": 700
431
+ },
432
+ {
433
+ "grad_norm": 0.5397882461547852,
434
+ "learning_rate": 9.988062531806126e-05,
435
+ "loss": 0.0614,
436
+ "step": 710
437
+ },
438
+ {
439
+ "grad_norm": 0.298032283782959,
440
+ "learning_rate": 9.986893372879762e-05,
441
+ "loss": 0.0663,
442
+ "step": 720
443
+ },
444
+ {
445
+ "grad_norm": 0.6170604228973389,
446
+ "learning_rate": 9.985669678086443e-05,
447
+ "loss": 0.0594,
448
+ "step": 730
449
+ },
450
+ {
451
+ "grad_norm": 0.49378132820129395,
452
+ "learning_rate": 9.984391460808298e-05,
453
+ "loss": 0.0707,
454
+ "step": 740
455
+ },
456
+ {
457
+ "grad_norm": 0.5525211095809937,
458
+ "learning_rate": 9.983058735023709e-05,
459
+ "loss": 0.0643,
460
+ "step": 750
461
+ },
462
+ {
463
+ "grad_norm": 0.4275728166103363,
464
+ "learning_rate": 9.98167151530715e-05,
465
+ "loss": 0.0632,
466
+ "step": 760
467
+ },
468
+ {
469
+ "grad_norm": 0.3775525391101837,
470
+ "learning_rate": 9.980229816829034e-05,
471
+ "loss": 0.0595,
472
+ "step": 770
473
+ },
474
+ {
475
+ "grad_norm": 0.42739713191986084,
476
+ "learning_rate": 9.978733655355544e-05,
477
+ "loss": 0.0568,
478
+ "step": 780
479
+ },
480
+ {
481
+ "grad_norm": 0.4654758870601654,
482
+ "learning_rate": 9.977183047248464e-05,
483
+ "loss": 0.0516,
484
+ "step": 790
485
+ },
486
+ {
487
+ "grad_norm": 0.4031694233417511,
488
+ "learning_rate": 9.975578009464992e-05,
489
+ "loss": 0.0581,
490
+ "step": 800
491
+ },
492
+ {
493
+ "grad_norm": 0.34200575947761536,
494
+ "learning_rate": 9.97391855955757e-05,
495
+ "loss": 0.0457,
496
+ "step": 810
497
+ },
498
+ {
499
+ "grad_norm": 0.3750764727592468,
500
+ "learning_rate": 9.972204715673669e-05,
501
+ "loss": 0.0509,
502
+ "step": 820
503
+ },
504
+ {
505
+ "grad_norm": 0.5093910694122314,
506
+ "learning_rate": 9.970436496555617e-05,
507
+ "loss": 0.0489,
508
+ "step": 830
509
+ },
510
+ {
511
+ "grad_norm": 0.6330306529998779,
512
+ "learning_rate": 9.968613921540373e-05,
513
+ "loss": 0.0531,
514
+ "step": 840
515
+ },
516
+ {
517
+ "grad_norm": 0.44549983739852905,
518
+ "learning_rate": 9.966737010559326e-05,
519
+ "loss": 0.0569,
520
+ "step": 850
521
+ },
522
+ {
523
+ "grad_norm": 0.3454737961292267,
524
+ "learning_rate": 9.964805784138072e-05,
525
+ "loss": 0.0497,
526
+ "step": 860
527
+ },
528
+ {
529
+ "grad_norm": 0.29266422986984253,
530
+ "learning_rate": 9.962820263396195e-05,
531
+ "loss": 0.0472,
532
+ "step": 870
533
+ },
534
+ {
535
+ "grad_norm": 0.38099658489227295,
536
+ "learning_rate": 9.960780470047033e-05,
537
+ "loss": 0.0556,
538
+ "step": 880
539
+ },
540
+ {
541
+ "grad_norm": 0.37069812417030334,
542
+ "learning_rate": 9.958686426397437e-05,
543
+ "loss": 0.0464,
544
+ "step": 890
545
+ },
546
+ {
547
+ "grad_norm": 0.5007408261299133,
548
+ "learning_rate": 9.956538155347534e-05,
549
+ "loss": 0.0507,
550
+ "step": 900
551
+ },
552
+ {
553
+ "grad_norm": 0.45427390933036804,
554
+ "learning_rate": 9.95433568039047e-05,
555
+ "loss": 0.048,
556
+ "step": 910
557
+ },
558
+ {
559
+ "grad_norm": 0.38306254148483276,
560
+ "learning_rate": 9.952079025612162e-05,
561
+ "loss": 0.0535,
562
+ "step": 920
563
+ },
564
+ {
565
+ "grad_norm": 0.6907458901405334,
566
+ "learning_rate": 9.949768215691022e-05,
567
+ "loss": 0.0505,
568
+ "step": 930
569
+ },
570
+ {
571
+ "grad_norm": 0.5054745674133301,
572
+ "learning_rate": 9.9474032758977e-05,
573
+ "loss": 0.0551,
574
+ "step": 940
575
+ },
576
+ {
577
+ "grad_norm": 0.38975587487220764,
578
+ "learning_rate": 9.944984232094794e-05,
579
+ "loss": 0.0571,
580
+ "step": 950
581
+ },
582
+ {
583
+ "grad_norm": 0.4025571644306183,
584
+ "learning_rate": 9.942511110736584e-05,
585
+ "loss": 0.0486,
586
+ "step": 960
587
+ },
588
+ {
589
+ "grad_norm": 0.3891156017780304,
590
+ "learning_rate": 9.939983938868726e-05,
591
+ "loss": 0.0408,
592
+ "step": 970
593
+ },
594
+ {
595
+ "grad_norm": 0.38383904099464417,
596
+ "learning_rate": 9.93740274412797e-05,
597
+ "loss": 0.0468,
598
+ "step": 980
599
+ },
600
+ {
601
+ "grad_norm": 0.45907771587371826,
602
+ "learning_rate": 9.934767554741846e-05,
603
+ "loss": 0.0532,
604
+ "step": 990
605
+ },
606
+ {
607
+ "grad_norm": 0.28416934609413147,
608
+ "learning_rate": 9.932078399528361e-05,
609
+ "loss": 0.0454,
610
+ "step": 1000
611
+ },
612
+ {
613
+ "grad_norm": 0.5906932950019836,
614
+ "learning_rate": 9.929335307895689e-05,
615
+ "loss": 0.0627,
616
+ "step": 1010
617
+ },
618
+ {
619
+ "grad_norm": 0.31994298100471497,
620
+ "learning_rate": 9.926538309841839e-05,
621
+ "loss": 0.0447,
622
+ "step": 1020
623
+ },
624
+ {
625
+ "grad_norm": 0.39826470613479614,
626
+ "learning_rate": 9.923687435954334e-05,
627
+ "loss": 0.0452,
628
+ "step": 1030
629
+ },
630
+ {
631
+ "grad_norm": 0.38305869698524475,
632
+ "learning_rate": 9.920782717409873e-05,
633
+ "loss": 0.0491,
634
+ "step": 1040
635
+ },
636
+ {
637
+ "grad_norm": 0.5647104978561401,
638
+ "learning_rate": 9.917824185973994e-05,
639
+ "loss": 0.0485,
640
+ "step": 1050
641
+ },
642
+ {
643
+ "grad_norm": 0.45855265855789185,
644
+ "learning_rate": 9.914811874000723e-05,
645
+ "loss": 0.0486,
646
+ "step": 1060
647
+ },
648
+ {
649
+ "grad_norm": 0.5201910734176636,
650
+ "learning_rate": 9.911745814432218e-05,
651
+ "loss": 0.0474,
652
+ "step": 1070
653
+ },
654
+ {
655
+ "grad_norm": 0.27793142199516296,
656
+ "learning_rate": 9.90862604079842e-05,
657
+ "loss": 0.0476,
658
+ "step": 1080
659
+ },
660
+ {
661
+ "grad_norm": 0.2942734658718109,
662
+ "learning_rate": 9.90545258721667e-05,
663
+ "loss": 0.0467,
664
+ "step": 1090
665
+ },
666
+ {
667
+ "grad_norm": 0.4430975914001465,
668
+ "learning_rate": 9.90222548839135e-05,
669
+ "loss": 0.0472,
670
+ "step": 1100
671
+ },
672
+ {
673
+ "grad_norm": 0.4262060225009918,
674
+ "learning_rate": 9.898944779613495e-05,
675
+ "loss": 0.0484,
676
+ "step": 1110
677
+ },
678
+ {
679
+ "grad_norm": 0.3938030004501343,
680
+ "learning_rate": 9.89561049676041e-05,
681
+ "loss": 0.046,
682
+ "step": 1120
683
+ },
684
+ {
685
+ "grad_norm": 0.4077751934528351,
686
+ "learning_rate": 9.89222267629528e-05,
687
+ "loss": 0.0446,
688
+ "step": 1130
689
+ },
690
+ {
691
+ "grad_norm": 0.3400469422340393,
692
+ "learning_rate": 9.888781355266763e-05,
693
+ "loss": 0.0458,
694
+ "step": 1140
695
+ },
696
+ {
697
+ "grad_norm": 0.37941423058509827,
698
+ "learning_rate": 9.885286571308598e-05,
699
+ "loss": 0.0419,
700
+ "step": 1150
701
+ },
702
+ {
703
+ "grad_norm": 0.32883521914482117,
704
+ "learning_rate": 9.881738362639182e-05,
705
+ "loss": 0.0446,
706
+ "step": 1160
707
+ },
708
+ {
709
+ "grad_norm": 0.3390735387802124,
710
+ "learning_rate": 9.878136768061154e-05,
711
+ "loss": 0.0431,
712
+ "step": 1170
713
+ },
714
+ {
715
+ "grad_norm": 0.24958841502666473,
716
+ "learning_rate": 9.874481826960979e-05,
717
+ "loss": 0.0388,
718
+ "step": 1180
719
+ },
720
+ {
721
+ "grad_norm": 0.33186545968055725,
722
+ "learning_rate": 9.870773579308503e-05,
723
+ "loss": 0.0424,
724
+ "step": 1190
725
+ },
726
+ {
727
+ "grad_norm": 0.2618488073348999,
728
+ "learning_rate": 9.867012065656533e-05,
729
+ "loss": 0.038,
730
+ "step": 1200
731
+ },
732
+ {
733
+ "grad_norm": 0.3369114100933075,
734
+ "learning_rate": 9.863197327140376e-05,
735
+ "loss": 0.0435,
736
+ "step": 1210
737
+ },
738
+ {
739
+ "grad_norm": 0.38006556034088135,
740
+ "learning_rate": 9.859329405477403e-05,
741
+ "loss": 0.0371,
742
+ "step": 1220
743
+ },
744
+ {
745
+ "grad_norm": 0.3106013536453247,
746
+ "learning_rate": 9.855408342966585e-05,
747
+ "loss": 0.041,
748
+ "step": 1230
749
+ },
750
+ {
751
+ "grad_norm": 0.5991314053535461,
752
+ "learning_rate": 9.851434182488033e-05,
753
+ "loss": 0.0404,
754
+ "step": 1240
755
+ },
756
+ {
757
+ "grad_norm": 0.4325745701789856,
758
+ "learning_rate": 9.84740696750253e-05,
759
+ "loss": 0.04,
760
+ "step": 1250
761
+ },
762
+ {
763
+ "grad_norm": 0.4575672149658203,
764
+ "learning_rate": 9.843326742051055e-05,
765
+ "loss": 0.0446,
766
+ "step": 1260
767
+ },
768
+ {
769
+ "grad_norm": 0.3128894865512848,
770
+ "learning_rate": 9.839193550754297e-05,
771
+ "loss": 0.045,
772
+ "step": 1270
773
+ },
774
+ {
775
+ "grad_norm": 0.5669920444488525,
776
+ "learning_rate": 9.835007438812177e-05,
777
+ "loss": 0.0448,
778
+ "step": 1280
779
+ },
780
+ {
781
+ "grad_norm": 0.3259505331516266,
782
+ "learning_rate": 9.830768452003341e-05,
783
+ "loss": 0.0484,
784
+ "step": 1290
785
+ },
786
+ {
787
+ "grad_norm": 0.3997496962547302,
788
+ "learning_rate": 9.826476636684671e-05,
789
+ "loss": 0.0384,
790
+ "step": 1300
791
+ },
792
+ {
793
+ "grad_norm": 0.28296664357185364,
794
+ "learning_rate": 9.822132039790773e-05,
795
+ "loss": 0.0496,
796
+ "step": 1310
797
+ },
798
+ {
799
+ "grad_norm": 0.3684375584125519,
800
+ "learning_rate": 9.817734708833461e-05,
801
+ "loss": 0.0494,
802
+ "step": 1320
803
+ },
804
+ {
805
+ "grad_norm": 0.5209456086158752,
806
+ "learning_rate": 9.813284691901243e-05,
807
+ "loss": 0.0432,
808
+ "step": 1330
809
+ },
810
+ {
811
+ "grad_norm": 0.3386315703392029,
812
+ "learning_rate": 9.808782037658792e-05,
813
+ "loss": 0.0501,
814
+ "step": 1340
815
+ },
816
+ {
817
+ "grad_norm": 0.6467092037200928,
818
+ "learning_rate": 9.804226795346411e-05,
819
+ "loss": 0.0421,
820
+ "step": 1350
821
+ },
822
+ {
823
+ "grad_norm": 0.5830236077308655,
824
+ "learning_rate": 9.799619014779503e-05,
825
+ "loss": 0.0496,
826
+ "step": 1360
827
+ },
828
+ {
829
+ "grad_norm": 0.3474409580230713,
830
+ "learning_rate": 9.794958746348013e-05,
831
+ "loss": 0.0521,
832
+ "step": 1370
833
+ },
834
+ {
835
+ "grad_norm": 0.34878355264663696,
836
+ "learning_rate": 9.790246041015896e-05,
837
+ "loss": 0.0434,
838
+ "step": 1380
839
+ },
840
+ {
841
+ "grad_norm": 0.24419234693050385,
842
+ "learning_rate": 9.785480950320538e-05,
843
+ "loss": 0.038,
844
+ "step": 1390
845
+ },
846
+ {
847
+ "grad_norm": 0.3212246894836426,
848
+ "learning_rate": 9.78066352637221e-05,
849
+ "loss": 0.0386,
850
+ "step": 1400
851
+ },
852
+ {
853
+ "grad_norm": 0.39363548159599304,
854
+ "learning_rate": 9.775793821853488e-05,
855
+ "loss": 0.0381,
856
+ "step": 1410
857
+ },
858
+ {
859
+ "grad_norm": 0.31847864389419556,
860
+ "learning_rate": 9.77087189001868e-05,
861
+ "loss": 0.0354,
862
+ "step": 1420
863
+ },
864
+ {
865
+ "grad_norm": 0.38333189487457275,
866
+ "learning_rate": 9.765897784693243e-05,
867
+ "loss": 0.037,
868
+ "step": 1430
869
+ },
870
+ {
871
+ "grad_norm": 0.4181508421897888,
872
+ "learning_rate": 9.760871560273197e-05,
873
+ "loss": 0.0343,
874
+ "step": 1440
875
+ },
876
+ {
877
+ "grad_norm": 0.29742392897605896,
878
+ "learning_rate": 9.755793271724526e-05,
879
+ "loss": 0.0397,
880
+ "step": 1450
881
+ },
882
+ {
883
+ "grad_norm": 0.42186254262924194,
884
+ "learning_rate": 9.750662974582584e-05,
885
+ "loss": 0.0405,
886
+ "step": 1460
887
+ },
888
+ {
889
+ "grad_norm": 0.2908092141151428,
890
+ "learning_rate": 9.745480724951473e-05,
891
+ "loss": 0.0383,
892
+ "step": 1470
893
+ },
894
+ {
895
+ "grad_norm": 0.3689759373664856,
896
+ "learning_rate": 9.740246579503447e-05,
897
+ "loss": 0.034,
898
+ "step": 1480
899
+ },
900
+ {
901
+ "grad_norm": 0.3124215602874756,
902
+ "learning_rate": 9.734960595478284e-05,
903
+ "loss": 0.0335,
904
+ "step": 1490
905
+ },
906
+ {
907
+ "grad_norm": 0.3377379775047302,
908
+ "learning_rate": 9.729622830682657e-05,
909
+ "loss": 0.0426,
910
+ "step": 1500
911
+ },
912
+ {
913
+ "grad_norm": 0.26830872893333435,
914
+ "learning_rate": 9.724233343489504e-05,
915
+ "loss": 0.0332,
916
+ "step": 1510
917
+ },
918
+ {
919
+ "grad_norm": 0.3746989965438843,
920
+ "learning_rate": 9.718792192837396e-05,
921
+ "loss": 0.0415,
922
+ "step": 1520
923
+ },
924
+ {
925
+ "grad_norm": 0.3922579288482666,
926
+ "learning_rate": 9.713299438229886e-05,
927
+ "loss": 0.0394,
928
+ "step": 1530
929
+ },
930
+ {
931
+ "grad_norm": 0.2619168758392334,
932
+ "learning_rate": 9.707755139734855e-05,
933
+ "loss": 0.0379,
934
+ "step": 1540
935
+ },
936
+ {
937
+ "grad_norm": 0.2860689163208008,
938
+ "learning_rate": 9.702159357983866e-05,
939
+ "loss": 0.0333,
940
+ "step": 1550
941
+ },
942
+ {
943
+ "grad_norm": 0.47750091552734375,
944
+ "learning_rate": 9.696512154171492e-05,
945
+ "loss": 0.0427,
946
+ "step": 1560
947
+ },
948
+ {
949
+ "grad_norm": 0.335984468460083,
950
+ "learning_rate": 9.690813590054645e-05,
951
+ "loss": 0.0415,
952
+ "step": 1570
953
+ },
954
+ {
955
+ "grad_norm": 0.4124501943588257,
956
+ "learning_rate": 9.685063727951914e-05,
957
+ "loss": 0.0414,
958
+ "step": 1580
959
+ },
960
+ {
961
+ "grad_norm": 0.3823433220386505,
962
+ "learning_rate": 9.679262630742865e-05,
963
+ "loss": 0.0377,
964
+ "step": 1590
965
+ },
966
+ {
967
+ "grad_norm": 0.361517071723938,
968
+ "learning_rate": 9.673410361867373e-05,
969
+ "loss": 0.0394,
970
+ "step": 1600
971
+ },
972
+ {
973
+ "grad_norm": 0.33740976452827454,
974
+ "learning_rate": 9.667506985324909e-05,
975
+ "loss": 0.0393,
976
+ "step": 1610
977
+ },
978
+ {
979
+ "grad_norm": 0.3104957342147827,
980
+ "learning_rate": 9.661552565673855e-05,
981
+ "loss": 0.0323,
982
+ "step": 1620
983
+ },
984
+ {
985
+ "grad_norm": 0.34105607867240906,
986
+ "learning_rate": 9.655547168030789e-05,
987
+ "loss": 0.034,
988
+ "step": 1630
989
+ },
990
+ {
991
+ "grad_norm": 0.33195221424102783,
992
+ "learning_rate": 9.649490858069777e-05,
993
+ "loss": 0.0364,
994
+ "step": 1640
995
+ },
996
+ {
997
+ "grad_norm": 0.5268750190734863,
998
+ "learning_rate": 9.643383702021658e-05,
999
+ "loss": 0.0418,
1000
+ "step": 1650
1001
+ },
1002
+ {
1003
+ "grad_norm": 0.32975631952285767,
1004
+ "learning_rate": 9.637225766673307e-05,
1005
+ "loss": 0.0369,
1006
+ "step": 1660
1007
+ },
1008
+ {
1009
+ "grad_norm": 0.25505009293556213,
1010
+ "learning_rate": 9.631017119366922e-05,
1011
+ "loss": 0.0392,
1012
+ "step": 1670
1013
+ },
1014
+ {
1015
+ "grad_norm": 0.4074243903160095,
1016
+ "learning_rate": 9.624757827999273e-05,
1017
+ "loss": 0.0355,
1018
+ "step": 1680
1019
+ },
1020
+ {
1021
+ "grad_norm": 0.37926310300827026,
1022
+ "learning_rate": 9.618447961020971e-05,
1023
+ "loss": 0.0465,
1024
+ "step": 1690
1025
+ },
1026
+ {
1027
+ "grad_norm": 0.3103801906108856,
1028
+ "learning_rate": 9.612087587435707e-05,
1029
+ "loss": 0.0381,
1030
+ "step": 1700
1031
+ },
1032
+ {
1033
+ "grad_norm": 0.27850306034088135,
1034
+ "learning_rate": 9.605676776799508e-05,
1035
+ "loss": 0.0345,
1036
+ "step": 1710
1037
+ },
1038
+ {
1039
+ "grad_norm": 0.41181617975234985,
1040
+ "learning_rate": 9.599215599219973e-05,
1041
+ "loss": 0.036,
1042
+ "step": 1720
1043
+ },
1044
+ {
1045
+ "grad_norm": 0.29745417833328247,
1046
+ "learning_rate": 9.592704125355505e-05,
1047
+ "loss": 0.0359,
1048
+ "step": 1730
1049
+ },
1050
+ {
1051
+ "grad_norm": 0.25739115476608276,
1052
+ "learning_rate": 9.586142426414538e-05,
1053
+ "loss": 0.0322,
1054
+ "step": 1740
1055
+ },
1056
+ {
1057
+ "grad_norm": 0.3614145517349243,
1058
+ "learning_rate": 9.57953057415476e-05,
1059
+ "loss": 0.0386,
1060
+ "step": 1750
1061
+ },
1062
+ {
1063
+ "grad_norm": 0.3207153379917145,
1064
+ "learning_rate": 9.572868640882328e-05,
1065
+ "loss": 0.0419,
1066
+ "step": 1760
1067
+ },
1068
+ {
1069
+ "grad_norm": 0.2426140457391739,
1070
+ "learning_rate": 9.56615669945108e-05,
1071
+ "loss": 0.0318,
1072
+ "step": 1770
1073
+ },
1074
+ {
1075
+ "grad_norm": 0.3300987184047699,
1076
+ "learning_rate": 9.55939482326173e-05,
1077
+ "loss": 0.0299,
1078
+ "step": 1780
1079
+ },
1080
+ {
1081
+ "grad_norm": 0.29514288902282715,
1082
+ "learning_rate": 9.552583086261069e-05,
1083
+ "loss": 0.0323,
1084
+ "step": 1790
1085
+ },
1086
+ {
1087
+ "grad_norm": 0.2129458338022232,
1088
+ "learning_rate": 9.545721562941168e-05,
1089
+ "loss": 0.0397,
1090
+ "step": 1800
1091
+ },
1092
+ {
1093
+ "grad_norm": 0.1863127201795578,
1094
+ "learning_rate": 9.538810328338543e-05,
1095
+ "loss": 0.0417,
1096
+ "step": 1810
1097
+ },
1098
+ {
1099
+ "grad_norm": 0.2662620544433594,
1100
+ "learning_rate": 9.531849458033349e-05,
1101
+ "loss": 0.0334,
1102
+ "step": 1820
1103
+ },
1104
+ {
1105
+ "grad_norm": 0.3173840343952179,
1106
+ "learning_rate": 9.524839028148547e-05,
1107
+ "loss": 0.0302,
1108
+ "step": 1830
1109
+ },
1110
+ {
1111
+ "grad_norm": 0.2131727635860443,
1112
+ "learning_rate": 9.517779115349077e-05,
1113
+ "loss": 0.0291,
1114
+ "step": 1840
1115
+ },
1116
+ {
1117
+ "grad_norm": 0.21706818044185638,
1118
+ "learning_rate": 9.510669796841014e-05,
1119
+ "loss": 0.0275,
1120
+ "step": 1850
1121
+ },
1122
+ {
1123
+ "grad_norm": 0.3188416361808777,
1124
+ "learning_rate": 9.503511150370727e-05,
1125
+ "loss": 0.0308,
1126
+ "step": 1860
1127
+ },
1128
+ {
1129
+ "grad_norm": 0.29916301369667053,
1130
+ "learning_rate": 9.496303254224024e-05,
1131
+ "loss": 0.0327,
1132
+ "step": 1870
1133
+ },
1134
+ {
1135
+ "grad_norm": 0.35951587557792664,
1136
+ "learning_rate": 9.489046187225306e-05,
1137
+ "loss": 0.0373,
1138
+ "step": 1880
1139
+ },
1140
+ {
1141
+ "grad_norm": 0.3958148956298828,
1142
+ "learning_rate": 9.481740028736692e-05,
1143
+ "loss": 0.0336,
1144
+ "step": 1890
1145
+ },
1146
+ {
1147
+ "grad_norm": 0.24587172269821167,
1148
+ "learning_rate": 9.474384858657164e-05,
1149
+ "loss": 0.0322,
1150
+ "step": 1900
1151
+ },
1152
+ {
1153
+ "grad_norm": 0.3606860339641571,
1154
+ "learning_rate": 9.466980757421679e-05,
1155
+ "loss": 0.041,
1156
+ "step": 1910
1157
+ },
1158
+ {
1159
+ "grad_norm": 0.36055126786231995,
1160
+ "learning_rate": 9.459527806000305e-05,
1161
+ "loss": 0.0343,
1162
+ "step": 1920
1163
+ },
1164
+ {
1165
+ "grad_norm": 0.26927992701530457,
1166
+ "learning_rate": 9.452026085897325e-05,
1167
+ "loss": 0.0293,
1168
+ "step": 1930
1169
+ },
1170
+ {
1171
+ "grad_norm": 0.31135281920433044,
1172
+ "learning_rate": 9.444475679150348e-05,
1173
+ "loss": 0.0349,
1174
+ "step": 1940
1175
+ },
1176
+ {
1177
+ "grad_norm": 0.36437752842903137,
1178
+ "learning_rate": 9.436876668329411e-05,
1179
+ "loss": 0.0317,
1180
+ "step": 1950
1181
+ },
1182
+ {
1183
+ "grad_norm": 0.30771905183792114,
1184
+ "learning_rate": 9.429229136536079e-05,
1185
+ "loss": 0.0307,
1186
+ "step": 1960
1187
+ },
1188
+ {
1189
+ "grad_norm": 0.3096272945404053,
1190
+ "learning_rate": 9.421533167402534e-05,
1191
+ "loss": 0.0265,
1192
+ "step": 1970
1193
+ },
1194
+ {
1195
+ "grad_norm": 0.31442776322364807,
1196
+ "learning_rate": 9.413788845090666e-05,
1197
+ "loss": 0.033,
1198
+ "step": 1980
1199
+ },
1200
+ {
1201
+ "grad_norm": 0.3305257558822632,
1202
+ "learning_rate": 9.405996254291136e-05,
1203
+ "loss": 0.0313,
1204
+ "step": 1990
1205
+ },
1206
+ {
1207
+ "grad_norm": 0.2100379914045334,
1208
+ "learning_rate": 9.398155480222474e-05,
1209
+ "loss": 0.0352,
1210
+ "step": 2000
1211
+ }
1212
+ ],
1213
+ "logging_steps": 10,
1214
+ "max_steps": 10000,
1215
+ "num_input_tokens_seen": 0,
1216
+ "num_train_epochs": 9223372036854775807,
1217
+ "save_steps": 1000,
1218
+ "stateful_callbacks": {
1219
+ "TrainerControl": {
1220
+ "args": {
1221
+ "should_epoch_stop": false,
1222
+ "should_evaluate": false,
1223
+ "should_log": false,
1224
+ "should_save": true,
1225
+ "should_training_stop": false
1226
+ },
1227
+ "attributes": {}
1228
+ }
1229
+ },
1230
+ "total_flos": 0.0,
1231
+ "train_batch_size": 36,
1232
+ "trial_name": null,
1233
+ "trial_params": null
1234
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4524ac143a9f2191e97d0434f3b2f8c78c2ac53e3116f1e79b416f15557f3fa0
3
+ size 5713
checkpoint-2000/wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "so100_finetune"}
experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ formalize_language: true
31
+ apply_sincos_state_encoding: false
32
+ use_relative_action: true
33
+ max_state_dim: 29
34
+ max_action_dim: 29
35
+ action_horizon: 16
36
+ hidden_size: 1024
37
+ input_embedding_dim: 1536
38
+ add_pos_embed: true
39
+ attn_dropout: 0.2
40
+ use_vlln: true
41
+ max_seq_len: 1024
42
+ use_alternate_vl_dit: true
43
+ attend_text_every_n_blocks: 2
44
+ diffusion_model_cfg:
45
+ positional_embeddings: null
46
+ num_layers: 32
47
+ num_attention_heads: 32
48
+ attention_head_dim: 48
49
+ norm_type: ada_norm
50
+ dropout: 0.2
51
+ final_dropout: true
52
+ output_dim: 1024
53
+ interleave_self_attention: true
54
+ num_inference_timesteps: 4
55
+ noise_beta_alpha: 1.5
56
+ noise_beta_beta: 1.0
57
+ noise_s: 0.999
58
+ num_timestep_buckets: 1000
59
+ tune_projector: true
60
+ tune_diffusion_model: true
61
+ tune_vlln: true
62
+ state_dropout_prob: 0.0
63
+ state_additive_noise_scale: 0.0
64
+ max_num_embodiments: 32
65
+ data:
66
+ datasets:
67
+ - dataset_paths:
68
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
69
+ embodiment_tag: new_embodiment
70
+ mix_ratio: 1.0
71
+ dataset_type: physical_embodiment
72
+ val_dataset_path: null
73
+ modality_configs:
74
+ new_embodiment:
75
+ video:
76
+ delta_indices:
77
+ - 0
78
+ modality_keys:
79
+ - front
80
+ - wrist
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - single_arm
89
+ - gripper
90
+ sin_cos_embedding_keys: null
91
+ mean_std_embedding_keys: null
92
+ action_configs: null
93
+ action:
94
+ delta_indices:
95
+ - 0
96
+ - 1
97
+ - 2
98
+ - 3
99
+ - 4
100
+ - 5
101
+ - 6
102
+ - 7
103
+ - 8
104
+ - 9
105
+ - 10
106
+ - 11
107
+ - 12
108
+ - 13
109
+ - 14
110
+ - 15
111
+ modality_keys:
112
+ - single_arm
113
+ - gripper
114
+ sin_cos_embedding_keys: null
115
+ mean_std_embedding_keys: null
116
+ action_configs:
117
+ - rep: RELATIVE
118
+ type: NON_EEF
119
+ format: DEFAULT
120
+ state_key: null
121
+ - rep: ABSOLUTE
122
+ type: NON_EEF
123
+ format: DEFAULT
124
+ state_key: null
125
+ language:
126
+ delta_indices:
127
+ - 0
128
+ modality_keys:
129
+ - annotation.human.task_description
130
+ sin_cos_embedding_keys: null
131
+ mean_std_embedding_keys: null
132
+ action_configs: null
133
+ download_cache: false
134
+ shard_size: 1024
135
+ episode_sampling_rate: 0.1
136
+ num_shards_per_epoch: 100000
137
+ override_pretraining_statistics: false
138
+ mode: single_turn
139
+ random_chop: 0.0
140
+ mock_dataset_mode: false
141
+ shuffle: true
142
+ seed: 42
143
+ multiprocessing_context: fork
144
+ allow_padding: false
145
+ subsample_ratio: 1.0
146
+ image_crop_size:
147
+ - 244
148
+ - 244
149
+ image_target_size:
150
+ - 224
151
+ - 224
152
+ video_backend: torchcodec
153
+ training:
154
+ output_dir: /content/so100_finetune
155
+ experiment_name: null
156
+ max_steps: 10000
157
+ global_batch_size: 36
158
+ batch_size: null
159
+ gradient_accumulation_steps: 1
160
+ learning_rate: 0.0001
161
+ lr_scheduler_type: cosine
162
+ weight_decay: 1.0e-05
163
+ warmup_ratio: 0.05
164
+ warmup_steps: 0
165
+ max_grad_norm: 1.0
166
+ optim: adamw_torch
167
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
168
+ tf32: true
169
+ fp16: false
170
+ bf16: true
171
+ eval_bf16: true
172
+ logging_steps: 10
173
+ save_steps: 1000
174
+ save_total_limit: 5
175
+ save_vl_model: false
176
+ upload_checkpoints: false
177
+ upload_every: 1000
178
+ upload_last_n_checkpoints: 5
179
+ max_concurrent_uploads: 2
180
+ eval_strategy: 'no'
181
+ eval_steps: 500
182
+ eval_set_split_ratio: 0.1
183
+ eval_batch_size: 2
184
+ save_best_eval_metric_name: ''
185
+ save_best_eval_metric_greater_is_better: true
186
+ deepspeed_stage: 2
187
+ gradient_checkpointing: false
188
+ transformers_trust_remote_code: true
189
+ transformers_local_files_only: false
190
+ transformers_cache_dir: null
191
+ transformers_access_token: null
192
+ use_ddp: false
193
+ ddp_bucket_cap_mb: 100
194
+ num_gpus: 1
195
+ dataloader_num_workers: 4
196
+ remove_unused_columns: false
197
+ use_wandb: true
198
+ wandb_project: finetune-gr00t-n1d6
199
+ enable_profiling: false
200
+ max_retries: 3
201
+ assert_loss_less_than: null
202
+ add_rl_callback: false
203
+ enable_open_loop_eval: false
204
+ open_loop_eval_traj_ids:
205
+ - 0
206
+ open_loop_eval_steps_per_traj: 100
207
+ open_loop_eval_plot_indices: null
208
+ max_steps: 10000
209
+ save_steps: 1000
experiment_cfg/config.yaml ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - /content/dataset/azazdeaz/record-test/azazdeaz/record-test/
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - relative
30
+ state_key: null
31
+ type: &id002 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
36
+ - absolute
37
+ state_key: null
38
+ type: *id002
39
+ delta_indices:
40
+ - 0
41
+ - 1
42
+ - 2
43
+ - 3
44
+ - 4
45
+ - 5
46
+ - 6
47
+ - 7
48
+ - 8
49
+ - 9
50
+ - 10
51
+ - 11
52
+ - 12
53
+ - 13
54
+ - 14
55
+ - 15
56
+ mean_std_embedding_keys: null
57
+ modality_keys:
58
+ - single_arm
59
+ - gripper
60
+ sin_cos_embedding_keys: null
61
+ language: !!python/object:gr00t.data.types.ModalityConfig
62
+ action_configs: null
63
+ delta_indices:
64
+ - 0
65
+ mean_std_embedding_keys: null
66
+ modality_keys:
67
+ - annotation.human.task_description
68
+ sin_cos_embedding_keys: null
69
+ state: !!python/object:gr00t.data.types.ModalityConfig
70
+ action_configs: null
71
+ delta_indices:
72
+ - 0
73
+ mean_std_embedding_keys: null
74
+ modality_keys:
75
+ - single_arm
76
+ - gripper
77
+ sin_cos_embedding_keys: null
78
+ video: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - front
85
+ - wrist
86
+ sin_cos_embedding_keys: null
87
+ mode: single_turn
88
+ multiprocessing_context: fork
89
+ num_shards_per_epoch: 100000
90
+ override_pretraining_statistics: false
91
+ random_chop: 0.0
92
+ seed: 42
93
+ shard_size: 1024
94
+ shuffle: true
95
+ subsample_ratio: 1.0
96
+ video_backend: torchcodec
97
+ load_config_path: null
98
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
99
+ _attn_implementation_autoset: false
100
+ _attn_implementation_internal: null
101
+ _commit_hash: null
102
+ _name_or_path: ''
103
+ add_cross_attention: false
104
+ architectures: null
105
+ backbone_model_type: eagle
106
+ backbone_trainable_params_fp32: true
107
+ bad_words_ids: null
108
+ begin_suppress_tokens: null
109
+ bos_token_id: null
110
+ chunk_size_feed_forward: 0
111
+ color_jitter_params:
112
+ brightness: 0.3
113
+ contrast: 0.4
114
+ hue: 0.08
115
+ saturation: 0.5
116
+ cross_attention_hidden_size: null
117
+ decoder_start_token_id: null
118
+ diffusion_model_cfg:
119
+ attention_head_dim: 48
120
+ dropout: 0.2
121
+ final_dropout: true
122
+ interleave_self_attention: true
123
+ norm_type: ada_norm
124
+ num_attention_heads: 32
125
+ num_layers: 32
126
+ output_dim: 1024
127
+ positional_embeddings: null
128
+ diversity_penalty: 0.0
129
+ do_sample: false
130
+ eagle_collator: true
131
+ early_stopping: false
132
+ encoder_no_repeat_ngram_size: 0
133
+ eos_token_id: null
134
+ exponential_decay_length_penalty: null
135
+ finetuning_task: null
136
+ forced_bos_token_id: null
137
+ forced_eos_token_id: null
138
+ id2label:
139
+ 0: LABEL_0
140
+ 1: LABEL_1
141
+ is_decoder: false
142
+ is_encoder_decoder: false
143
+ label2id:
144
+ LABEL_0: 0
145
+ LABEL_1: 1
146
+ length_penalty: 1.0
147
+ load_bf16: false
148
+ max_length: 20
149
+ min_length: 0
150
+ model_name: nvidia/Eagle-Block2A-2B-v2
151
+ no_repeat_ngram_size: 0
152
+ num_beam_groups: 1
153
+ num_beams: 1
154
+ num_return_sequences: 1
155
+ output_attentions: false
156
+ output_hidden_states: false
157
+ output_scores: false
158
+ pad_token_id: null
159
+ prefix: null
160
+ problem_type: null
161
+ pruned_heads: {}
162
+ random_rotation_angle: null
163
+ remove_invalid_values: false
164
+ repetition_penalty: 1.0
165
+ reproject_vision: false
166
+ return_dict: true
167
+ return_dict_in_generate: false
168
+ sep_token_id: null
169
+ state_dropout_prob: 0.0
170
+ suppress_tokens: null
171
+ task_specific_params: null
172
+ temperature: 1.0
173
+ tf_legacy_loss: false
174
+ tie_encoder_decoder: false
175
+ tie_word_embeddings: true
176
+ tokenizer_class: null
177
+ top_k: 50
178
+ top_p: 1.0
179
+ torch_dtype: null
180
+ torchscript: false
181
+ transformers_version: null
182
+ tune_diffusion_model: true
183
+ tune_llm: false
184
+ tune_projector: true
185
+ tune_visual: false
186
+ typical_p: 1.0
187
+ use_bfloat16: false
188
+ use_relative_action: true
189
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
190
+ add_rl_callback: false
191
+ assert_loss_less_than: null
192
+ batch_size: null
193
+ bf16: true
194
+ dataloader_num_workers: 4
195
+ ddp_bucket_cap_mb: 100
196
+ deepspeed_stage: 2
197
+ enable_open_loop_eval: false
198
+ enable_profiling: false
199
+ eval_batch_size: 2
200
+ eval_bf16: true
201
+ eval_set_split_ratio: 0.1
202
+ eval_steps: 500
203
+ eval_strategy: 'no'
204
+ experiment_name: null
205
+ fp16: false
206
+ global_batch_size: 36
207
+ gradient_accumulation_steps: 1
208
+ gradient_checkpointing: false
209
+ learning_rate: 0.0001
210
+ logging_steps: 10
211
+ lr_scheduler_type: cosine
212
+ max_concurrent_uploads: 2
213
+ max_grad_norm: 1.0
214
+ max_retries: 3
215
+ max_steps: 10000
216
+ num_gpus: 1
217
+ open_loop_eval_plot_indices: null
218
+ open_loop_eval_steps_per_traj: 100
219
+ open_loop_eval_traj_ids:
220
+ - 0
221
+ optim: adamw_torch
222
+ output_dir: /content/so100_finetune
223
+ remove_unused_columns: false
224
+ save_best_eval_metric_greater_is_better: true
225
+ save_best_eval_metric_name: ''
226
+ save_steps: 1000
227
+ save_total_limit: 5
228
+ save_vl_model: false
229
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
230
+ tf32: true
231
+ transformers_access_token: null
232
+ transformers_cache_dir: null
233
+ transformers_local_files_only: false
234
+ transformers_trust_remote_code: true
235
+ upload_checkpoints: false
236
+ upload_every: 1000
237
+ upload_last_n_checkpoints: 5
238
+ use_ddp: false
239
+ use_wandb: true
240
+ wandb_project: finetune-gr00t-n1d6
241
+ warmup_ratio: 0.05
242
+ warmup_steps: 0
243
+ weight_decay: 1.0e-05
experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "single_arm": {
5
+ "min": [
6
+ -60.105262756347656,
7
+ -99.31827545166016,
8
+ -43.24324417114258,
9
+ 44.688323974609375,
10
+ -80.68241119384766
11
+ ],
12
+ "max": [
13
+ 34.31578826904297,
14
+ 45.80315399169922,
15
+ 99.62721252441406,
16
+ 97.62950134277344,
17
+ -4.8293962478637695
18
+ ],
19
+ "mean": [
20
+ -3.5325422842834118,
21
+ -39.55833784368612,
22
+ 47.61425589641663,
23
+ 68.3154743394685,
24
+ -46.70357059936142
25
+ ],
26
+ "std": [
27
+ 27.459571708144434,
28
+ 41.074676874488844,
29
+ 34.075122244859095,
30
+ 13.351843152336091,
31
+ 19.483248111140814
32
+ ],
33
+ "q01": [
34
+ -43.89320641903556,
35
+ -99.31827545166016,
36
+ 2.96704117550838,
37
+ 52.128458407208086,
38
+ -64.9047852897644
39
+ ],
40
+ "q99": [
41
+ 23.91888349814585,
42
+ 10.271718478515513,
43
+ 99.48718710059721,
44
+ 91.53274853632244,
45
+ -26.5701696578577
46
+ ]
47
+ },
48
+ "gripper": {
49
+ "min": [
50
+ 0.9803921580314636
51
+ ],
52
+ "max": [
53
+ 35.9943962097168
54
+ ],
55
+ "mean": [
56
+ 20.298597213926165
57
+ ],
58
+ "std": [
59
+ 9.780378321306017
60
+ ],
61
+ "q01": [
62
+ 6.1409783557640525
63
+ ],
64
+ "q99": [
65
+ 29.83243577963609
66
+ ]
67
+ }
68
+ },
69
+ "action": {
70
+ "single_arm": {
71
+ "min": [
72
+ -60.843040466308594,
73
+ -100.0,
74
+ -47.45454406738281,
75
+ 44.675209045410156,
76
+ -81.04986572265625
77
+ ],
78
+ "max": [
79
+ 34.99722671508789,
80
+ 45.694801330566406,
81
+ 100.0,
82
+ 97.96730041503906,
83
+ -4.566929340362549
84
+ ],
85
+ "mean": [
86
+ -3.4051861822555503,
87
+ -40.21667093678776,
88
+ 46.219495035627304,
89
+ 68.11321517887163,
90
+ -46.72253879798044
91
+ ],
92
+ "std": [
93
+ 27.604988373472494,
94
+ 40.826917813818575,
95
+ 34.82453909741334,
96
+ 13.439408862929378,
97
+ 19.518525610689558
98
+ ],
99
+ "q01": [
100
+ -44.29914832538184,
101
+ -99.9431293020161,
102
+ -0.6294070457600729,
103
+ 51.77255040025036,
104
+ -65.3110059408939
105
+ ],
106
+ "q99": [
107
+ 24.517798066878782,
108
+ 10.287138016986633,
109
+ 99.9985524069893,
110
+ 91.79085298874445,
111
+ -26.382156973884847
112
+ ]
113
+ },
114
+ "gripper": {
115
+ "min": [
116
+ 0.25553661584854126
117
+ ],
118
+ "max": [
119
+ 36.28620147705078
120
+ ],
121
+ "mean": [
122
+ 19.60203928355869
123
+ ],
124
+ "std": [
125
+ 10.896771214917207
126
+ ],
127
+ "q01": [
128
+ 2.708348265017052
129
+ ],
130
+ "q99": [
131
+ 30.222431877944597
132
+ ]
133
+ }
134
+ },
135
+ "relative_action": {
136
+ "single_arm": {
137
+ "min": [
138
+ [
139
+ -9.055549621582031,
140
+ -13.95650863647461,
141
+ -14.800384521484375,
142
+ -13.61447525024414,
143
+ -7.874013900756836
144
+ ],
145
+ [
146
+ -11.160816192626953,
147
+ -16.68364715576172,
148
+ -17.25493621826172,
149
+ -16.177448272705078,
150
+ -9.501310348510742
151
+ ],
152
+ [
153
+ -13.476604461669922,
154
+ -19.667449951171875,
155
+ -19.89129638671875,
156
+ -18.372356414794922,
157
+ -11.23359489440918
158
+ ],
159
+ [
160
+ -15.26607894897461,
161
+ -22.73650360107422,
162
+ -22.314407348632812,
163
+ -20.567264556884766,
164
+ -12.755905151367188
165
+ ],
166
+ [
167
+ -16.950286865234375,
168
+ -25.550708770751953,
169
+ -24.457931518554688,
170
+ -22.246448516845703,
171
+ -14.120733261108398
172
+ ],
173
+ [
174
+ -18.84502410888672,
175
+ -28.619762420654297,
176
+ -26.764127731323242,
177
+ -23.572124481201172,
178
+ -15.538057327270508
179
+ ],
180
+ [
181
+ -20.50891876220703,
182
+ -31.262561798095703,
183
+ -29.18724250793457,
184
+ -24.625682830810547,
185
+ -16.79789924621582
186
+ ],
187
+ [
188
+ -21.950958251953125,
189
+ -33.90535354614258,
190
+ -31.703550338745117,
191
+ -25.421085357666016,
192
+ -17.9002628326416
193
+ ],
194
+ [
195
+ -23.351024627685547,
196
+ -36.545902252197266,
197
+ -33.976280212402344,
198
+ -25.947864532470703,
199
+ -18.89763641357422
200
+ ],
201
+ [
202
+ -24.79306411743164,
203
+ -39.18869400024414,
204
+ -36.27018737792969,
205
+ -26.389755249023438,
206
+ -19.737533569335938
207
+ ],
208
+ [
209
+ -26.056222915649414,
210
+ -41.40523910522461,
211
+ -38.54291534423828,
212
+ -26.566513061523438,
213
+ -20.57742691040039
214
+ ],
215
+ [
216
+ -27.2141170501709,
217
+ -43.45127487182617,
218
+ -40.633819580078125,
219
+ -26.654312133789062,
220
+ -21.732280731201172
221
+ ],
222
+ [
223
+ -28.372011184692383,
224
+ -45.497310638427734,
225
+ -42.90654754638672,
226
+ -26.654312133789062,
227
+ -22.834644317626953
228
+ ],
229
+ [
230
+ -29.5299072265625,
231
+ -47.542476654052734,
232
+ -44.9827995300293,
233
+ -26.654312133789062,
234
+ -23.674541473388672
235
+ ],
236
+ [
237
+ -30.687801361083984,
238
+ -49.50242614746094,
239
+ -47.19833755493164,
240
+ -26.654312133789062,
241
+ -24.304462432861328
242
+ ],
243
+ [
244
+ -31.84569549560547,
245
+ -51.29353713989258,
246
+ -49.528255462646484,
247
+ -26.654312133789062,
248
+ -25.406824111938477
249
+ ]
250
+ ],
251
+ "max": [
252
+ [
253
+ 11.800273895263672,
254
+ 13.390182495117188,
255
+ 11.2484130859375,
256
+ 12.953540802001953,
257
+ 8.97637939453125
258
+ ],
259
+ [
260
+ 14.462503433227539,
261
+ 16.288734436035156,
262
+ 13.857921600341797,
263
+ 15.763023376464844,
264
+ 10.813648223876953
265
+ ],
266
+ [
267
+ 16.902881622314453,
268
+ 19.443038940429688,
269
+ 16.2484130859375,
270
+ 18.39691162109375,
271
+ 12.703411102294922
272
+ ],
273
+ [
274
+ 19.40987205505371,
275
+ 22.16992950439453,
276
+ 18.857921600341797,
277
+ 20.783126831054688,
278
+ 14.645668029785156
279
+ ],
280
+ [
281
+ 21.850250244140625,
282
+ 24.812728881835938,
283
+ 21.221553802490234,
284
+ 22.815826416015625,
285
+ 16.482940673828125
286
+ ],
287
+ [
288
+ 24.290626525878906,
289
+ 27.455524444580078,
290
+ 23.551467895507812,
291
+ 24.671768188476562,
292
+ 18.530181884765625
293
+ ],
294
+ [
295
+ 26.50115203857422,
296
+ 30.011974334716797,
297
+ 25.857921600341797,
298
+ 26.515491485595703,
299
+ 20.262466430664062
300
+ ],
301
+ [
302
+ 28.71167755126953,
303
+ 32.31278610229492,
304
+ 28.187835693359375,
305
+ 27.920230865478516,
306
+ 22.099735260009766
307
+ ],
308
+ [
309
+ 31.132732391357422,
310
+ 34.52932357788086,
311
+ 30.585193634033203,
312
+ 29.34360122680664,
313
+ 23.832019805908203
314
+ ],
315
+ [
316
+ 33.24032974243164,
317
+ 36.490108489990234,
318
+ 32.91510772705078,
319
+ 30.757648468017578,
320
+ 25.56430435180664
321
+ ],
322
+ [
323
+ 35.237003326416016,
324
+ 38.621395111083984,
325
+ 35.187835693359375,
326
+ 32.26008224487305,
327
+ 27.296588897705078
328
+ ],
329
+ [
330
+ 37.0264778137207,
331
+ 40.411678314208984,
332
+ 37.27873992919922,
333
+ 33.66482162475586,
334
+ 28.87139129638672
335
+ ],
336
+ [
337
+ 38.815948486328125,
338
+ 42.372459411621094,
339
+ 39.32906723022461,
340
+ 34.99049758911133,
341
+ 30.498687744140625
342
+ ],
343
+ [
344
+ 40.147064208984375,
345
+ 43.99224090576172,
346
+ 41.238162994384766,
347
+ 35.6975212097168,
348
+ 31.9160099029541
349
+ ],
350
+ [
351
+ 41.552791595458984,
352
+ 45.61201858520508,
353
+ 43.13301467895508,
354
+ 36.0510368347168,
355
+ 33.33333206176758
356
+ ],
357
+ [
358
+ 43.0264778137207,
359
+ 47.23110580444336,
360
+ 45.22392654418945,
361
+ 36.402225494384766,
362
+ 34.75065612792969
363
+ ]
364
+ ],
365
+ "mean": [
366
+ [
367
+ 0.11750347912311554,
368
+ -0.6653266549110413,
369
+ -1.4583468437194824,
370
+ -0.2053278237581253,
371
+ -0.01863335259258747
372
+ ],
373
+ [
374
+ 0.12236570566892624,
375
+ -0.6657301187515259,
376
+ -1.4583476781845093,
377
+ -0.20791052281856537,
378
+ -0.02185939997434616
379
+ ],
380
+ [
381
+ 0.12741880118846893,
382
+ -0.6660965085029602,
383
+ -1.4583467245101929,
384
+ -0.21049359440803528,
385
+ -0.025085171684622765
386
+ ],
387
+ [
388
+ 0.13261470198631287,
389
+ -0.6664630174636841,
390
+ -1.4583468437194824,
391
+ -0.21307627856731415,
392
+ -0.02831093594431877
393
+ ],
394
+ [
395
+ 0.13785837590694427,
396
+ -0.6668296456336975,
397
+ -1.4583486318588257,
398
+ -0.21565881371498108,
399
+ -0.03153714910149574
400
+ ],
401
+ [
402
+ 0.14319713413715363,
403
+ -0.6671591401100159,
404
+ -1.4583512544631958,
405
+ -0.21824198961257935,
406
+ -0.03474012389779091
407
+ ],
408
+ [
409
+ 0.14853604137897491,
410
+ -0.6674894094467163,
411
+ -1.458350419998169,
412
+ -0.22082529962062836,
413
+ -0.03794342279434204
414
+ ],
415
+ [
416
+ 0.15382729470729828,
417
+ -0.6678190231323242,
418
+ -1.4583524465560913,
419
+ -0.22340813279151917,
420
+ -0.041146621108055115
421
+ ],
422
+ [
423
+ 0.15907101333141327,
424
+ -0.6681490540504456,
425
+ -1.458353042602539,
426
+ -0.2259913831949234,
427
+ -0.04434990882873535
428
+ ],
429
+ [
430
+ 0.16426703333854675,
431
+ -0.668441653251648,
432
+ -1.458351492881775,
433
+ -0.22857406735420227,
434
+ -0.04755344241857529
435
+ ],
436
+ [
437
+ 0.1694149672985077,
438
+ -0.668734610080719,
439
+ -1.4583535194396973,
440
+ -0.23115694522857666,
441
+ -0.05075618624687195
442
+ ],
443
+ [
444
+ 0.17465919256210327,
445
+ -0.6690278649330139,
446
+ -1.458351731300354,
447
+ -0.2337394654750824,
448
+ -0.05395958200097084
449
+ ],
450
+ [
451
+ 0.17990276217460632,
452
+ -0.6693212985992432,
453
+ -1.4583524465560913,
454
+ -0.23632188141345978,
455
+ -0.05716199427843094
456
+ ],
457
+ [
458
+ 0.18505056202411652,
459
+ -0.6696141958236694,
460
+ -1.4583513736724854,
461
+ -0.2389044612646103,
462
+ -0.060365449637174606
463
+ ],
464
+ [
465
+ 0.1901988983154297,
466
+ -0.6699072122573853,
467
+ -1.4583531618118286,
468
+ -0.24148696660995483,
469
+ -0.06356889009475708
470
+ ],
471
+ [
472
+ 0.19529956579208374,
473
+ -0.6702005863189697,
474
+ -1.4583516120910645,
475
+ -0.24407006800174713,
476
+ -0.0667722150683403
477
+ ]
478
+ ],
479
+ "std": [
480
+ [
481
+ 2.756209135055542,
482
+ 4.048781394958496,
483
+ 4.834221839904785,
484
+ 2.4045844078063965,
485
+ 1.831181526184082
486
+ ],
487
+ [
488
+ 3.375519037246704,
489
+ 4.940067768096924,
490
+ 5.704192638397217,
491
+ 2.936358690261841,
492
+ 2.2514567375183105
493
+ ],
494
+ [
495
+ 3.9847497940063477,
496
+ 5.8236775398254395,
497
+ 6.570866107940674,
498
+ 3.443351984024048,
499
+ 2.663818359375
500
+ ],
501
+ [
502
+ 4.582452774047852,
503
+ 6.696892738342285,
504
+ 7.430098056793213,
505
+ 3.9259438514709473,
506
+ 3.067777633666992
507
+ ],
508
+ [
509
+ 5.167651653289795,
510
+ 7.5579352378845215,
511
+ 8.279448509216309,
512
+ 4.385776996612549,
513
+ 3.462792158126831
514
+ ],
515
+ [
516
+ 5.740141868591309,
517
+ 8.406184196472168,
518
+ 9.11788558959961,
519
+ 4.825246810913086,
520
+ 3.848816156387329
521
+ ],
522
+ [
523
+ 6.2996673583984375,
524
+ 9.240605354309082,
525
+ 9.944042205810547,
526
+ 5.246663570404053,
527
+ 4.225630283355713
528
+ ],
529
+ [
530
+ 6.846652507781982,
531
+ 10.06134033203125,
532
+ 10.75770092010498,
533
+ 5.652442932128906,
534
+ 4.593677043914795
535
+ ],
536
+ [
537
+ 7.3811469078063965,
538
+ 10.86816120147705,
539
+ 11.558343887329102,
540
+ 6.044579982757568,
541
+ 4.9531779289245605
542
+ ],
543
+ [
544
+ 7.903446197509766,
545
+ 11.661355972290039,
546
+ 12.34595012664795,
547
+ 6.424712657928467,
548
+ 5.304342269897461
549
+ ],
550
+ [
551
+ 8.414060592651367,
552
+ 12.44128131866455,
553
+ 13.120593070983887,
554
+ 6.794016361236572,
555
+ 5.647256851196289
556
+ ],
557
+ [
558
+ 8.913477897644043,
559
+ 13.208165168762207,
560
+ 13.88218879699707,
561
+ 7.153115272521973,
562
+ 5.98213529586792
563
+ ],
564
+ [
565
+ 9.402256965637207,
566
+ 13.962782859802246,
567
+ 14.631062507629395,
568
+ 7.5023603439331055,
569
+ 6.309183597564697
570
+ ],
571
+ [
572
+ 9.880831718444824,
573
+ 14.70537281036377,
574
+ 15.367119789123535,
575
+ 7.841666221618652,
576
+ 6.628418445587158
577
+ ],
578
+ [
579
+ 10.349787712097168,
580
+ 15.43662166595459,
581
+ 16.090578079223633,
582
+ 8.171104431152344,
583
+ 6.940072536468506
584
+ ],
585
+ [
586
+ 10.809670448303223,
587
+ 16.15700340270996,
588
+ 16.801616668701172,
589
+ 8.490680694580078,
590
+ 7.244339466094971
591
+ ]
592
+ ],
593
+ "q01": [
594
+ [
595
+ -7.149912223815918,
596
+ -12.172602081298828,
597
+ -13.191639633178712,
598
+ -7.083407516479492,
599
+ -5.301837921142578
600
+ ],
601
+ [
602
+ -8.969949054718018,
603
+ -14.76386520385742,
604
+ -15.673493194580079,
605
+ -8.597449951171875,
606
+ -6.4955373382568355
607
+ ],
608
+ [
609
+ -10.634601535797119,
610
+ -17.415445404052733,
611
+ -18.04468208312988,
612
+ -9.786780090332032,
613
+ -7.755381164550781
614
+ ],
615
+ [
616
+ -12.332097396850585,
617
+ -19.81734550476074,
618
+ -20.49539176940918,
619
+ -10.772034606933593,
620
+ -8.81890007019043
621
+ ],
622
+ [
623
+ -14.054951286315918,
624
+ -22.659150772094726,
625
+ -22.971768417358398,
626
+ -11.810748291015624,
627
+ -10.076640815734862
628
+ ],
629
+ [
630
+ -15.467545299530029,
631
+ -24.854450073242187,
632
+ -25.42631248474121,
633
+ -12.89973159790039,
634
+ -11.391075839996338
635
+ ],
636
+ [
637
+ -16.85467258453369,
638
+ -27.482084159851073,
639
+ -27.82916431427002,
640
+ -13.832246704101562,
641
+ -12.532282905578613
642
+ ],
643
+ [
644
+ -18.258717765808104,
645
+ -29.645261459350586,
646
+ -29.891822814941406,
647
+ -14.627758026123047,
648
+ -13.543306350708008
649
+ ],
650
+ [
651
+ -19.36220642089844,
652
+ -31.862525711059572,
653
+ -32.179530944824215,
654
+ -15.569841918945311,
655
+ -14.78740104675293
656
+ ],
657
+ [
658
+ -20.866409225463865,
659
+ -34.33618782043457,
660
+ -34.25658073425293,
661
+ -16.842924346923827,
662
+ -15.576902465820313
663
+ ],
664
+ [
665
+ -21.95573440551758,
666
+ -36.528687438964845,
667
+ -36.38519187927246,
668
+ -17.423948669433592,
669
+ -16.82309829711914
670
+ ],
671
+ [
672
+ -23.461913070678712,
673
+ -38.73653938293457,
674
+ -38.30317817687988,
675
+ -17.647796630859375,
676
+ -17.742782592773438
677
+ ],
678
+ [
679
+ -24.53435989379883,
680
+ -40.874423828124996,
681
+ -40.44069320678711,
682
+ -18.515239715576172,
683
+ -18.879788017272947
684
+ ],
685
+ [
686
+ -25.933171768188476,
687
+ -43.23724739074707,
688
+ -42.29842636108398,
689
+ -19.954179382324217,
690
+ -19.801573486328124
691
+ ],
692
+ [
693
+ -27.245162963867188,
694
+ -45.308877029418944,
695
+ -43.92444366455078,
696
+ -20.61133575439453,
697
+ -20.9133874130249
698
+ ],
699
+ [
700
+ -28.540471458435057,
701
+ -47.20507698059082,
702
+ -45.95519866943359,
703
+ -20.804307861328123,
704
+ -21.69133777618408
705
+ ]
706
+ ],
707
+ "q99": [
708
+ [
709
+ 8.69469722747799,
710
+ 10.299373474121074,
711
+ 9.785606613159175,
712
+ 9.179881286621086,
713
+ 6.271916503906227
714
+ ],
715
+ [
716
+ 10.615129890441846,
717
+ 12.666508712768518,
718
+ 12.091986007690421,
719
+ 11.210918426513642,
720
+ 7.833070821762051
721
+ ],
722
+ [
723
+ 12.4686942863464,
724
+ 14.899181365966779,
725
+ 14.446693725585934,
726
+ 13.182954406738268,
727
+ 9.380576934814396
728
+ ],
729
+ [
730
+ 14.271173782348615,
731
+ 17.33157432556147,
732
+ 16.80153594970703,
733
+ 15.230424041748023,
734
+ 10.432546234130848
735
+ ],
736
+ [
737
+ 16.08265884399413,
738
+ 19.270514526367187,
739
+ 19.092461013793933,
740
+ 17.081269989013663,
741
+ 12.112335987091052
742
+ ],
743
+ [
744
+ 18.145391998291,
745
+ 21.44050186157223,
746
+ 21.280475997924803,
747
+ 18.68309165954589,
748
+ 13.620999450683572
749
+ ],
750
+ [
751
+ 19.996333007812442,
752
+ 23.52975692749016,
753
+ 23.596975326538082,
754
+ 20.103778152465818,
755
+ 14.602626724243084
756
+ ],
757
+ [
758
+ 21.60820228576658,
759
+ 25.1919183349609,
760
+ 25.76568862915039,
761
+ 21.541843643188393,
762
+ 16.10183769226073
763
+ ],
764
+ [
765
+ 23.362480049133293,
766
+ 27.08537643432608,
767
+ 28.15810386657714,
768
+ 22.8827821350097,
769
+ 17.433070678710767
770
+ ],
771
+ [
772
+ 25.225577430725092,
773
+ 28.52370849609371,
774
+ 30.372146911621076,
775
+ 23.82381240844726,
776
+ 18.712861175537086
777
+ ],
778
+ [
779
+ 26.625544967651358,
780
+ 30.16027656555174,
781
+ 32.49975234985349,
782
+ 24.50854393005371,
783
+ 19.443568878173714
784
+ ],
785
+ [
786
+ 28.116129837036127,
787
+ 31.295492095947264,
788
+ 34.53191467285156,
789
+ 25.490360641479374,
790
+ 20.561679687499943
791
+ ],
792
+ [
793
+ 29.457272415161103,
794
+ 32.63257324218748,
795
+ 36.42262039184569,
796
+ 26.227766571044913,
797
+ 21.088712310791003
798
+ ],
799
+ [
800
+ 30.8263483428955,
801
+ 33.570856246948225,
802
+ 38.26254928588863,
803
+ 26.711691284179675,
804
+ 22.396849746704
805
+ ],
806
+ [
807
+ 32.008242797851516,
808
+ 34.67565246582029,
809
+ 40.05519142150879,
810
+ 27.399809188842735,
811
+ 22.793699951171842
812
+ ],
813
+ [
814
+ 33.28292778015133,
815
+ 35.806464462280275,
816
+ 41.85474884033201,
817
+ 27.812029418945265,
818
+ 23.47191642761218
819
+ ]
820
+ ]
821
+ }
822
+ }
823
+ }
824
+ }
experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "apply_sincos_state_encoding": true,
19
+ "use_relative_action": true,
20
+ "max_state_dim": 128,
21
+ "max_action_dim": 128,
22
+ "action_horizon": 50,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "add_pos_embed": true,
26
+ "attn_dropout": 0.2,
27
+ "use_vlln": true,
28
+ "max_seq_len": 1024,
29
+ "use_alternate_vl_dit": true,
30
+ "attend_text_every_n_blocks": 2,
31
+ "diffusion_model_cfg": {
32
+ "attention_head_dim": 48,
33
+ "dropout": 0.2,
34
+ "final_dropout": true,
35
+ "interleave_self_attention": true,
36
+ "norm_type": "ada_norm",
37
+ "num_attention_heads": 32,
38
+ "num_layers": 32,
39
+ "output_dim": 1024,
40
+ "positional_embeddings": null
41
+ },
42
+ "num_inference_timesteps": 4,
43
+ "noise_beta_alpha": 1.5,
44
+ "noise_beta_beta": 1.0,
45
+ "noise_s": 0.999,
46
+ "num_timestep_buckets": 1000,
47
+ "tune_projector": true,
48
+ "tune_diffusion_model": true,
49
+ "tune_vlln": true,
50
+ "state_dropout_prob": 0.0,
51
+ "state_additive_noise_scale": 0.0,
52
+ "max_num_embodiments": 32
53
+ }
experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
processor/embodiment_id.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "new_embodiment": 10
10
+ }
processor/processor_config.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "front",
361
+ "wrist"
362
+ ],
363
+ "sin_cos_embedding_keys": null,
364
+ "mean_std_embedding_keys": null,
365
+ "action_configs": null
366
+ },
367
+ "state": {
368
+ "delta_indices": [
369
+ 0
370
+ ],
371
+ "modality_keys": [
372
+ "single_arm",
373
+ "gripper"
374
+ ],
375
+ "sin_cos_embedding_keys": null,
376
+ "mean_std_embedding_keys": null,
377
+ "action_configs": null
378
+ },
379
+ "action": {
380
+ "delta_indices": [
381
+ 0,
382
+ 1,
383
+ 2,
384
+ 3,
385
+ 4,
386
+ 5,
387
+ 6,
388
+ 7,
389
+ 8,
390
+ 9,
391
+ 10,
392
+ 11,
393
+ 12,
394
+ 13,
395
+ 14,
396
+ 15
397
+ ],
398
+ "modality_keys": [
399
+ "single_arm",
400
+ "gripper"
401
+ ],
402
+ "sin_cos_embedding_keys": null,
403
+ "mean_std_embedding_keys": null,
404
+ "action_configs": [
405
+ {
406
+ "rep": "RELATIVE",
407
+ "type": "NON_EEF",
408
+ "format": "DEFAULT",
409
+ "state_key": null
410
+ },
411
+ {
412
+ "rep": "ABSOLUTE",
413
+ "type": "NON_EEF",
414
+ "format": "DEFAULT",
415
+ "state_key": null
416
+ }
417
+ ]
418
+ },
419
+ "language": {
420
+ "delta_indices": [
421
+ 0
422
+ ],
423
+ "modality_keys": [
424
+ "annotation.human.task_description"
425
+ ],
426
+ "sin_cos_embedding_keys": null,
427
+ "mean_std_embedding_keys": null,
428
+ "action_configs": null
429
+ }
430
+ }
431
+ },
432
+ "image_crop_size": null,
433
+ "image_target_size": null,
434
+ "use_albumentations": true,
435
+ "random_rotation_angle": null,
436
+ "color_jitter_params": {
437
+ "brightness": 0.3,
438
+ "contrast": 0.4,
439
+ "saturation": 0.5,
440
+ "hue": 0.08
441
+ },
442
+ "shortest_image_edge": 256,
443
+ "crop_fraction": 0.95,
444
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
445
+ "model_type": "eagle",
446
+ "formalize_language": true,
447
+ "max_state_dim": 128,
448
+ "max_action_dim": 128,
449
+ "max_action_horizon": 50,
450
+ "use_percentiles": false,
451
+ "clip_outliers": true,
452
+ "apply_sincos_state_encoding": true,
453
+ "use_relative_action": true
454
+ }
455
+ }
processor/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "so100_finetune"}