12e21 commited on
Commit
9a64e15
·
verified ·
1 Parent(s): e6d13ab

Upload folder using huggingface_hub

Browse files
ckpt/checkpoint-10000/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.0,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
ckpt/checkpoint-10000/embodiment_id.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2,
9
+ "new_embodiment": 10
10
+ }
ckpt/checkpoint-10000/experiment_cfg/conf.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ load_config_path: null
2
+ model:
3
+ model_type: Gr00tN1d6
4
+ model_dtype: bfloat16
5
+ model_name: nvidia/Eagle-Block2A-2B-v2
6
+ backbone_model_type: eagle
7
+ model_revision: null
8
+ tune_top_llm_layers: 4
9
+ backbone_embedding_dim: 2048
10
+ tune_llm: false
11
+ tune_visual: false
12
+ select_layer: 16
13
+ reproject_vision: false
14
+ use_flash_attention: true
15
+ load_bf16: false
16
+ collator_overwrite_image_inputs: false
17
+ eagle_collator: true
18
+ backbone_trainable_params_fp32: true
19
+ image_crop_size: null
20
+ image_target_size: null
21
+ shortest_image_edge: 256
22
+ crop_fraction: 0.95
23
+ random_rotation_angle: null
24
+ color_jitter_params:
25
+ brightness: 0.3
26
+ contrast: 0.4
27
+ saturation: 0.5
28
+ hue: 0.08
29
+ use_albumentations_transforms: true
30
+ formalize_language: true
31
+ apply_sincos_state_encoding: false
32
+ use_relative_action: true
33
+ max_state_dim: 29
34
+ max_action_dim: 29
35
+ action_horizon: 16
36
+ hidden_size: 1024
37
+ input_embedding_dim: 1536
38
+ add_pos_embed: true
39
+ attn_dropout: 0.2
40
+ use_vlln: true
41
+ max_seq_len: 1024
42
+ use_alternate_vl_dit: true
43
+ attend_text_every_n_blocks: 2
44
+ diffusion_model_cfg:
45
+ positional_embeddings: null
46
+ num_layers: 32
47
+ num_attention_heads: 32
48
+ attention_head_dim: 48
49
+ norm_type: ada_norm
50
+ dropout: 0.2
51
+ final_dropout: true
52
+ output_dim: 1024
53
+ interleave_self_attention: true
54
+ num_inference_timesteps: 4
55
+ noise_beta_alpha: 1.5
56
+ noise_beta_beta: 1.0
57
+ noise_s: 0.999
58
+ num_timestep_buckets: 1000
59
+ tune_projector: true
60
+ tune_diffusion_model: true
61
+ tune_vlln: true
62
+ state_dropout_prob: 0.0
63
+ state_additive_noise_scale: 0.0
64
+ max_num_embodiments: 32
65
+ data:
66
+ datasets:
67
+ - dataset_paths:
68
+ - dataset/so101_pick_orange_v2.1
69
+ embodiment_tag: new_embodiment
70
+ mix_ratio: 1.0
71
+ dataset_type: physical_embodiment
72
+ val_dataset_path: null
73
+ modality_configs:
74
+ new_embodiment:
75
+ video:
76
+ delta_indices:
77
+ - 0
78
+ modality_keys:
79
+ - front
80
+ - wrist
81
+ sin_cos_embedding_keys: null
82
+ mean_std_embedding_keys: null
83
+ action_configs: null
84
+ state:
85
+ delta_indices:
86
+ - 0
87
+ modality_keys:
88
+ - single_arm
89
+ - gripper
90
+ sin_cos_embedding_keys: null
91
+ mean_std_embedding_keys: null
92
+ action_configs: null
93
+ action:
94
+ delta_indices:
95
+ - 0
96
+ - 1
97
+ - 2
98
+ - 3
99
+ - 4
100
+ - 5
101
+ - 6
102
+ - 7
103
+ - 8
104
+ - 9
105
+ - 10
106
+ - 11
107
+ - 12
108
+ - 13
109
+ - 14
110
+ - 15
111
+ modality_keys:
112
+ - single_arm
113
+ - gripper
114
+ sin_cos_embedding_keys: null
115
+ mean_std_embedding_keys: null
116
+ action_configs:
117
+ - rep: RELATIVE
118
+ type: NON_EEF
119
+ format: DEFAULT
120
+ state_key: null
121
+ - rep: ABSOLUTE
122
+ type: NON_EEF
123
+ format: DEFAULT
124
+ state_key: null
125
+ language:
126
+ delta_indices:
127
+ - 0
128
+ modality_keys:
129
+ - annotation.human.task_description
130
+ sin_cos_embedding_keys: null
131
+ mean_std_embedding_keys: null
132
+ action_configs: null
133
+ download_cache: false
134
+ shard_size: 1024
135
+ episode_sampling_rate: 0.1
136
+ num_shards_per_epoch: 100000
137
+ override_pretraining_statistics: false
138
+ mode: single_turn
139
+ random_chop: 0.0
140
+ mock_dataset_mode: false
141
+ shuffle: true
142
+ seed: 42
143
+ multiprocessing_context: fork
144
+ allow_padding: false
145
+ subsample_ratio: 1.0
146
+ image_crop_size:
147
+ - 244
148
+ - 244
149
+ image_target_size:
150
+ - 224
151
+ - 224
152
+ video_backend: torchcodec
153
+ training:
154
+ output_dir: outputs/so101_finetune
155
+ experiment_name: null
156
+ max_steps: 10000
157
+ global_batch_size: 32
158
+ batch_size: null
159
+ gradient_accumulation_steps: 1
160
+ learning_rate: 0.0001
161
+ lr_scheduler_type: cosine
162
+ weight_decay: 1.0e-05
163
+ warmup_ratio: 0.05
164
+ warmup_steps: 0
165
+ max_grad_norm: 1.0
166
+ optim: adamw_torch
167
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
168
+ tf32: true
169
+ fp16: false
170
+ bf16: true
171
+ eval_bf16: true
172
+ logging_steps: 10
173
+ save_steps: 1000
174
+ save_total_limit: 5
175
+ save_vl_model: false
176
+ upload_checkpoints: false
177
+ upload_every: 1000
178
+ upload_last_n_checkpoints: 5
179
+ max_concurrent_uploads: 2
180
+ eval_strategy: 'no'
181
+ eval_steps: 500
182
+ eval_set_split_ratio: 0.1
183
+ eval_batch_size: 2
184
+ save_best_eval_metric_name: ''
185
+ save_best_eval_metric_greater_is_better: true
186
+ deepspeed_stage: 2
187
+ gradient_checkpointing: false
188
+ transformers_trust_remote_code: true
189
+ transformers_local_files_only: false
190
+ transformers_cache_dir: null
191
+ transformers_access_token: null
192
+ use_ddp: false
193
+ ddp_bucket_cap_mb: 100
194
+ num_gpus: 1
195
+ dataloader_num_workers: 4
196
+ remove_unused_columns: false
197
+ use_wandb: true
198
+ wandb_project: finetune-gr00t-n1d6
199
+ enable_profiling: false
200
+ max_retries: 3
201
+ assert_loss_less_than: null
202
+ add_rl_callback: false
203
+ enable_open_loop_eval: false
204
+ open_loop_eval_traj_ids:
205
+ - 0
206
+ open_loop_eval_steps_per_traj: 100
207
+ open_loop_eval_plot_indices: null
208
+ max_steps: 10000
209
+ save_steps: 1000
ckpt/checkpoint-10000/experiment_cfg/config.yaml ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:gr00t.configs.base_config.Config
2
+ data: !!python/object:gr00t.configs.data.data_config.DataConfig
3
+ allow_padding: false
4
+ datasets:
5
+ - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
6
+ dataset_paths:
7
+ - dataset/so101_pick_orange_v2.1
8
+ dataset_type: physical_embodiment
9
+ embodiment_tag: new_embodiment
10
+ mix_ratio: 1.0
11
+ val_dataset_path: null
12
+ download_cache: false
13
+ episode_sampling_rate: 0.1
14
+ image_crop_size:
15
+ - 244
16
+ - 244
17
+ image_target_size:
18
+ - 224
19
+ - 224
20
+ mock_dataset_mode: false
21
+ modality_configs:
22
+ new_embodiment:
23
+ action: !!python/object:gr00t.data.types.ModalityConfig
24
+ action_configs:
25
+ - !!python/object:gr00t.data.types.ActionConfig
26
+ format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
27
+ - default
28
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
29
+ - relative
30
+ state_key: null
31
+ type: &id002 !!python/object/apply:gr00t.data.types.ActionType
32
+ - non_eef
33
+ - !!python/object:gr00t.data.types.ActionConfig
34
+ format: *id001
35
+ rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
36
+ - absolute
37
+ state_key: null
38
+ type: *id002
39
+ delta_indices:
40
+ - 0
41
+ - 1
42
+ - 2
43
+ - 3
44
+ - 4
45
+ - 5
46
+ - 6
47
+ - 7
48
+ - 8
49
+ - 9
50
+ - 10
51
+ - 11
52
+ - 12
53
+ - 13
54
+ - 14
55
+ - 15
56
+ mean_std_embedding_keys: null
57
+ modality_keys:
58
+ - single_arm
59
+ - gripper
60
+ sin_cos_embedding_keys: null
61
+ language: !!python/object:gr00t.data.types.ModalityConfig
62
+ action_configs: null
63
+ delta_indices:
64
+ - 0
65
+ mean_std_embedding_keys: null
66
+ modality_keys:
67
+ - annotation.human.task_description
68
+ sin_cos_embedding_keys: null
69
+ state: !!python/object:gr00t.data.types.ModalityConfig
70
+ action_configs: null
71
+ delta_indices:
72
+ - 0
73
+ mean_std_embedding_keys: null
74
+ modality_keys:
75
+ - single_arm
76
+ - gripper
77
+ sin_cos_embedding_keys: null
78
+ video: !!python/object:gr00t.data.types.ModalityConfig
79
+ action_configs: null
80
+ delta_indices:
81
+ - 0
82
+ mean_std_embedding_keys: null
83
+ modality_keys:
84
+ - front
85
+ - wrist
86
+ sin_cos_embedding_keys: null
87
+ mode: single_turn
88
+ multiprocessing_context: fork
89
+ num_shards_per_epoch: 100000
90
+ override_pretraining_statistics: false
91
+ random_chop: 0.0
92
+ seed: 42
93
+ shard_size: 1024
94
+ shuffle: true
95
+ subsample_ratio: 1.0
96
+ video_backend: torchcodec
97
+ load_config_path: null
98
+ model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
99
+ _attn_implementation_autoset: false
100
+ _attn_implementation_internal: null
101
+ _commit_hash: null
102
+ _name_or_path: ''
103
+ add_cross_attention: false
104
+ architectures: null
105
+ backbone_model_type: eagle
106
+ backbone_trainable_params_fp32: true
107
+ bad_words_ids: null
108
+ begin_suppress_tokens: null
109
+ bos_token_id: null
110
+ chunk_size_feed_forward: 0
111
+ color_jitter_params:
112
+ brightness: 0.3
113
+ contrast: 0.4
114
+ hue: 0.08
115
+ saturation: 0.5
116
+ cross_attention_hidden_size: null
117
+ decoder_start_token_id: null
118
+ diffusion_model_cfg:
119
+ attention_head_dim: 48
120
+ dropout: 0.2
121
+ final_dropout: true
122
+ interleave_self_attention: true
123
+ norm_type: ada_norm
124
+ num_attention_heads: 32
125
+ num_layers: 32
126
+ output_dim: 1024
127
+ positional_embeddings: null
128
+ diversity_penalty: 0.0
129
+ do_sample: false
130
+ eagle_collator: true
131
+ early_stopping: false
132
+ encoder_no_repeat_ngram_size: 0
133
+ eos_token_id: null
134
+ exponential_decay_length_penalty: null
135
+ finetuning_task: null
136
+ forced_bos_token_id: null
137
+ forced_eos_token_id: null
138
+ id2label:
139
+ 0: LABEL_0
140
+ 1: LABEL_1
141
+ is_decoder: false
142
+ is_encoder_decoder: false
143
+ label2id:
144
+ LABEL_0: 0
145
+ LABEL_1: 1
146
+ length_penalty: 1.0
147
+ load_bf16: false
148
+ max_length: 20
149
+ min_length: 0
150
+ model_name: nvidia/Eagle-Block2A-2B-v2
151
+ no_repeat_ngram_size: 0
152
+ num_beam_groups: 1
153
+ num_beams: 1
154
+ num_return_sequences: 1
155
+ output_attentions: false
156
+ output_hidden_states: false
157
+ output_scores: false
158
+ pad_token_id: null
159
+ prefix: null
160
+ problem_type: null
161
+ pruned_heads: {}
162
+ random_rotation_angle: null
163
+ remove_invalid_values: false
164
+ repetition_penalty: 1.0
165
+ reproject_vision: false
166
+ return_dict: true
167
+ return_dict_in_generate: false
168
+ sep_token_id: null
169
+ state_dropout_prob: 0.0
170
+ suppress_tokens: null
171
+ task_specific_params: null
172
+ temperature: 1.0
173
+ tf_legacy_loss: false
174
+ tie_encoder_decoder: false
175
+ tie_word_embeddings: true
176
+ tokenizer_class: null
177
+ top_k: 50
178
+ top_p: 1.0
179
+ torch_dtype: null
180
+ torchscript: false
181
+ transformers_version: null
182
+ tune_diffusion_model: true
183
+ tune_llm: false
184
+ tune_projector: true
185
+ tune_visual: false
186
+ typical_p: 1.0
187
+ use_bfloat16: false
188
+ use_relative_action: true
189
+ training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
190
+ add_rl_callback: false
191
+ assert_loss_less_than: null
192
+ batch_size: null
193
+ bf16: true
194
+ dataloader_num_workers: 4
195
+ ddp_bucket_cap_mb: 100
196
+ deepspeed_stage: 2
197
+ enable_open_loop_eval: false
198
+ enable_profiling: false
199
+ eval_batch_size: 2
200
+ eval_bf16: true
201
+ eval_set_split_ratio: 0.1
202
+ eval_steps: 500
203
+ eval_strategy: 'no'
204
+ experiment_name: null
205
+ fp16: false
206
+ global_batch_size: 32
207
+ gradient_accumulation_steps: 1
208
+ gradient_checkpointing: false
209
+ learning_rate: 0.0001
210
+ logging_steps: 10
211
+ lr_scheduler_type: cosine
212
+ max_concurrent_uploads: 2
213
+ max_grad_norm: 1.0
214
+ max_retries: 3
215
+ max_steps: 10000
216
+ num_gpus: 1
217
+ open_loop_eval_plot_indices: null
218
+ open_loop_eval_steps_per_traj: 100
219
+ open_loop_eval_traj_ids:
220
+ - 0
221
+ optim: adamw_torch
222
+ output_dir: outputs/so101_finetune
223
+ remove_unused_columns: false
224
+ save_best_eval_metric_greater_is_better: true
225
+ save_best_eval_metric_name: ''
226
+ save_steps: 1000
227
+ save_total_limit: 5
228
+ save_vl_model: false
229
+ start_from_checkpoint: nvidia/GR00T-N1.6-3B
230
+ tf32: true
231
+ transformers_access_token: null
232
+ transformers_cache_dir: null
233
+ transformers_local_files_only: false
234
+ transformers_trust_remote_code: true
235
+ upload_checkpoints: false
236
+ upload_every: 1000
237
+ upload_last_n_checkpoints: 5
238
+ use_ddp: false
239
+ use_wandb: true
240
+ wandb_project: finetune-gr00t-n1d6
241
+ warmup_ratio: 0.05
242
+ warmup_steps: 0
243
+ weight_decay: 1.0e-05
ckpt/checkpoint-10000/experiment_cfg/dataset_statistics.json ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "state": {
4
+ "single_arm": {
5
+ "min": [
6
+ -66.94554138183594,
7
+ -99.34537506103516,
8
+ -100.07406616210938,
9
+ -7.2442169189453125,
10
+ -14.350746154785156
11
+ ],
12
+ "max": [
13
+ 54.9786376953125,
14
+ 94.00718688964844,
15
+ 99.91923522949219,
16
+ 100.17535400390625,
17
+ 49.98741149902344
18
+ ],
19
+ "mean": [
20
+ 9.436819110182034,
21
+ 7.574628396223122,
22
+ -10.053358760235177,
23
+ 80.99322336603086,
24
+ 8.907621910152459
25
+ ],
26
+ "std": [
27
+ 18.523341824919854,
28
+ 44.35623147233547,
29
+ 52.39530042055574,
30
+ 17.853640756898756,
31
+ 11.958544505091808
32
+ ],
33
+ "q01": [
34
+ -16.689096150230384,
35
+ -94.91365668057918,
36
+ -83.75251391588807,
37
+ 38.96477862518415,
38
+ -7.5895381550527485
39
+ ],
40
+ "q99": [
41
+ 44.3971940337351,
42
+ 57.803459759429344,
43
+ 93.71004261030197,
44
+ 99.54684530336125,
45
+ 33.206195503441855
46
+ ]
47
+ },
48
+ "gripper": {
49
+ "min": [
50
+ 1.4235210418701172
51
+ ],
52
+ "max": [
53
+ 72.82665252685547
54
+ ],
55
+ "mean": [
56
+ 35.93042549630967
57
+ ],
58
+ "std": [
59
+ 19.82074107207529
60
+ ],
61
+ "q01": [
62
+ 4.166254402088548
63
+ ],
64
+ "q99": [
65
+ 61.39789539026209
66
+ ]
67
+ }
68
+ },
69
+ "action": {
70
+ "single_arm": {
71
+ "min": [
72
+ -67.02870178222656,
73
+ -99.99999237060547,
74
+ -99.54914093017578,
75
+ -7.626762390136719,
76
+ -15.4171142578125
77
+ ],
78
+ "max": [
79
+ 54.38323974609375,
80
+ 90.63290405273438,
81
+ 99.09828186035156,
82
+ 100.0,
83
+ 50.42237854003906
84
+ ],
85
+ "mean": [
86
+ 9.34462854049006,
87
+ 6.218694137043469,
88
+ -11.439668769206587,
89
+ 81.78630601437781,
90
+ 8.801531099191099
91
+ ],
92
+ "std": [
93
+ 18.455675361161806,
94
+ 44.32732024212432,
95
+ 52.883627011344565,
96
+ 18.074943936960118,
97
+ 11.953933302294447
98
+ ],
99
+ "q01": [
100
+ -16.572277213108652,
101
+ -96.09065517543287,
102
+ -84.84769216215412,
103
+ 39.35092962124239,
104
+ -7.671825312079834
105
+ ],
106
+ "q99": [
107
+ 43.40559771349356,
108
+ 55.26623004959493,
109
+ 92.96575841515937,
110
+ 99.27690853548235,
111
+ 33.246040566403735
112
+ ]
113
+ },
114
+ "gripper": {
115
+ "min": [
116
+ 1.483216643333435
117
+ ],
118
+ "max": [
119
+ 72.98985290527344
120
+ ],
121
+ "mean": [
122
+ 32.06279950114537
123
+ ],
124
+ "std": [
125
+ 22.009280451587454
126
+ ],
127
+ "q01": [
128
+ 4.0092054223149365
129
+ ],
130
+ "q99": [
131
+ 61.458876840633906
132
+ ]
133
+ }
134
+ },
135
+ "relative_action": {
136
+ "single_arm": {
137
+ "min": [
138
+ [
139
+ -12.078048706054688,
140
+ -51.95113754272461,
141
+ -41.35333251953125,
142
+ -20.325958251953125,
143
+ -13.003585815429688
144
+ ],
145
+ [
146
+ -14.987480163574219,
147
+ -51.95113754272461,
148
+ -41.353355407714844,
149
+ -28.125457763671875,
150
+ -18.019424438476562
151
+ ],
152
+ [
153
+ -18.291839599609375,
154
+ -51.95113754272461,
155
+ -41.361572265625,
156
+ -35.89482116699219,
157
+ -22.878860473632812
158
+ ],
159
+ [
160
+ -21.74469757080078,
161
+ -51.95113754272461,
162
+ -46.38298034667969,
163
+ -43.38505554199219,
164
+ -27.603317260742188
165
+ ],
166
+ [
167
+ -24.92536163330078,
168
+ -51.95113754272461,
169
+ -52.875328063964844,
170
+ -50.71356201171875,
171
+ -31.510398864746094
172
+ ],
173
+ [
174
+ -27.711326599121094,
175
+ -51.95113754272461,
176
+ -58.254844665527344,
177
+ -57.86737060546875,
178
+ -33.94642639160156
179
+ ],
180
+ [
181
+ -30.726837158203125,
182
+ -54.53240966796875,
183
+ -62.85357666015625,
184
+ -63.66200256347656,
185
+ -36.22076416015625
186
+ ],
187
+ [
188
+ -33.83001708984375,
189
+ -59.21240997314453,
190
+ -67.64421844482422,
191
+ -67.32625579833984,
192
+ -38.53020477294922
193
+ ],
194
+ [
195
+ -36.622886657714844,
196
+ -63.709739685058594,
197
+ -73.31746673583984,
198
+ -69.5547103881836,
199
+ -40.86298370361328
200
+ ],
201
+ [
202
+ -39.131980895996094,
203
+ -68.01353454589844,
204
+ -78.90808868408203,
205
+ -71.34422302246094,
206
+ -43.1685791015625
207
+ ],
208
+ [
209
+ -41.22662353515625,
210
+ -72.27005767822266,
211
+ -83.95648956298828,
212
+ -71.68507385253906,
213
+ -45.630462646484375
214
+ ],
215
+ [
216
+ -43.09895324707031,
217
+ -76.34874725341797,
218
+ -88.23937225341797,
219
+ -72.02021789550781,
220
+ -48.347808837890625
221
+ ],
222
+ [
223
+ -44.80570983886719,
224
+ -80.23060607910156,
225
+ -91.84622192382812,
226
+ -72.1054458618164,
227
+ -51.21324157714844
228
+ ],
229
+ [
230
+ -46.06871032714844,
231
+ -83.98172760009766,
232
+ -95.32868957519531,
233
+ -72.1054458618164,
234
+ -53.853118896484375
235
+ ],
236
+ [
237
+ -47.23240661621094,
238
+ -87.46543884277344,
239
+ -98.75520324707031,
240
+ -72.1054458618164,
241
+ -56.05763244628906
242
+ ],
243
+ [
244
+ -48.67430114746094,
245
+ -90.8409652709961,
246
+ -101.38887023925781,
247
+ -72.1054458618164,
248
+ -57.85150146484375
249
+ ]
250
+ ],
251
+ "max": [
252
+ [
253
+ 18.512191772460938,
254
+ 20.69375228881836,
255
+ 42.411834716796875,
256
+ 23.563674926757812,
257
+ 13.185585021972656
258
+ ],
259
+ [
260
+ 22.481399536132812,
261
+ 28.41082000732422,
262
+ 42.411834716796875,
263
+ 27.518478393554688,
264
+ 17.75921630859375
265
+ ],
266
+ [
267
+ 25.739761352539062,
268
+ 35.58381652832031,
269
+ 46.46379852294922,
270
+ 34.67655944824219,
271
+ 22.29987335205078
272
+ ],
273
+ [
274
+ 28.566734313964844,
275
+ 42.625579833984375,
276
+ 55.480934143066406,
277
+ 40.45378112792969,
278
+ 26.15636444091797
279
+ ],
280
+ [
281
+ 30.84162139892578,
282
+ 48.86858367919922,
283
+ 64.38365173339844,
284
+ 45.651885986328125,
285
+ 29.79944610595703
286
+ ],
287
+ [
288
+ 34.05448913574219,
289
+ 54.42564392089844,
290
+ 72.3538589477539,
291
+ 49.64585876464844,
292
+ 32.81971740722656
293
+ ],
294
+ [
295
+ 37.10010528564453,
296
+ 61.04212951660156,
297
+ 80.28893280029297,
298
+ 52.62840270996094,
299
+ 35.40287780761719
300
+ ],
301
+ [
302
+ 39.84075927734375,
303
+ 67.21363830566406,
304
+ 87.31271362304688,
305
+ 55.53631591796875,
306
+ 38.09008026123047
307
+ ],
308
+ [
309
+ 42.86634826660156,
310
+ 73.03642272949219,
311
+ 94.16574096679688,
312
+ 57.916900634765625,
313
+ 39.504486083984375
314
+ ],
315
+ [
316
+ 45.81437683105469,
317
+ 78.54348754882812,
318
+ 100.11762237548828,
319
+ 60.059600830078125,
320
+ 40.71885681152344
321
+ ],
322
+ [
323
+ 48.55254364013672,
324
+ 83.35438537597656,
325
+ 105.4377212524414,
326
+ 62.40226745605469,
327
+ 41.093238830566406
328
+ ],
329
+ [
330
+ 50.975975036621094,
331
+ 88.08012390136719,
332
+ 110.27528381347656,
333
+ 64.56655883789062,
334
+ 41.30443572998047
335
+ ],
336
+ [
337
+ 53.30335235595703,
338
+ 92.29953002929688,
339
+ 115.33406066894531,
340
+ 65.78926086425781,
341
+ 41.34910583496094
342
+ ],
343
+ [
344
+ 55.47357177734375,
345
+ 96.1929702758789,
346
+ 121.76927947998047,
347
+ 69.31730651855469,
348
+ 41.388336181640625
349
+ ],
350
+ [
351
+ 57.270782470703125,
352
+ 99.4841079711914,
353
+ 127.72057342529297,
354
+ 72.86603546142578,
355
+ 41.913490295410156
356
+ ],
357
+ [
358
+ 58.745025634765625,
359
+ 102.42430114746094,
360
+ 132.68002319335938,
361
+ 76.27465057373047,
362
+ 42.577056884765625
363
+ ]
364
+ ],
365
+ "mean": [
366
+ [
367
+ -0.08928748220205307,
368
+ -1.341666340827942,
369
+ -1.4313260316848755,
370
+ 0.8176242113113403,
371
+ -0.10854622721672058
372
+ ],
373
+ [
374
+ -0.08544569462537766,
375
+ -1.3248456716537476,
376
+ -1.450048804283142,
377
+ 0.8183177709579468,
378
+ -0.10887768864631653
379
+ ],
380
+ [
381
+ -0.08182403445243835,
382
+ -1.309598445892334,
383
+ -1.4664539098739624,
384
+ 0.8186652660369873,
385
+ -0.10912427306175232
386
+ ],
387
+ [
388
+ -0.07855380326509476,
389
+ -1.2955514192581177,
390
+ -1.4810765981674194,
391
+ 0.8189600110054016,
392
+ -0.10939010232686996
393
+ ],
394
+ [
395
+ -0.07568887621164322,
396
+ -1.282740592956543,
397
+ -1.4939128160476685,
398
+ 0.819760262966156,
399
+ -0.10975522547960281
400
+ ],
401
+ [
402
+ -0.07324869185686111,
403
+ -1.270890474319458,
404
+ -1.5052545070648193,
405
+ 0.821160614490509,
406
+ -0.1100916713476181
407
+ ],
408
+ [
409
+ -0.07121772319078445,
410
+ -1.2599191665649414,
411
+ -1.5153403282165527,
412
+ 0.8231523036956787,
413
+ -0.1104360818862915
414
+ ],
415
+ [
416
+ -0.06971976906061172,
417
+ -1.2498693466186523,
418
+ -1.5240850448608398,
419
+ 0.8258273005485535,
420
+ -0.11075393855571747
421
+ ],
422
+ [
423
+ -0.0686890035867691,
424
+ -1.2407728433609009,
425
+ -1.5315450429916382,
426
+ 0.8293973803520203,
427
+ -0.11109816282987595
428
+ ],
429
+ [
430
+ -0.06810573488473892,
431
+ -1.2328988313674927,
432
+ -1.537511944770813,
433
+ 0.8338295221328735,
434
+ -0.11150559782981873
435
+ ],
436
+ [
437
+ -0.06795518100261688,
438
+ -1.2263479232788086,
439
+ -1.5418497323989868,
440
+ 0.8392298817634583,
441
+ -0.11192262917757034
442
+ ],
443
+ [
444
+ -0.06824091076850891,
445
+ -1.2213470935821533,
446
+ -1.5445207357406616,
447
+ 0.84555584192276,
448
+ -0.11240900307893753
449
+ ],
450
+ [
451
+ -0.06895144283771515,
452
+ -1.2181369066238403,
453
+ -1.5453885793685913,
454
+ 0.8529182076454163,
455
+ -0.11297398805618286
456
+ ],
457
+ [
458
+ -0.06986649334430695,
459
+ -1.2168906927108765,
460
+ -1.544404149055481,
461
+ 0.8613249063491821,
462
+ -0.113494373857975
463
+ ],
464
+ [
465
+ -0.07098635286092758,
466
+ -1.2179030179977417,
467
+ -1.5415356159210205,
468
+ 0.8707390427589417,
469
+ -0.11393582075834274
470
+ ],
471
+ [
472
+ -0.07230301201343536,
473
+ -1.2215701341629028,
474
+ -1.5368025302886963,
475
+ 0.881089448928833,
476
+ -0.11435369402170181
477
+ ]
478
+ ],
479
+ "std": [
480
+ [
481
+ 2.871591091156006,
482
+ 5.391886234283447,
483
+ 6.014742374420166,
484
+ 3.662677764892578,
485
+ 1.5400844812393188
486
+ ],
487
+ [
488
+ 3.6962430477142334,
489
+ 6.426257610321045,
490
+ 7.588341236114502,
491
+ 4.433483123779297,
492
+ 2.0808327198028564
493
+ ],
494
+ [
495
+ 4.523628234863281,
496
+ 7.537558078765869,
497
+ 9.208881378173828,
498
+ 5.248630523681641,
499
+ 2.599971294403076
500
+ ],
501
+ [
502
+ 5.338505744934082,
503
+ 8.676809310913086,
504
+ 10.831770896911621,
505
+ 6.0670928955078125,
506
+ 3.0954203605651855
507
+ ],
508
+ [
509
+ 6.133913040161133,
510
+ 9.817375183105469,
511
+ 12.435976028442383,
512
+ 6.87053918838501,
513
+ 3.5675745010375977
514
+ ],
515
+ [
516
+ 6.90657377243042,
517
+ 10.943366050720215,
518
+ 14.010025024414062,
519
+ 7.649028778076172,
520
+ 4.017260551452637
521
+ ],
522
+ [
523
+ 7.654917240142822,
524
+ 12.04592227935791,
525
+ 15.547393798828125,
526
+ 8.397272109985352,
527
+ 4.445760250091553
528
+ ],
529
+ [
530
+ 8.378345489501953,
531
+ 13.1199369430542,
532
+ 17.044599533081055,
533
+ 9.112852096557617,
534
+ 4.8546142578125
535
+ ],
536
+ [
537
+ 9.07689380645752,
538
+ 14.162562370300293,
539
+ 18.499372482299805,
540
+ 9.794965744018555,
541
+ 5.2454915046691895
542
+ ],
543
+ [
544
+ 9.750933647155762,
545
+ 15.172690391540527,
546
+ 19.911304473876953,
547
+ 10.443316459655762,
548
+ 5.619942665100098
549
+ ],
550
+ [
551
+ 10.401156425476074,
552
+ 16.14958953857422,
553
+ 21.27997589111328,
554
+ 11.058207511901855,
555
+ 5.979310512542725
556
+ ],
557
+ [
558
+ 11.028536796569824,
559
+ 17.093509674072266,
560
+ 22.60550880432129,
561
+ 11.64011001586914,
562
+ 6.324570655822754
563
+ ],
564
+ [
565
+ 11.634090423583984,
566
+ 18.0046443939209,
567
+ 23.8878116607666,
568
+ 12.189626693725586,
569
+ 6.656616687774658
570
+ ],
571
+ [
572
+ 12.21898078918457,
573
+ 18.88372802734375,
574
+ 25.12725830078125,
575
+ 12.70767879486084,
576
+ 6.976064205169678
577
+ ],
578
+ [
579
+ 12.784246444702148,
580
+ 19.73196792602539,
581
+ 26.324321746826172,
582
+ 13.195191383361816,
583
+ 7.283435821533203
584
+ ],
585
+ [
586
+ 13.33083724975586,
587
+ 20.551090240478516,
588
+ 27.480131149291992,
589
+ 13.652670860290527,
590
+ 7.579307556152344
591
+ ]
592
+ ],
593
+ "q01": [
594
+ [
595
+ -7.962890148162842,
596
+ -14.315224647521973,
597
+ -17.707164764404297,
598
+ -9.451419830322266,
599
+ -5.023085117340088
600
+ ],
601
+ [
602
+ -9.997512817382812,
603
+ -17.7712459564209,
604
+ -22.663740158081055,
605
+ -12.528553009033203,
606
+ -6.693915843963623
607
+ ],
608
+ [
609
+ -11.868306159973145,
610
+ -21.55695343017578,
611
+ -26.183889389038086,
612
+ -15.697895050048828,
613
+ -8.342229843139648
614
+ ],
615
+ [
616
+ -14.148200035095215,
617
+ -25.02656364440918,
618
+ -30.18362045288086,
619
+ -18.600130081176758,
620
+ -9.885397911071777
621
+ ],
622
+ [
623
+ -16.415008544921875,
624
+ -28.84429168701172,
625
+ -33.98527145385742,
626
+ -21.490951538085938,
627
+ -11.297945976257324
628
+ ],
629
+ [
630
+ -18.51193618774414,
631
+ -32.46988296508789,
632
+ -37.53544235229492,
633
+ -24.31591796875,
634
+ -12.669763565063477
635
+ ],
636
+ [
637
+ -20.730897903442383,
638
+ -35.97110366821289,
639
+ -41.142967224121094,
640
+ -26.682071685791016,
641
+ -13.93807315826416
642
+ ],
643
+ [
644
+ -22.598093032836914,
645
+ -39.98816680908203,
646
+ -44.34919738769531,
647
+ -29.085233688354492,
648
+ -15.14836311340332
649
+ ],
650
+ [
651
+ -24.54084014892578,
652
+ -43.015499114990234,
653
+ -47.237667083740234,
654
+ -31.39093017578125,
655
+ -16.20530128479004
656
+ ],
657
+ [
658
+ -26.530101776123047,
659
+ -46.71792221069336,
660
+ -50.609710693359375,
661
+ -33.21760940551758,
662
+ -17.194290161132812
663
+ ],
664
+ [
665
+ -28.196693420410156,
666
+ -50.25079345703125,
667
+ -53.16810989379883,
668
+ -35.350162506103516,
669
+ -18.18351173400879
670
+ ],
671
+ [
672
+ -29.735382080078125,
673
+ -52.41567611694336,
674
+ -55.63277053833008,
675
+ -36.781761169433594,
676
+ -19.38352394104004
677
+ ],
678
+ [
679
+ -31.10464859008789,
680
+ -55.84165573120117,
681
+ -57.993980407714844,
682
+ -38.734779357910156,
683
+ -20.228023529052734
684
+ ],
685
+ [
686
+ -32.75123977661133,
687
+ -59.13310241699219,
688
+ -60.02174758911133,
689
+ -40.260066986083984,
690
+ -21.13140296936035
691
+ ],
692
+ [
693
+ -34.169673919677734,
694
+ -62.30284881591797,
695
+ -62.26761245727539,
696
+ -42.105831146240234,
697
+ -21.92721939086914
698
+ ],
699
+ [
700
+ -35.710262298583984,
701
+ -65.6181869506836,
702
+ -64.38263702392578,
703
+ -43.33319091796875,
704
+ -22.71847152709961
705
+ ]
706
+ ],
707
+ "q99": [
708
+ [
709
+ 9.732619285583496,
710
+ 13.937226295471191,
711
+ 17.37236213684082,
712
+ 10.840984344482422,
713
+ 5.523990154266357
714
+ ],
715
+ [
716
+ 12.623029708862305,
717
+ 17.370437622070312,
718
+ 23.666194915771484,
719
+ 13.33433723449707,
720
+ 7.570279598236084
721
+ ],
722
+ [
723
+ 15.670611381530762,
724
+ 20.512556076049805,
725
+ 29.823646545410156,
726
+ 15.939998626708984,
727
+ 9.560501098632812
728
+ ],
729
+ [
730
+ 18.423181533813477,
731
+ 24.460220336914062,
732
+ 35.89162826538086,
733
+ 18.452611923217773,
734
+ 11.480408668518066
735
+ ],
736
+ [
737
+ 21.29176902770996,
738
+ 28.305740356445312,
739
+ 41.52001190185547,
740
+ 20.555370330810547,
741
+ 13.225448608398438
742
+ ],
743
+ [
744
+ 23.652997970581055,
745
+ 32.20855712890625,
746
+ 47.252647399902344,
747
+ 22.819040298461914,
748
+ 14.876164436340332
749
+ ],
750
+ [
751
+ 26.150699615478516,
752
+ 36.052101135253906,
753
+ 53.16713333129883,
754
+ 24.967885971069336,
755
+ 16.451913833618164
756
+ ],
757
+ [
758
+ 28.00914192199707,
759
+ 39.1674919128418,
760
+ 58.627197265625,
761
+ 27.356786727905273,
762
+ 18.208866119384766
763
+ ],
764
+ [
765
+ 29.93129539489746,
766
+ 42.67388153076172,
767
+ 64.21937561035156,
768
+ 29.275638580322266,
769
+ 19.7204647064209
770
+ ],
771
+ [
772
+ 32.04494094848633,
773
+ 45.604549407958984,
774
+ 69.7145004272461,
775
+ 31.53823471069336,
776
+ 21.124452590942383
777
+ ],
778
+ [
779
+ 33.8387451171875,
780
+ 48.230594635009766,
781
+ 74.79926300048828,
782
+ 33.60222625732422,
783
+ 22.14889907836914
784
+ ],
785
+ [
786
+ 35.145259857177734,
787
+ 51.10918045043945,
788
+ 79.912841796875,
789
+ 35.561344146728516,
790
+ 23.181623458862305
791
+ ],
792
+ [
793
+ 36.698463439941406,
794
+ 53.3221435546875,
795
+ 84.57654571533203,
796
+ 36.795257568359375,
797
+ 24.493980407714844
798
+ ],
799
+ [
800
+ 37.947391510009766,
801
+ 55.879974365234375,
802
+ 89.63903045654297,
803
+ 38.53077697753906,
804
+ 25.64667510986328
805
+ ],
806
+ [
807
+ 39.297698974609375,
808
+ 57.505733489990234,
809
+ 94.03701782226562,
810
+ 40.18893814086914,
811
+ 26.982807159423828
812
+ ],
813
+ [
814
+ 40.42583465576172,
815
+ 59.16443634033203,
816
+ 98.92611694335938,
817
+ 41.37394714355469,
818
+ 27.82160758972168
819
+ ]
820
+ ]
821
+ }
822
+ }
823
+ }
824
+ }
ckpt/checkpoint-10000/experiment_cfg/final_model_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "Gr00tN1d6",
3
+ "model_dtype": "bfloat16",
4
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
5
+ "backbone_model_type": "eagle",
6
+ "model_revision": null,
7
+ "tune_top_llm_layers": 4,
8
+ "backbone_embedding_dim": 2048,
9
+ "tune_llm": false,
10
+ "tune_visual": false,
11
+ "select_layer": 16,
12
+ "reproject_vision": false,
13
+ "use_flash_attention": true,
14
+ "load_bf16": true,
15
+ "collator_overwrite_image_inputs": false,
16
+ "eagle_collator": true,
17
+ "backbone_trainable_params_fp32": true,
18
+ "apply_sincos_state_encoding": true,
19
+ "use_relative_action": true,
20
+ "max_state_dim": 128,
21
+ "max_action_dim": 128,
22
+ "action_horizon": 50,
23
+ "hidden_size": 1024,
24
+ "input_embedding_dim": 1536,
25
+ "add_pos_embed": true,
26
+ "attn_dropout": 0.2,
27
+ "use_vlln": true,
28
+ "max_seq_len": 1024,
29
+ "use_alternate_vl_dit": true,
30
+ "attend_text_every_n_blocks": 2,
31
+ "diffusion_model_cfg": {
32
+ "attention_head_dim": 48,
33
+ "dropout": 0.2,
34
+ "final_dropout": true,
35
+ "interleave_self_attention": true,
36
+ "norm_type": "ada_norm",
37
+ "num_attention_heads": 32,
38
+ "num_layers": 32,
39
+ "output_dim": 1024,
40
+ "positional_embeddings": null
41
+ },
42
+ "num_inference_timesteps": 4,
43
+ "noise_beta_alpha": 1.5,
44
+ "noise_beta_beta": 1.0,
45
+ "noise_s": 0.999,
46
+ "num_timestep_buckets": 1000,
47
+ "tune_projector": true,
48
+ "tune_diffusion_model": true,
49
+ "tune_vlln": true,
50
+ "state_dropout_prob": 0.0,
51
+ "state_additive_noise_scale": 0.0,
52
+ "max_num_embodiments": 32
53
+ }
ckpt/checkpoint-10000/experiment_cfg/final_processor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/checkpoint-10000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448c4cd0dfedf8cbf58af31eb51f0744acc7cb7ca14cce693e0a4391a3b76d28
3
+ size 4990120184
ckpt/checkpoint-10000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16631189cc64c704275f0e14d15f883fbdfe27fd863cea1e3454b11d842e1b9e
3
+ size 4823190320
ckpt/checkpoint-10000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/checkpoint-10000/processor_config.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "new_embodiment": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "front",
361
+ "wrist"
362
+ ],
363
+ "sin_cos_embedding_keys": null,
364
+ "mean_std_embedding_keys": null,
365
+ "action_configs": null
366
+ },
367
+ "state": {
368
+ "delta_indices": [
369
+ 0
370
+ ],
371
+ "modality_keys": [
372
+ "single_arm",
373
+ "gripper"
374
+ ],
375
+ "sin_cos_embedding_keys": null,
376
+ "mean_std_embedding_keys": null,
377
+ "action_configs": null
378
+ },
379
+ "action": {
380
+ "delta_indices": [
381
+ 0,
382
+ 1,
383
+ 2,
384
+ 3,
385
+ 4,
386
+ 5,
387
+ 6,
388
+ 7,
389
+ 8,
390
+ 9,
391
+ 10,
392
+ 11,
393
+ 12,
394
+ 13,
395
+ 14,
396
+ 15
397
+ ],
398
+ "modality_keys": [
399
+ "single_arm",
400
+ "gripper"
401
+ ],
402
+ "sin_cos_embedding_keys": null,
403
+ "mean_std_embedding_keys": null,
404
+ "action_configs": [
405
+ {
406
+ "rep": "RELATIVE",
407
+ "type": "NON_EEF",
408
+ "format": "DEFAULT",
409
+ "state_key": null
410
+ },
411
+ {
412
+ "rep": "ABSOLUTE",
413
+ "type": "NON_EEF",
414
+ "format": "DEFAULT",
415
+ "state_key": null
416
+ }
417
+ ]
418
+ },
419
+ "language": {
420
+ "delta_indices": [
421
+ 0
422
+ ],
423
+ "modality_keys": [
424
+ "annotation.human.task_description"
425
+ ],
426
+ "sin_cos_embedding_keys": null,
427
+ "mean_std_embedding_keys": null,
428
+ "action_configs": null
429
+ }
430
+ }
431
+ },
432
+ "image_crop_size": null,
433
+ "image_target_size": null,
434
+ "use_albumentations": true,
435
+ "random_rotation_angle": null,
436
+ "color_jitter_params": {
437
+ "brightness": 0.3,
438
+ "contrast": 0.4,
439
+ "saturation": 0.5,
440
+ "hue": 0.08
441
+ },
442
+ "shortest_image_edge": 256,
443
+ "crop_fraction": 0.95,
444
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
445
+ "model_type": "eagle",
446
+ "formalize_language": true,
447
+ "max_state_dim": 128,
448
+ "max_action_dim": 128,
449
+ "max_action_horizon": 50,
450
+ "use_percentiles": false,
451
+ "clip_outliers": true,
452
+ "apply_sincos_state_encoding": true,
453
+ "use_relative_action": true
454
+ }
455
+ }
ckpt/checkpoint-10000/statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/checkpoint-10000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/checkpoint-10000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f23d9dbb04f183208d38f860d86935f3415285bb2ac58730eb13ce55056f3803
3
+ size 5713
ckpt/checkpoint-10000/wandb_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"project": "finetune-gr00t-n1d6", "run_id": "so101_finetune"}
configs_for_gr00t/modality.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "state": {
3
+ "single_arm": {
4
+ "start": 0,
5
+ "end": 5
6
+ },
7
+ "gripper": {
8
+ "start": 5,
9
+ "end": 6
10
+ }
11
+ },
12
+ "action": {
13
+ "single_arm": {
14
+ "start": 0,
15
+ "end": 5
16
+ },
17
+ "gripper": {
18
+ "start": 5,
19
+ "end": 6
20
+ }
21
+ },
22
+ "video": {
23
+ "front": {
24
+ "original_key": "observation.images.front"
25
+ },
26
+ "wrist": {
27
+ "original_key": "observation.images.wrist"
28
+ }
29
+ },
30
+ "annotation": {
31
+ "human.task_description": {
32
+ "original_key": "task_index"
33
+ }
34
+ }
35
+ }
36
+
configs_for_gr00t/so101_config.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from gr00t.configs.data.embodiment_configs import register_modality_config
from gr00t.data.embodiment_tags import EmbodimentTag
from gr00t.data.types import (
    ActionConfig,
    ActionFormat,
    ActionRepresentation,
    ActionType,
    ModalityConfig,
)


# Modality configuration for the SO-101 setup: two camera streams ("front",
# "wrist"), a state/action vector split into "single_arm" and "gripper", and
# one language annotation key.
so101_config = {
    # Only the current frame (delta index 0) is read for each camera.
    "video": ModalityConfig(delta_indices=[0], modality_keys=["front", "wrist"]),
    # Only the current proprioceptive reading (delta index 0) is read.
    "state": ModalityConfig(delta_indices=[0], modality_keys=["single_arm", "gripper"]),
    "action": ModalityConfig(
        # Action chunk covering delta indices 0..15 (16 steps).
        delta_indices=list(range(16)),
        modality_keys=["single_arm", "gripper"],
        # One ActionConfig per action key.
        # NOTE(review): presumably matched positionally to modality_keys
        # (single_arm -> RELATIVE, gripper -> ABSOLUTE) - confirm against gr00t.
        action_configs=[
            ActionConfig(
                rep=ActionRepresentation.RELATIVE,
                type=ActionType.NON_EEF,
                format=ActionFormat.DEFAULT,
            ),
            ActionConfig(
                rep=ActionRepresentation.ABSOLUTE,
                type=ActionType.NON_EEF,
                format=ActionFormat.DEFAULT,
            ),
        ],
    ),
    "language": ModalityConfig(
        delta_indices=[0],
        modality_keys=["annotation.human.task_description"],
    ),
}

# Importing this module registers the configuration under the NEW_EMBODIMENT tag.
register_modality_config(so101_config, embodiment_tag=EmbodimentTag.NEW_EMBODIMENT)
scripts/finetune.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Fine-tune from nvidia/GR00T-N1.6-3B on the SO-101 pick-orange dataset with a
# single GPU, writing checkpoints to outputs/so101_finetune.
#
# NOTE(review): all paths are relative (gr00t/..., dataset/..., configs/...), so
# this presumably has to be launched from the root of the GR00T checkout with
# so101_config.py copied into its configs/ directory - confirm.
set -x -e

export NUM_GPUS=1

# Multi-GPU alternative, kept for reference; it would replace the
# "CUDA_VISIBLE_DEVICES=0 python" launcher line below:
#   torchrun --nproc_per_node=$NUM_GPUS --master_port=29500
CUDA_VISIBLE_DEVICES=0 python \
    gr00t/experiment/launch_finetune.py \
    --base_model_path nvidia/GR00T-N1.6-3B \
    --dataset_path dataset/so101_pick_orange_v2.1 \
    --modality_config_path configs/so101_config.py \
    --embodiment_tag NEW_EMBODIMENT \
    --num_gpus "$NUM_GPUS" \
    --output_dir outputs/so101_finetune \
    --save_steps 1000 \
    --save_total_limit 5 \
    --max_steps 10000 \
    --warmup_ratio 0.05 \
    --weight_decay 1e-5 \
    --learning_rate 1e-4 \
    --global_batch_size 32 \
    --color_jitter_params brightness 0.3 contrast 0.4 saturation 0.5 hue 0.08 \
    --use_wandb \
    --dataloader_num_workers 4
scripts/gr00t_server.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Start the GR00T policy server for the fine-tuned checkpoint in
# ckpt/checkpoint-10000 on cuda:0, listening on port 5555.
#
# NOTE(review): --host 0.0.0.0 accepts connections on every network interface.
# If the client runs on the same machine and connects via localhost, binding
# to 127.0.0.1 would be the safer choice - confirm the intended deployment.
set -e

python gr00t/eval/run_gr00t_server.py \
    --embodiment-tag NEW_EMBODIMENT \
    --model-path ckpt/checkpoint-10000 \
    --modality-config-path configs/modality.json \
    --device cuda:0 \
    --host 0.0.0.0 \
    --port 5555 \
    --strict
scripts/leisaac_client.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Run 10 evaluation rounds of the LeIsaac SO-101 pick-orange task against a
# policy server reachable at localhost:5555.
#
# NOTE(review): the host/port presumably have to match the running policy
# server, and the action horizon of 16 the chunk length the policy was
# trained with - confirm before changing either side.
set -e

python scripts/evaluation/policy_inference.py \
    --task=LeIsaac-SO101-PickOrange-v0 \
    --eval_rounds=10 \
    --policy_type=gr00tn1.6 \
    --policy_host=localhost \
    --policy_port=5555 \
    --policy_timeout_ms=5000 \
    --policy_action_horizon=16 \
    --policy_language_instruction="Pick up the orange and place it on the plate" \
    --device=cuda \
    --enable_cameras