Robotics
Safetensors
Gr00tN1d6
youliangt committed on
Commit
6ccd4b8
·
verified ·
1 Parent(s): 20232f6

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - nvidia/PhysicalAI-Robotics-GR00T-X-Embodiment-Sim
4
+ - IPEC-COMMUNITY/droid_lerobot
5
+ tags:
6
+ - robotics
7
+ base_model:
8
+ - nvidia/GR00T-N1.6-3B
9
+ ---
10
+
11
+ <div align="center">
12
+ <a href="https://github.com/NVIDIA/Isaac-GR00T">
13
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/67b8da81d01134f89899b4a7/8bFQa2ZIGCsOQQ2ho2N_U.png">
14
+ </a>
15
+ <div align="center">
16
+ <a href="https://github.com/NVIDIA/Isaac-GR00T">
17
+ <img src="https://img.shields.io/badge/GitHub-grey?logo=GitHub" alt="GitHub Badge">
18
+ </a>
19
+ <a href="https://developer.nvidia.com/isaac/gr00t">
20
+ <img src="https://img.shields.io/badge/Website-green" alt="Website Badge">
21
+ </a>
22
+ <!-- <a href="">
23
+ <img src="https://img.shields.io/badge/Project%20Page-blue?style=plastic" alt="Project Page Badge">
24
+ </a>
25
+ <a href="">
26
+ <img src="https://img.shields.io/badge/Research_Blog-black?style=flat" alt="Research Blog Badge">
27
+ </a>
28
+ <a href="">
29
+ <img src="https://img.shields.io/badge/Dataset-Overview-brightgreen?logo=googleforms" alt="Dataset Overview Badge">
30
+ </a>
31
+ -->
32
+ </div>
33
+ </div>
34
+
35
+ # GR00T-N1.6-DROID
36
+
37
+ This model is `GR00T-N1.6-3B` fine-tuned on the [DROID dataset](https://droid-dataset.github.io/).
38
+
39
+ Refer to the [Isaac-GR00T](https://github.com/NVIDIA/Isaac-GR00T) examples for more details.
40
+
41
+ # Usage
42
+
43
+ ```bash
44
+ uv run python gr00t/eval/run_gr00t_server.py \
45
+ --embodiment-tag OXE_DROID \
46
+ --use_sim_policy_wrapper \
47
+ --model-path=nvidia/GR00T-N1.6-DROID
48
+ ```
49
+
50
+ # Citation
51
+ ```bibtex
52
+ @misc{nvidia2025gr00tn1openfoundation,
53
+ title={GR00T N1: An Open Foundation Model for Generalist Humanoid Robots},
54
+ author={NVIDIA and Johan Bjorck and Fernando Castañeda and Nikita Cherniadev and Xingye Da and Runyu Ding and Linxi "Jim" Fan and Yu Fang and Dieter Fox and Fengyuan Hu and Spencer Huang and Joel Jang and Zhenyu Jiang and Jan Kautz and Kaushil Kundalia and Lawrence Lao and Zhiqi Li and Zongyu Lin and Kevin Lin and Guilin Liu and Edith Llontop and Loic Magne and Ajay Mandlekar and Avnish Narayan and Soroush Nasiriany and Scott Reed and You Liang Tan and Guanzhi Wang and Zu Wang and Jing Wang and Qi Wang and Jiannan Xiang and Yuqi Xie and Yinzhen Xu and Zhenjia Xu and Seonghyeon Ye and Zhiding Yu and Ao Zhang and Hao Zhang and Yizhou Zhao and Ruijie Zheng and Yuke Zhu},
55
+ year={2025},
56
+ eprint={2503.14734},
57
+ archivePrefix={arXiv},
58
+ primaryClass={cs.RO},
59
+ }
60
+ ```
config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 8,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.2,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_percentiles": false,
69
+ "use_relative_action": true,
70
+ "use_vlln": true
71
+ }
embodiment_id.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "oxe_droid": 16,
4
+ "oxe_fractal": 18,
5
+ "oxe_language_table": 19,
6
+ "oxe_bridge": 20,
7
+ "unknown": 22,
8
+ "gr1_unified": 20,
9
+ "agibot": 26,
10
+ "oxe_mutex": 28,
11
+ "oxe_roboset": 29,
12
+ "oxe_plex": 30,
13
+ "dream": 31,
14
+ "language_table_sim": 7,
15
+ "gr1_isaac": 0,
16
+ "xdof": 23,
17
+ "xdof_oss_data": 27,
18
+ "xdof_h16": 23,
19
+ "sim_behavior_r1_pro": 24,
20
+ "unitree_g1_full_body_with_waist_height_nav_cmd": 25,
21
+ "unitree_g1_full_body_with_waist_height_nav_cmd_sim": 8,
22
+ "unitree_g1_full_body_with_waist_height_nav_cmd_new": 9,
23
+ "simpler_env_google": 0,
24
+ "simpler_env_widowx": 1,
25
+ "libero_sim": 2,
26
+ "droid_sim": 3,
27
+ "real_r1_pro_sharpa": 8,
28
+ "r1_pro": 27,
29
+ "r1_pro_single-view": 27,
30
+ "new_embodiment": 10,
31
+ "so100_2rgb": 6,
32
+ "so100_3rgb": 6,
33
+ "robomind_agilex_3rgb": 4,
34
+ "robomind_franka_1rgb": 5,
35
+ "robomind_franka_3rgb": 5,
36
+ "robomind_tienkung_gello_1rgb": 11,
37
+ "robomind_ur_1rgb": 12,
38
+ "robomind_tienkung_xsens_1rgb": 13,
39
+ "molmoact_franka_3rgb": 14,
40
+ "galaxea_r1_4rgb": 15
41
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0933dd66f5454611671bc5b7a83a0107e8024cfddc96948857be98281c43fceb
3
+ size 4991094616
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102646302b58baa606ff4f912598b6d659242139e0e20d61b2cec21e754bae5a
3
+ size 1582283096
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,991 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "agibot": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "top_head_pad_res256_freq10",
12
+ "hand_left_pad_res256_freq10",
13
+ "hand_right_pad_res256_freq10"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "left_arm_joint_position",
25
+ "right_arm_joint_position",
26
+ "left_effector_position",
27
+ "right_effector_position",
28
+ "head_position",
29
+ "waist_pitch",
30
+ "waist_lift"
31
+ ],
32
+ "sin_cos_embedding_keys": [
33
+ "left_arm_joint_position",
34
+ "right_arm_joint_position",
35
+ "head_position",
36
+ "waist_pitch"
37
+ ],
38
+ "mean_std_embedding_keys": null,
39
+ "action_configs": null
40
+ },
41
+ "action": {
42
+ "delta_indices": [
43
+ 0,
44
+ 1,
45
+ 2,
46
+ 3,
47
+ 4,
48
+ 5,
49
+ 6,
50
+ 7,
51
+ 8,
52
+ 9,
53
+ 10,
54
+ 11,
55
+ 12,
56
+ 13,
57
+ 14,
58
+ 15,
59
+ 16,
60
+ 17,
61
+ 18,
62
+ 19,
63
+ 20,
64
+ 21,
65
+ 22,
66
+ 23,
67
+ 24,
68
+ 25,
69
+ 26,
70
+ 27,
71
+ 28,
72
+ 29,
73
+ 30,
74
+ 31,
75
+ 32,
76
+ 33,
77
+ 34,
78
+ 35,
79
+ 36,
80
+ 37,
81
+ 38,
82
+ 39,
83
+ 40,
84
+ 41,
85
+ 42,
86
+ 43,
87
+ 44,
88
+ 45,
89
+ 46,
90
+ 47,
91
+ 48,
92
+ 49
93
+ ],
94
+ "modality_keys": [
95
+ "left_arm_joint_position",
96
+ "right_arm_joint_position",
97
+ "left_effector_position",
98
+ "right_effector_position",
99
+ "head_position",
100
+ "waist_pitch",
101
+ "waist_lift",
102
+ "robot_velocity"
103
+ ],
104
+ "sin_cos_embedding_keys": null,
105
+ "mean_std_embedding_keys": null,
106
+ "action_configs": [
107
+ {
108
+ "rep": "RELATIVE",
109
+ "type": "NON_EEF",
110
+ "format": "DEFAULT",
111
+ "state_key": null
112
+ },
113
+ {
114
+ "rep": "RELATIVE",
115
+ "type": "NON_EEF",
116
+ "format": "DEFAULT",
117
+ "state_key": null
118
+ },
119
+ {
120
+ "rep": "ABSOLUTE",
121
+ "type": "NON_EEF",
122
+ "format": "DEFAULT",
123
+ "state_key": null
124
+ },
125
+ {
126
+ "rep": "ABSOLUTE",
127
+ "type": "NON_EEF",
128
+ "format": "DEFAULT",
129
+ "state_key": null
130
+ },
131
+ {
132
+ "rep": "ABSOLUTE",
133
+ "type": "NON_EEF",
134
+ "format": "DEFAULT",
135
+ "state_key": null
136
+ },
137
+ {
138
+ "rep": "ABSOLUTE",
139
+ "type": "NON_EEF",
140
+ "format": "DEFAULT",
141
+ "state_key": null
142
+ },
143
+ {
144
+ "rep": "ABSOLUTE",
145
+ "type": "NON_EEF",
146
+ "format": "DEFAULT",
147
+ "state_key": null
148
+ },
149
+ {
150
+ "rep": "ABSOLUTE",
151
+ "type": "NON_EEF",
152
+ "format": "DEFAULT",
153
+ "state_key": null
154
+ }
155
+ ]
156
+ },
157
+ "language": {
158
+ "delta_indices": [
159
+ 0
160
+ ],
161
+ "modality_keys": [
162
+ "annotation.language.action_text"
163
+ ],
164
+ "sin_cos_embedding_keys": null,
165
+ "mean_std_embedding_keys": null,
166
+ "action_configs": null
167
+ }
168
+ },
169
+ "sim_behavior_r1_pro": {
170
+ "video": {
171
+ "delta_indices": [
172
+ 0
173
+ ],
174
+ "modality_keys": [
175
+ "observation.images.rgb.head_256_256",
176
+ "observation.images.rgb.left_wrist_256_256",
177
+ "observation.images.rgb.right_wrist_256_256"
178
+ ],
179
+ "sin_cos_embedding_keys": null,
180
+ "mean_std_embedding_keys": null,
181
+ "action_configs": null
182
+ },
183
+ "state": {
184
+ "delta_indices": [
185
+ 0
186
+ ],
187
+ "modality_keys": [
188
+ "robot_pos",
189
+ "robot_ori_cos",
190
+ "robot_ori_sin",
191
+ "robot_2d_ori",
192
+ "robot_2d_ori_cos",
193
+ "robot_2d_ori_sin",
194
+ "robot_lin_vel",
195
+ "robot_ang_vel",
196
+ "arm_left_qpos",
197
+ "arm_left_qpos_sin",
198
+ "arm_left_qpos_cos",
199
+ "eef_left_pos",
200
+ "eef_left_quat",
201
+ "gripper_left_qpos",
202
+ "arm_right_qpos",
203
+ "arm_right_qpos_sin",
204
+ "arm_right_qpos_cos",
205
+ "eef_right_pos",
206
+ "eef_right_quat",
207
+ "gripper_right_qpos",
208
+ "trunk_qpos"
209
+ ],
210
+ "sin_cos_embedding_keys": null,
211
+ "mean_std_embedding_keys": null,
212
+ "action_configs": null
213
+ },
214
+ "action": {
215
+ "delta_indices": [
216
+ 0,
217
+ 1,
218
+ 2,
219
+ 3,
220
+ 4,
221
+ 5,
222
+ 6,
223
+ 7,
224
+ 8,
225
+ 9,
226
+ 10,
227
+ 11,
228
+ 12,
229
+ 13,
230
+ 14,
231
+ 15,
232
+ 16,
233
+ 17,
234
+ 18,
235
+ 19,
236
+ 20,
237
+ 21,
238
+ 22,
239
+ 23,
240
+ 24,
241
+ 25,
242
+ 26,
243
+ 27,
244
+ 28,
245
+ 29,
246
+ 30,
247
+ 31
248
+ ],
249
+ "modality_keys": [
250
+ "base",
251
+ "torso",
252
+ "left_arm",
253
+ "left_gripper",
254
+ "right_arm",
255
+ "right_gripper"
256
+ ],
257
+ "sin_cos_embedding_keys": null,
258
+ "mean_std_embedding_keys": null,
259
+ "action_configs": [
260
+ {
261
+ "rep": "ABSOLUTE",
262
+ "type": "NON_EEF",
263
+ "format": "DEFAULT",
264
+ "state_key": null
265
+ },
266
+ {
267
+ "rep": "RELATIVE",
268
+ "type": "NON_EEF",
269
+ "format": "DEFAULT",
270
+ "state_key": "trunk_qpos"
271
+ },
272
+ {
273
+ "rep": "RELATIVE",
274
+ "type": "NON_EEF",
275
+ "format": "DEFAULT",
276
+ "state_key": "arm_left_qpos"
277
+ },
278
+ {
279
+ "rep": "ABSOLUTE",
280
+ "type": "NON_EEF",
281
+ "format": "DEFAULT",
282
+ "state_key": null
283
+ },
284
+ {
285
+ "rep": "RELATIVE",
286
+ "type": "NON_EEF",
287
+ "format": "DEFAULT",
288
+ "state_key": "arm_right_qpos"
289
+ },
290
+ {
291
+ "rep": "ABSOLUTE",
292
+ "type": "NON_EEF",
293
+ "format": "DEFAULT",
294
+ "state_key": null
295
+ }
296
+ ]
297
+ },
298
+ "language": {
299
+ "delta_indices": [
300
+ 0
301
+ ],
302
+ "modality_keys": [
303
+ "annotation.human.coarse_action"
304
+ ],
305
+ "sin_cos_embedding_keys": null,
306
+ "mean_std_embedding_keys": null,
307
+ "action_configs": null
308
+ }
309
+ },
310
+ "xdof": {
311
+ "video": {
312
+ "delta_indices": [
313
+ 0
314
+ ],
315
+ "modality_keys": [
316
+ "left_camera-images-rgb_320_240",
317
+ "top_camera-images-rgb_320_240",
318
+ "right_camera-images-rgb_320_240"
319
+ ],
320
+ "sin_cos_embedding_keys": null,
321
+ "mean_std_embedding_keys": null,
322
+ "action_configs": null
323
+ },
324
+ "state": {
325
+ "delta_indices": [
326
+ 0
327
+ ],
328
+ "modality_keys": [
329
+ "gripper_pos_obs_left",
330
+ "gripper_pos_obs_right",
331
+ "joint_pos_obs_left",
332
+ "joint_pos_obs_right"
333
+ ],
334
+ "sin_cos_embedding_keys": null,
335
+ "mean_std_embedding_keys": null,
336
+ "action_configs": null
337
+ },
338
+ "action": {
339
+ "delta_indices": [
340
+ 0,
341
+ 1,
342
+ 2,
343
+ 3,
344
+ 4,
345
+ 5,
346
+ 6,
347
+ 7,
348
+ 8,
349
+ 9,
350
+ 10,
351
+ 11,
352
+ 12,
353
+ 13,
354
+ 14,
355
+ 15,
356
+ 16,
357
+ 17,
358
+ 18,
359
+ 19,
360
+ 20,
361
+ 21,
362
+ 22,
363
+ 23,
364
+ 24,
365
+ 25,
366
+ 26,
367
+ 27,
368
+ 28,
369
+ 29,
370
+ 30,
371
+ 31,
372
+ 32,
373
+ 33,
374
+ 34,
375
+ 35,
376
+ 36,
377
+ 37,
378
+ 38,
379
+ 39,
380
+ 40,
381
+ 41,
382
+ 42,
383
+ 43,
384
+ 44,
385
+ 45,
386
+ 46,
387
+ 47,
388
+ 48,
389
+ 49
390
+ ],
391
+ "modality_keys": [
392
+ "gripper_pos_action_left",
393
+ "gripper_pos_action_right",
394
+ "joint_pos_action_left",
395
+ "joint_pos_action_right"
396
+ ],
397
+ "sin_cos_embedding_keys": null,
398
+ "mean_std_embedding_keys": null,
399
+ "action_configs": [
400
+ {
401
+ "rep": "ABSOLUTE",
402
+ "type": "NON_EEF",
403
+ "format": "DEFAULT",
404
+ "state_key": "gripper_pos_obs_left"
405
+ },
406
+ {
407
+ "rep": "ABSOLUTE",
408
+ "type": "NON_EEF",
409
+ "format": "DEFAULT",
410
+ "state_key": "gripper_pos_obs_right"
411
+ },
412
+ {
413
+ "rep": "RELATIVE",
414
+ "type": "NON_EEF",
415
+ "format": "DEFAULT",
416
+ "state_key": "joint_pos_obs_left"
417
+ },
418
+ {
419
+ "rep": "RELATIVE",
420
+ "type": "NON_EEF",
421
+ "format": "DEFAULT",
422
+ "state_key": "joint_pos_obs_right"
423
+ }
424
+ ]
425
+ },
426
+ "language": {
427
+ "delta_indices": [
428
+ 0
429
+ ],
430
+ "modality_keys": [
431
+ "annotation.task"
432
+ ],
433
+ "sin_cos_embedding_keys": null,
434
+ "mean_std_embedding_keys": null,
435
+ "action_configs": null
436
+ }
437
+ },
438
+ "gr1_unified": {
439
+ "video": {
440
+ "delta_indices": [
441
+ 0
442
+ ],
443
+ "modality_keys": [
444
+ "ego_view_bg_crop_pad_res256_freq20"
445
+ ],
446
+ "sin_cos_embedding_keys": null,
447
+ "mean_std_embedding_keys": null,
448
+ "action_configs": null
449
+ },
450
+ "state": {
451
+ "delta_indices": [
452
+ 0
453
+ ],
454
+ "modality_keys": [
455
+ "left_arm",
456
+ "right_arm",
457
+ "left_hand",
458
+ "right_hand",
459
+ "waist"
460
+ ],
461
+ "sin_cos_embedding_keys": [
462
+ "left_arm",
463
+ "right_arm",
464
+ "left_hand",
465
+ "right_hand",
466
+ "waist"
467
+ ],
468
+ "mean_std_embedding_keys": null,
469
+ "action_configs": null
470
+ },
471
+ "action": {
472
+ "delta_indices": [
473
+ 0,
474
+ 1,
475
+ 2,
476
+ 3,
477
+ 4,
478
+ 5,
479
+ 6,
480
+ 7,
481
+ 8,
482
+ 9,
483
+ 10,
484
+ 11,
485
+ 12,
486
+ 13,
487
+ 14,
488
+ 15
489
+ ],
490
+ "modality_keys": [
491
+ "left_arm",
492
+ "right_arm",
493
+ "left_hand",
494
+ "right_hand",
495
+ "waist"
496
+ ],
497
+ "sin_cos_embedding_keys": null,
498
+ "mean_std_embedding_keys": null,
499
+ "action_configs": [
500
+ {
501
+ "rep": "RELATIVE",
502
+ "type": "NON_EEF",
503
+ "format": "DEFAULT",
504
+ "state_key": null
505
+ },
506
+ {
507
+ "rep": "RELATIVE",
508
+ "type": "NON_EEF",
509
+ "format": "DEFAULT",
510
+ "state_key": null
511
+ },
512
+ {
513
+ "rep": "RELATIVE",
514
+ "type": "NON_EEF",
515
+ "format": "DEFAULT",
516
+ "state_key": null
517
+ },
518
+ {
519
+ "rep": "RELATIVE",
520
+ "type": "NON_EEF",
521
+ "format": "DEFAULT",
522
+ "state_key": null
523
+ },
524
+ {
525
+ "rep": "ABSOLUTE",
526
+ "type": "NON_EEF",
527
+ "format": "DEFAULT",
528
+ "state_key": null
529
+ }
530
+ ]
531
+ },
532
+ "language": {
533
+ "delta_indices": [
534
+ 0
535
+ ],
536
+ "modality_keys": [
537
+ "task"
538
+ ],
539
+ "sin_cos_embedding_keys": null,
540
+ "mean_std_embedding_keys": null,
541
+ "action_configs": null
542
+ },
543
+ "rl_info": {
544
+ "delta_indices": [
545
+ 0
546
+ ],
547
+ "modality_keys": [],
548
+ "sin_cos_embedding_keys": null,
549
+ "mean_std_embedding_keys": null,
550
+ "action_configs": null
551
+ }
552
+ },
553
+ "language_table_sim": {
554
+ "video": {
555
+ "delta_indices": [
556
+ 0
557
+ ],
558
+ "modality_keys": [
559
+ "rgb_pad_res256_freq10"
560
+ ],
561
+ "sin_cos_embedding_keys": null,
562
+ "mean_std_embedding_keys": null,
563
+ "action_configs": null
564
+ },
565
+ "state": {
566
+ "delta_indices": [
567
+ 0
568
+ ],
569
+ "modality_keys": [
570
+ "effector_translation",
571
+ "effector_target_translation"
572
+ ],
573
+ "sin_cos_embedding_keys": null,
574
+ "mean_std_embedding_keys": null,
575
+ "action_configs": null
576
+ },
577
+ "action": {
578
+ "delta_indices": [
579
+ 0,
580
+ 1,
581
+ 2,
582
+ 3,
583
+ 4,
584
+ 5,
585
+ 6,
586
+ 7,
587
+ 8,
588
+ 9,
589
+ 10,
590
+ 11,
591
+ 12,
592
+ 13,
593
+ 14,
594
+ 15
595
+ ],
596
+ "modality_keys": [
597
+ "action"
598
+ ],
599
+ "sin_cos_embedding_keys": null,
600
+ "mean_std_embedding_keys": null,
601
+ "action_configs": [
602
+ {
603
+ "rep": "ABSOLUTE",
604
+ "type": "NON_EEF",
605
+ "format": "DEFAULT",
606
+ "state_key": null
607
+ }
608
+ ]
609
+ },
610
+ "language": {
611
+ "delta_indices": [
612
+ 0
613
+ ],
614
+ "modality_keys": [
615
+ "annotation.language.instruction"
616
+ ],
617
+ "sin_cos_embedding_keys": null,
618
+ "mean_std_embedding_keys": null,
619
+ "action_configs": null
620
+ }
621
+ },
622
+ "robocasa_panda_omron": {
623
+ "video": {
624
+ "delta_indices": [
625
+ 0
626
+ ],
627
+ "modality_keys": [
628
+ "res256_image_side_0",
629
+ "res256_image_side_1",
630
+ "res256_image_wrist_0"
631
+ ],
632
+ "sin_cos_embedding_keys": null,
633
+ "mean_std_embedding_keys": null,
634
+ "action_configs": null
635
+ },
636
+ "state": {
637
+ "delta_indices": [
638
+ 0
639
+ ],
640
+ "modality_keys": [
641
+ "end_effector_position_relative",
642
+ "end_effector_rotation_relative",
643
+ "gripper_qpos",
644
+ "base_position",
645
+ "base_rotation"
646
+ ],
647
+ "sin_cos_embedding_keys": null,
648
+ "mean_std_embedding_keys": null,
649
+ "action_configs": null
650
+ },
651
+ "action": {
652
+ "delta_indices": [
653
+ 0,
654
+ 1,
655
+ 2,
656
+ 3,
657
+ 4,
658
+ 5,
659
+ 6,
660
+ 7,
661
+ 8,
662
+ 9,
663
+ 10,
664
+ 11,
665
+ 12,
666
+ 13,
667
+ 14,
668
+ 15
669
+ ],
670
+ "modality_keys": [
671
+ "end_effector_position",
672
+ "end_effector_rotation",
673
+ "gripper_close",
674
+ "base_motion",
675
+ "control_mode"
676
+ ],
677
+ "sin_cos_embedding_keys": null,
678
+ "mean_std_embedding_keys": null,
679
+ "action_configs": [
680
+ {
681
+ "rep": "ABSOLUTE",
682
+ "type": "NON_EEF",
683
+ "format": "DEFAULT",
684
+ "state_key": null
685
+ },
686
+ {
687
+ "rep": "ABSOLUTE",
688
+ "type": "NON_EEF",
689
+ "format": "DEFAULT",
690
+ "state_key": null
691
+ },
692
+ {
693
+ "rep": "ABSOLUTE",
694
+ "type": "NON_EEF",
695
+ "format": "DEFAULT",
696
+ "state_key": null
697
+ },
698
+ {
699
+ "rep": "ABSOLUTE",
700
+ "type": "NON_EEF",
701
+ "format": "DEFAULT",
702
+ "state_key": null
703
+ },
704
+ {
705
+ "rep": "ABSOLUTE",
706
+ "type": "NON_EEF",
707
+ "format": "DEFAULT",
708
+ "state_key": null
709
+ }
710
+ ]
711
+ },
712
+ "language": {
713
+ "delta_indices": [
714
+ 0
715
+ ],
716
+ "modality_keys": [
717
+ "annotation.human.action.task_description"
718
+ ],
719
+ "sin_cos_embedding_keys": null,
720
+ "mean_std_embedding_keys": null,
721
+ "action_configs": null
722
+ }
723
+ },
724
+ "unitree_g1_full_body_with_waist_height_nav_cmd": {
725
+ "video": {
726
+ "delta_indices": [
727
+ 0
728
+ ],
729
+ "modality_keys": [
730
+ "ego_view"
731
+ ],
732
+ "sin_cos_embedding_keys": null,
733
+ "mean_std_embedding_keys": null,
734
+ "action_configs": null
735
+ },
736
+ "state": {
737
+ "delta_indices": [
738
+ 0
739
+ ],
740
+ "modality_keys": [
741
+ "left_leg",
742
+ "right_leg",
743
+ "waist",
744
+ "left_arm",
745
+ "right_arm",
746
+ "left_hand",
747
+ "right_hand"
748
+ ],
749
+ "sin_cos_embedding_keys": null,
750
+ "mean_std_embedding_keys": null,
751
+ "action_configs": null
752
+ },
753
+ "action": {
754
+ "delta_indices": [
755
+ 0,
756
+ 1,
757
+ 2,
758
+ 3,
759
+ 4,
760
+ 5,
761
+ 6,
762
+ 7,
763
+ 8,
764
+ 9,
765
+ 10,
766
+ 11,
767
+ 12,
768
+ 13,
769
+ 14,
770
+ 15,
771
+ 16,
772
+ 17,
773
+ 18,
774
+ 19,
775
+ 20,
776
+ 21,
777
+ 22,
778
+ 23,
779
+ 24,
780
+ 25,
781
+ 26,
782
+ 27,
783
+ 28,
784
+ 29,
785
+ 30,
786
+ 31,
787
+ 32,
788
+ 33,
789
+ 34,
790
+ 35,
791
+ 36,
792
+ 37,
793
+ 38,
794
+ 39,
795
+ 40,
796
+ 41,
797
+ 42,
798
+ 43,
799
+ 44,
800
+ 45,
801
+ 46,
802
+ 47,
803
+ 48,
804
+ 49
805
+ ],
806
+ "modality_keys": [
807
+ "left_arm",
808
+ "right_arm",
809
+ "left_hand",
810
+ "right_hand",
811
+ "waist",
812
+ "base_height_command",
813
+ "navigate_command"
814
+ ],
815
+ "sin_cos_embedding_keys": null,
816
+ "mean_std_embedding_keys": null,
817
+ "action_configs": [
818
+ {
819
+ "rep": "RELATIVE",
820
+ "type": "NON_EEF",
821
+ "format": "DEFAULT",
822
+ "state_key": null
823
+ },
824
+ {
825
+ "rep": "RELATIVE",
826
+ "type": "NON_EEF",
827
+ "format": "DEFAULT",
828
+ "state_key": null
829
+ },
830
+ {
831
+ "rep": "ABSOLUTE",
832
+ "type": "NON_EEF",
833
+ "format": "DEFAULT",
834
+ "state_key": null
835
+ },
836
+ {
837
+ "rep": "ABSOLUTE",
838
+ "type": "NON_EEF",
839
+ "format": "DEFAULT",
840
+ "state_key": null
841
+ },
842
+ {
843
+ "rep": "ABSOLUTE",
844
+ "type": "NON_EEF",
845
+ "format": "DEFAULT",
846
+ "state_key": null
847
+ },
848
+ {
849
+ "rep": "ABSOLUTE",
850
+ "type": "NON_EEF",
851
+ "format": "DEFAULT",
852
+ "state_key": null
853
+ },
854
+ {
855
+ "rep": "ABSOLUTE",
856
+ "type": "NON_EEF",
857
+ "format": "DEFAULT",
858
+ "state_key": null
859
+ }
860
+ ]
861
+ },
862
+ "language": {
863
+ "delta_indices": [
864
+ 0
865
+ ],
866
+ "modality_keys": [
867
+ "annotation.human.task_description"
868
+ ],
869
+ "sin_cos_embedding_keys": null,
870
+ "mean_std_embedding_keys": null,
871
+ "action_configs": null
872
+ }
873
+ },
874
+ "oxe_droid": {
875
+ "video": {
876
+ "delta_indices": [
877
+ 0
878
+ ],
879
+ "modality_keys": [
880
+ "exterior_image_1_left",
881
+ "wrist_image_left"
882
+ ],
883
+ "sin_cos_embedding_keys": null,
884
+ "mean_std_embedding_keys": null,
885
+ "action_configs": null
886
+ },
887
+ "state": {
888
+ "delta_indices": [
889
+ 0
890
+ ],
891
+ "modality_keys": [
892
+ "joint_position",
893
+ "gripper_position"
894
+ ],
895
+ "sin_cos_embedding_keys": null,
896
+ "mean_std_embedding_keys": null,
897
+ "action_configs": null
898
+ },
899
+ "action": {
900
+ "delta_indices": [
901
+ 0,
902
+ 1,
903
+ 2,
904
+ 3,
905
+ 4,
906
+ 5,
907
+ 6,
908
+ 7,
909
+ 8,
910
+ 9,
911
+ 10,
912
+ 11,
913
+ 12,
914
+ 13,
915
+ 14,
916
+ 15,
917
+ 16,
918
+ 17,
919
+ 18,
920
+ 19,
921
+ 20,
922
+ 21,
923
+ 22,
924
+ 23,
925
+ 24,
926
+ 25,
927
+ 26,
928
+ 27,
929
+ 28,
930
+ 29,
931
+ 30,
932
+ 31
933
+ ],
934
+ "modality_keys": [
935
+ "joint_position",
936
+ "gripper_position"
937
+ ],
938
+ "sin_cos_embedding_keys": null,
939
+ "mean_std_embedding_keys": null,
940
+ "action_configs": [
941
+ {
942
+ "rep": "RELATIVE",
943
+ "type": "NON_EEF",
944
+ "format": "DEFAULT",
945
+ "state_key": null
946
+ },
947
+ {
948
+ "rep": "ABSOLUTE",
949
+ "type": "NON_EEF",
950
+ "format": "DEFAULT",
951
+ "state_key": null
952
+ }
953
+ ]
954
+ },
955
+ "language": {
956
+ "delta_indices": [
957
+ 0
958
+ ],
959
+ "modality_keys": [
960
+ "annotation.language.language_instruction"
961
+ ],
962
+ "sin_cos_embedding_keys": null,
963
+ "mean_std_embedding_keys": null,
964
+ "action_configs": null
965
+ }
966
+ }
967
+ },
968
+ "image_crop_size": null,
969
+ "image_target_size": null,
970
+ "use_albumentations": true,
971
+ "random_rotation_angle": null,
972
+ "color_jitter_params": {
973
+ "brightness": 0.3,
974
+ "contrast": 0.4,
975
+ "saturation": 0.5,
976
+ "hue": 0.08
977
+ },
978
+ "shortest_image_edge": 256,
979
+ "crop_fraction": 0.95,
980
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
981
+ "model_type": "eagle",
982
+ "formalize_language": true,
983
+ "max_state_dim": 128,
984
+ "max_action_dim": 128,
985
+ "max_action_horizon": 50,
986
+ "use_percentiles": false,
987
+ "clip_outliers": true,
988
+ "apply_sincos_state_encoding": true,
989
+ "use_relative_action": true
990
+ }
991
+ }
statistics.json ADDED
The diff for this file is too large to render. See raw diff