Robotics
Safetensors
Gr00tN1d6
HuFY-NV youliangt rchand committed on
Commit
e76ae68
·
verified ·
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files

Co-authored-by: youliangt <youliangt@users.noreply.huggingface.co>
Co-authored-by: rchand <rchand@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
EXPLAINABILITY.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Explainability**
2
+
3
+ |Field:|Response:|
4
+ |:---:|:---:|
5
+ |Intended Domain:| Open foundation model for generalized humanoid robot reasoning and skills.|
6
+ |Model Type: |Robot VLA model|
7
+ |Intended Users:|This model is intended for developers and community members who build and fine-tune robot foundation models.|
8
+ |Output:|The model outputs actions, represented as floating-point values. This output is referred to as the "robot action policy." Actions consist of continuous-value vectors that correspond to different motor controls on a robot.|
9
+ |Describe how the model works:|Accepts vision, language and proprioception, outputs robot action policy.|
10
+ |Technical Limitations & Mitigation:| This model is not tested or intended for use in mission critical applications that require functional safety. The use of the model in those applications is at the user's own risk and sole responsibility, including taking the necessary steps to add needed guardrails or safety mechanisms prior to deployment.<br><br>Risk: Model underperformance in highly dynamic environments with varying robot surroundings (e.g., furniture and other objects) and lighting conditions.<br>Mitigation: Enhance the dataset with dynamic obstacle scenarios and fine-tune models accordingly.<br><br>Risk: Integration challenges in specific customer environments with varying robot surroundings (e.g., furniture and other objects) and lighting conditions.<br>Mitigation: Provide detailed integration guides and support, leveraging NVIDIA's ecosystem.<br><br>Risk: Limited initial support for certain robot embodiments.<br>Mitigation: Expand testing and validation across a wider range of robot platforms.|
11
+ |Verified to have met prescribed quality standards?|Yes|
12
+ |Performance Metrics:|Success rate, as well as the following:<br>1) if the trajectory is smooth and does not jitter<br>2) if the robot does not hit any other objects<br>3) if the trajectory is natural|
13
+ |Potential Known Risks:|This model is not tested or intended for use in mission critical applications that require functional safety. The use of the model in those applications is at the user's own risk and sole responsibility, including taking the necessary steps to add needed guardrails or safety mechanisms prior to deployment.|
14
+ |End User License Agreement:| Your use of this model is governed by the [NSCL V1 License](https://developer.download.nvidia.com/licenses/NVIDIA-OneWay-Noncommercial-License-22Mar2022.pdf).|
LICENSE ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NVIDIA License
2
+ 1. Definitions
3
+ “Licensor” means any person or entity that distributes its Work.
4
+ “Work” means (a) the original work of authorship made available under this license,
5
+ which may include software, documentation, or other files, and (b) any additions to or
6
+ derivative works thereof that are made available under this license.
7
+ The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the
8
+ meaning as provided under U.S. copyright law; provided, however, that for the purposes
9
+ of this license, derivative works shall not include works that remain separable from, or
10
+ merely link (or bind by name) to the interfaces of, the Work.
11
+ Works are “made available” under this license by including in or with the Work either (a)
12
+ a copyright notice referencing the applicability of this license to the Work, or (b) a copy
13
+ of this license.
14
+ 2. License Grant
15
+ 2.1 Copyright Grant. Subject to the terms and conditions of this license, each
16
+ Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free,
17
+ copyright license to use, reproduce, prepare derivative works of, publicly display,
18
+ publicly perform, sublicense and distribute its Work and any resulting derivative
19
+ works in any form.
20
+ 3. Limitations
21
+ 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so
22
+ under this license, (b) you include a complete copy of this license with your
23
+ distribution, and (c) you retain without modification any copyright, patent,
24
+ trademark, or attribution notices that are present in the Work.
25
+ 3.2 Derivative Works. You may specify that additional or different terms apply to
26
+ the use, reproduction, and distribution of your derivative works of the Work (“Your
27
+ Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3
28
+ applies to your derivative works, and (b) you identify the specific derivative works
29
+ that are subject to Your Terms. Notwithstanding Your Terms, this license (including
30
+ the redistribution requirements in Section 3.1) will continue to apply to the Work
31
+ itself.
32
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be used
33
+ or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA
34
+ Corporation and its affiliates may use the Work and any derivative works
35
+ commercially. As used herein, “non-commercially” means for research or
36
+ evaluation purposes only.
37
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any
38
+ Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce
39
+ any patents that you allege are infringed by any Work, then your rights under this
40
+ license from such Licensor (including the grant in Section 2.1) will terminate
41
+ immediately.
42
+ 3.5 Trademarks. This license does not grant any rights to use any Licensor’s or its
43
+ affiliates’ names, logos, or trademarks, except as necessary to reproduce the
44
+ notices described in this license.
45
+ 3.6 Termination. If you violate any term of this license, then your rights under this
46
+ license (including the grant in Section 2.1) will terminate immediately.
47
+ 4. Disclaimer of Warranty.
48
+ THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
49
+ EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
50
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-
51
+ INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS
52
+ LICENSE.
53
+ 5. Limitation of Liability.
54
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
55
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
56
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
57
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR
58
+ RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT
59
+ NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR
60
+ DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES),
61
+ EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
PRIVACY.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # **Privacy**
2
+
3
+ |Field:|Response:|
4
+ |:---:|:---:|
5
+ |Generatable or reverse engineerable personal data?|None|
6
+ |Personal data used to create this model?|No|
7
+ |How often is dataset reviewed?|Before Release|
8
+ |Is there provenance for all datasets used in training?|Yes|
9
+ |Does data labeling (annotation, metadata) comply with privacy laws?|Yes|
10
+ |Is data compliant with data subject requests for data correction or removal, if such a request was made?|Yes|
11
+ |Applicable NVIDIA Privacy Policy|https://www.nvidia.com/en-us/about-nvidia/privacy-policy/|
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - nvidia/PhysicalAI-Robotics-GR00T-X-Embodiment-Sim
4
+ tags:
5
+ - robotics
6
+ base_model:
7
+ - nvidia/GR00T-N1.6-3B
8
+ ---
9
+
10
+ <div align="center">
11
+ <a href="https://github.com/NVIDIA/Isaac-GR00T">
12
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/67b8da81d01134f89899b4a7/8bFQa2ZIGCsOQQ2ho2N_U.png">
13
+ </a>
14
+ <div align="center">
15
+ <a href="https://github.com/NVIDIA/Isaac-GR00T">
16
+ <img src="https://img.shields.io/badge/GitHub-grey?logo=GitHub" alt="GitHub Badge">
17
+ </a>
18
+ <a href="https://developer.nvidia.com/isaac/gr00t">
19
+ <img src="https://img.shields.io/badge/Website-green" alt="Website Badge">
20
+ </a>
21
+ </div>
22
+ </div>
23
+
24
+ # GR00T-N1.6-bridge
25
+
26
+ This model is fine-tuned on the Bridge dataset, starting from the `GR00T-N1.6-3B` base model.
27
+
28
+ Refer to [Isaac-GR00T](https://github.com/NVIDIA/Isaac-GR00T) examples for more details.
29
+
30
+ # Usage
31
+ ```bash
32
+ .venv/bin/python gr00t/eval/run_gr00t_server.py \
33
+ --model-path nvidia/GR00T-N1.6-bridge \
34
+ --embodiment_tag OXE_WIDOWX \
35
+ --use_sim_policy_wrapper
36
+ ```
37
+
38
+ # Citation
39
+ ```bibtex
40
+ @misc{nvidia2025gr00tn1openfoundation,
41
+ title={GR00T N1: An Open Foundation Model for Generalist Humanoid Robots},
42
+ author={NVIDIA and Johan Bjorck and Fernando Castañeda and Nikita Cherniadev and Xingye Da and Runyu Ding and Linxi "Jim" Fan and Yu Fang and Dieter Fox and Fengyuan Hu and Spencer Huang and Joel Jang and Zhenyu Jiang and Jan Kautz and Kaushil Kundalia and Lawrence Lao and Zhiqi Li and Zongyu Lin and Kevin Lin and Guilin Liu and Edith Llontop and Loic Magne and Ajay Mandlekar and Avnish Narayan and Soroush Nasiriany and Scott Reed and You Liang Tan and Guanzhi Wang and Zu Wang and Jing Wang and Qi Wang and Jiannan Xiang and Yuqi Xie and Yinzhen Xu and Zhenjia Xu and Seonghyeon Ye and Zhiding Yu and Ao Zhang and Hao Zhang and Yizhou Zhao and Ruijie Zheng and Yuke Zhu},
43
+ year={2025},
44
+ eprint={2503.14734},
45
+ archivePrefix={arXiv},
46
+ primaryClass={cs.RO},
47
+ }
48
+ ```
SAFETY_and_SECURITY.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # **Safety & Security**
2
+
3
+ |Field:|Response:|
4
+ |:---:|:---:|
5
+ |Model Application(s):|Machinery and Robotics<br>Robot VLA - single-arm manipulation, bimanual gripper manipulation, bimanual dexterous-hand manipulation, and humanoid dexterous manipulation|
6
+ |Describe life critical application (if present):|This model is not tested or intended for use in mission critical applications that require functional safety. The use of the model in those applications is at the user's own risk and sole responsibility, including taking the necessary steps to add needed guardrails or safety mechanisms prior to deployment.|
7
+ |Use Case Restrictions:|Abide by the [NSCL V1 License](https://developer.download.nvidia.com/licenses/NVIDIA-OneWay-Noncommercial-License-22Mar2022.pdf)|
8
+ |Model and Dataset Restrictions:|The principle of least privilege (PoLP) is applied, limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints are adhered to.|
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_horizon": 50,
3
+ "add_pos_embed": true,
4
+ "apply_sincos_state_encoding": true,
5
+ "architectures": [
6
+ "Gr00tN1d6"
7
+ ],
8
+ "attn_dropout": 0.2,
9
+ "attn_implementation": null,
10
+ "backbone_embedding_dim": 2048,
11
+ "backbone_model_type": "eagle",
12
+ "backbone_trainable_params_fp32": true,
13
+ "collator_overwrite_image_inputs": false,
14
+ "color_jitter_params": {
15
+ "brightness": 0.1,
16
+ "contrast": 0.1,
17
+ "hue": 0.1,
18
+ "saturation": 0.1
19
+ },
20
+ "crop_fraction": 0.95,
21
+ "diffusion_model_cfg": {
22
+ "attention_head_dim": 48,
23
+ "dropout": 0.2,
24
+ "final_dropout": true,
25
+ "interleave_self_attention": true,
26
+ "norm_type": "ada_norm",
27
+ "num_attention_heads": 32,
28
+ "num_layers": 32,
29
+ "output_dim": 1024,
30
+ "positional_embeddings": null
31
+ },
32
+ "eagle_collator": true,
33
+ "formalize_language": true,
34
+ "gemma_collator": false,
35
+ "hidden_size": 1024,
36
+ "image_crop_size": null,
37
+ "image_target_size": null,
38
+ "input_embedding_dim": 1536,
39
+ "load_bf16": true,
40
+ "max_action_dim": 128,
41
+ "max_num_embodiments": 32,
42
+ "max_seq_len": 1024,
43
+ "max_state_dim": 128,
44
+ "model_dtype": "bfloat16",
45
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
46
+ "model_type": "Gr00tN1d6",
47
+ "noise_beta_alpha": 1.5,
48
+ "noise_beta_beta": 1.0,
49
+ "noise_s": 0.999,
50
+ "num_inference_timesteps": 4,
51
+ "num_timestep_buckets": 1000,
52
+ "random_rotation_angle": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 16,
55
+ "shortest_image_edge": 256,
56
+ "state_dropout_prob": 0.8,
57
+ "torch_dtype": "bfloat16",
58
+ "transformers_version": "4.51.3",
59
+ "tune_diffusion_model": true,
60
+ "tune_llm": false,
61
+ "tune_projector": true,
62
+ "tune_top_llm_layers": 4,
63
+ "tune_visual": false,
64
+ "tune_vlln": true,
65
+ "use_albumentations_transforms": true,
66
+ "use_alternate_vl_dit": true,
67
+ "use_flash_attention": true,
68
+ "use_relative_action": true,
69
+ "use_vlln": true
70
+ }
embodiment_id.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "robocasa_panda_omron": 13,
3
+ "gr1": 20,
4
+ "behavior_r1_pro": 24,
5
+ "unitree_g1": 8,
6
+ "oxe_google": 0,
7
+ "oxe_widowx": 1,
8
+ "libero_panda": 2
9
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:489692bc8da59594b1864ae76d72c497c0bdb08e36e1fed43543a2d3347b1530
3
+ size 4991094616
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30acaf9de3408587611ac2c50a6afe74a4138995f043f78e4f75a516c54ea7e9
3
+ size 1582283096
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,494 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "Gr00tN1d6Processor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "behavior_r1_pro": {
6
+ "video": {
7
+ "delta_indices": [
8
+ 0
9
+ ],
10
+ "modality_keys": [
11
+ "observation.images.rgb.head_256_256",
12
+ "observation.images.rgb.left_wrist_256_256",
13
+ "observation.images.rgb.right_wrist_256_256"
14
+ ],
15
+ "sin_cos_embedding_keys": null,
16
+ "mean_std_embedding_keys": null,
17
+ "action_configs": null
18
+ },
19
+ "state": {
20
+ "delta_indices": [
21
+ 0
22
+ ],
23
+ "modality_keys": [
24
+ "robot_pos",
25
+ "robot_ori_cos",
26
+ "robot_ori_sin",
27
+ "robot_2d_ori",
28
+ "robot_2d_ori_cos",
29
+ "robot_2d_ori_sin",
30
+ "robot_lin_vel",
31
+ "robot_ang_vel",
32
+ "arm_left_qpos",
33
+ "arm_left_qpos_sin",
34
+ "arm_left_qpos_cos",
35
+ "eef_left_pos",
36
+ "eef_left_quat",
37
+ "gripper_left_qpos",
38
+ "arm_right_qpos",
39
+ "arm_right_qpos_sin",
40
+ "arm_right_qpos_cos",
41
+ "eef_right_pos",
42
+ "eef_right_quat",
43
+ "gripper_right_qpos",
44
+ "trunk_qpos"
45
+ ],
46
+ "sin_cos_embedding_keys": null,
47
+ "mean_std_embedding_keys": null,
48
+ "action_configs": null
49
+ },
50
+ "action": {
51
+ "delta_indices": [
52
+ 0,
53
+ 1,
54
+ 2,
55
+ 3,
56
+ 4,
57
+ 5,
58
+ 6,
59
+ 7,
60
+ 8,
61
+ 9,
62
+ 10,
63
+ 11,
64
+ 12,
65
+ 13,
66
+ 14,
67
+ 15,
68
+ 16,
69
+ 17,
70
+ 18,
71
+ 19,
72
+ 20,
73
+ 21,
74
+ 22,
75
+ 23,
76
+ 24,
77
+ 25,
78
+ 26,
79
+ 27,
80
+ 28,
81
+ 29,
82
+ 30,
83
+ 31
84
+ ],
85
+ "modality_keys": [
86
+ "base",
87
+ "torso",
88
+ "left_arm",
89
+ "left_gripper",
90
+ "right_arm",
91
+ "right_gripper"
92
+ ],
93
+ "sin_cos_embedding_keys": null,
94
+ "mean_std_embedding_keys": null,
95
+ "action_configs": [
96
+ {
97
+ "rep": "ABSOLUTE",
98
+ "type": "NON_EEF",
99
+ "format": "DEFAULT",
100
+ "state_key": null
101
+ },
102
+ {
103
+ "rep": "RELATIVE",
104
+ "type": "NON_EEF",
105
+ "format": "DEFAULT",
106
+ "state_key": "trunk_qpos"
107
+ },
108
+ {
109
+ "rep": "RELATIVE",
110
+ "type": "NON_EEF",
111
+ "format": "DEFAULT",
112
+ "state_key": "arm_left_qpos"
113
+ },
114
+ {
115
+ "rep": "ABSOLUTE",
116
+ "type": "NON_EEF",
117
+ "format": "DEFAULT",
118
+ "state_key": null
119
+ },
120
+ {
121
+ "rep": "RELATIVE",
122
+ "type": "NON_EEF",
123
+ "format": "DEFAULT",
124
+ "state_key": "arm_right_qpos"
125
+ },
126
+ {
127
+ "rep": "ABSOLUTE",
128
+ "type": "NON_EEF",
129
+ "format": "DEFAULT",
130
+ "state_key": null
131
+ }
132
+ ]
133
+ },
134
+ "language": {
135
+ "delta_indices": [
136
+ 0
137
+ ],
138
+ "modality_keys": [
139
+ "annotation.human.coarse_action"
140
+ ],
141
+ "sin_cos_embedding_keys": null,
142
+ "mean_std_embedding_keys": null,
143
+ "action_configs": null
144
+ }
145
+ },
146
+ "gr1": {
147
+ "video": {
148
+ "delta_indices": [
149
+ 0
150
+ ],
151
+ "modality_keys": [
152
+ "ego_view_bg_crop_pad_res256_freq20"
153
+ ],
154
+ "sin_cos_embedding_keys": null,
155
+ "mean_std_embedding_keys": null,
156
+ "action_configs": null
157
+ },
158
+ "state": {
159
+ "delta_indices": [
160
+ 0
161
+ ],
162
+ "modality_keys": [
163
+ "left_arm",
164
+ "right_arm",
165
+ "left_hand",
166
+ "right_hand",
167
+ "waist"
168
+ ],
169
+ "sin_cos_embedding_keys": [
170
+ "left_arm",
171
+ "right_arm",
172
+ "left_hand",
173
+ "right_hand",
174
+ "waist"
175
+ ],
176
+ "mean_std_embedding_keys": null,
177
+ "action_configs": null
178
+ },
179
+ "action": {
180
+ "delta_indices": [
181
+ 0,
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 4,
186
+ 5,
187
+ 6,
188
+ 7,
189
+ 8,
190
+ 9,
191
+ 10,
192
+ 11,
193
+ 12,
194
+ 13,
195
+ 14,
196
+ 15
197
+ ],
198
+ "modality_keys": [
199
+ "left_arm",
200
+ "right_arm",
201
+ "left_hand",
202
+ "right_hand",
203
+ "waist"
204
+ ],
205
+ "sin_cos_embedding_keys": null,
206
+ "mean_std_embedding_keys": null,
207
+ "action_configs": [
208
+ {
209
+ "rep": "RELATIVE",
210
+ "type": "NON_EEF",
211
+ "format": "DEFAULT",
212
+ "state_key": null
213
+ },
214
+ {
215
+ "rep": "RELATIVE",
216
+ "type": "NON_EEF",
217
+ "format": "DEFAULT",
218
+ "state_key": null
219
+ },
220
+ {
221
+ "rep": "RELATIVE",
222
+ "type": "NON_EEF",
223
+ "format": "DEFAULT",
224
+ "state_key": null
225
+ },
226
+ {
227
+ "rep": "RELATIVE",
228
+ "type": "NON_EEF",
229
+ "format": "DEFAULT",
230
+ "state_key": null
231
+ },
232
+ {
233
+ "rep": "ABSOLUTE",
234
+ "type": "NON_EEF",
235
+ "format": "DEFAULT",
236
+ "state_key": null
237
+ }
238
+ ]
239
+ },
240
+ "language": {
241
+ "delta_indices": [
242
+ 0
243
+ ],
244
+ "modality_keys": [
245
+ "task"
246
+ ],
247
+ "sin_cos_embedding_keys": null,
248
+ "mean_std_embedding_keys": null,
249
+ "action_configs": null
250
+ }
251
+ },
252
+ "robocasa_panda_omron": {
253
+ "video": {
254
+ "delta_indices": [
255
+ 0
256
+ ],
257
+ "modality_keys": [
258
+ "res256_image_side_0",
259
+ "res256_image_side_1",
260
+ "res256_image_wrist_0"
261
+ ],
262
+ "sin_cos_embedding_keys": null,
263
+ "mean_std_embedding_keys": null,
264
+ "action_configs": null
265
+ },
266
+ "state": {
267
+ "delta_indices": [
268
+ 0
269
+ ],
270
+ "modality_keys": [
271
+ "end_effector_position_relative",
272
+ "end_effector_rotation_relative",
273
+ "gripper_qpos",
274
+ "base_position",
275
+ "base_rotation"
276
+ ],
277
+ "sin_cos_embedding_keys": null,
278
+ "mean_std_embedding_keys": null,
279
+ "action_configs": null
280
+ },
281
+ "action": {
282
+ "delta_indices": [
283
+ 0,
284
+ 1,
285
+ 2,
286
+ 3,
287
+ 4,
288
+ 5,
289
+ 6,
290
+ 7,
291
+ 8,
292
+ 9,
293
+ 10,
294
+ 11,
295
+ 12,
296
+ 13,
297
+ 14,
298
+ 15
299
+ ],
300
+ "modality_keys": [
301
+ "end_effector_position",
302
+ "end_effector_rotation",
303
+ "gripper_close",
304
+ "base_motion",
305
+ "control_mode"
306
+ ],
307
+ "sin_cos_embedding_keys": null,
308
+ "mean_std_embedding_keys": null,
309
+ "action_configs": [
310
+ {
311
+ "rep": "ABSOLUTE",
312
+ "type": "NON_EEF",
313
+ "format": "DEFAULT",
314
+ "state_key": null
315
+ },
316
+ {
317
+ "rep": "ABSOLUTE",
318
+ "type": "NON_EEF",
319
+ "format": "DEFAULT",
320
+ "state_key": null
321
+ },
322
+ {
323
+ "rep": "ABSOLUTE",
324
+ "type": "NON_EEF",
325
+ "format": "DEFAULT",
326
+ "state_key": null
327
+ },
328
+ {
329
+ "rep": "ABSOLUTE",
330
+ "type": "NON_EEF",
331
+ "format": "DEFAULT",
332
+ "state_key": null
333
+ },
334
+ {
335
+ "rep": "ABSOLUTE",
336
+ "type": "NON_EEF",
337
+ "format": "DEFAULT",
338
+ "state_key": null
339
+ }
340
+ ]
341
+ },
342
+ "language": {
343
+ "delta_indices": [
344
+ 0
345
+ ],
346
+ "modality_keys": [
347
+ "annotation.human.action.task_description"
348
+ ],
349
+ "sin_cos_embedding_keys": null,
350
+ "mean_std_embedding_keys": null,
351
+ "action_configs": null
352
+ }
353
+ },
354
+ "oxe_widowx": {
355
+ "video": {
356
+ "delta_indices": [
357
+ 0
358
+ ],
359
+ "modality_keys": [
360
+ "image_0"
361
+ ],
362
+ "sin_cos_embedding_keys": null,
363
+ "mean_std_embedding_keys": null,
364
+ "action_configs": null
365
+ },
366
+ "state": {
367
+ "delta_indices": [
368
+ 0
369
+ ],
370
+ "modality_keys": [
371
+ "x",
372
+ "y",
373
+ "z",
374
+ "roll",
375
+ "pitch",
376
+ "yaw",
377
+ "pad",
378
+ "gripper"
379
+ ],
380
+ "sin_cos_embedding_keys": null,
381
+ "mean_std_embedding_keys": null,
382
+ "action_configs": null
383
+ },
384
+ "action": {
385
+ "delta_indices": [
386
+ 0,
387
+ 1,
388
+ 2,
389
+ 3,
390
+ 4,
391
+ 5,
392
+ 6,
393
+ 7
394
+ ],
395
+ "modality_keys": [
396
+ "x",
397
+ "y",
398
+ "z",
399
+ "roll",
400
+ "pitch",
401
+ "yaw",
402
+ "gripper"
403
+ ],
404
+ "sin_cos_embedding_keys": null,
405
+ "mean_std_embedding_keys": [
406
+ "x",
407
+ "y",
408
+ "z",
409
+ "roll",
410
+ "pitch",
411
+ "yaw"
412
+ ],
413
+ "action_configs": [
414
+ {
415
+ "rep": "ABSOLUTE",
416
+ "type": "NON_EEF",
417
+ "format": "DEFAULT",
418
+ "state_key": null
419
+ },
420
+ {
421
+ "rep": "ABSOLUTE",
422
+ "type": "NON_EEF",
423
+ "format": "DEFAULT",
424
+ "state_key": null
425
+ },
426
+ {
427
+ "rep": "ABSOLUTE",
428
+ "type": "NON_EEF",
429
+ "format": "DEFAULT",
430
+ "state_key": null
431
+ },
432
+ {
433
+ "rep": "ABSOLUTE",
434
+ "type": "NON_EEF",
435
+ "format": "DEFAULT",
436
+ "state_key": null
437
+ },
438
+ {
439
+ "rep": "ABSOLUTE",
440
+ "type": "NON_EEF",
441
+ "format": "DEFAULT",
442
+ "state_key": null
443
+ },
444
+ {
445
+ "rep": "ABSOLUTE",
446
+ "type": "NON_EEF",
447
+ "format": "DEFAULT",
448
+ "state_key": null
449
+ },
450
+ {
451
+ "rep": "ABSOLUTE",
452
+ "type": "NON_EEF",
453
+ "format": "DEFAULT",
454
+ "state_key": null
455
+ }
456
+ ]
457
+ },
458
+ "language": {
459
+ "delta_indices": [
460
+ 0
461
+ ],
462
+ "modality_keys": [
463
+ "annotation.human.action.task_description"
464
+ ],
465
+ "sin_cos_embedding_keys": null,
466
+ "mean_std_embedding_keys": null,
467
+ "action_configs": null
468
+ }
469
+ }
470
+ },
471
+ "image_crop_size": null,
472
+ "image_target_size": null,
473
+ "use_albumentations": true,
474
+ "random_rotation_angle": null,
475
+ "color_jitter_params": {
476
+ "brightness": 0.3,
477
+ "contrast": 0.4,
478
+ "saturation": 0.5,
479
+ "hue": 0.08
480
+ },
481
+ "shortest_image_edge": 256,
482
+ "crop_fraction": 0.95,
483
+ "model_name": "nvidia/Eagle-Block2A-2B-v2",
484
+ "model_type": "eagle",
485
+ "formalize_language": true,
486
+ "max_state_dim": 128,
487
+ "max_action_dim": 128,
488
+ "max_action_horizon": 50,
489
+ "use_percentiles": false,
490
+ "clip_outliers": true,
491
+ "apply_sincos_state_encoding": true,
492
+ "use_relative_action": true
493
+ }
494
+ }
statistics.json ADDED
The diff for this file is too large to render. See raw diff