{ "processor_class": "Gr00tN1d7Processor", "processor_kwargs": { "modality_configs": { "real_g1_relative_eef_relative_joints": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef_9d", "right_wrist_eef_9d", "left_hand", "right_hand", "left_arm", "right_arm", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef_9d", "right_wrist_eef_9d", "left_hand", "right_hand", "left_arm", "right_arm", "waist", "base_height_command", "navigate_command" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef_9d" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef_9d" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_arm" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_arm" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "waist" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "base_height_command" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "navigate_command" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_mecka": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "ego_view_cropratio_res320x240_freq30" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_human": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view_res320x240_freq20", "left_wrist_view_res320x240_freq20", "right_wrist_view_res320x240_freq20" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view_res320x240_freq20", "left_wrist_view_res320x240_freq20", "right_wrist_view_res320x240_freq20" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "xdof_relative_eef_relative_joint": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "top_camera-images-rgb_320_240", "left_camera-images-rgb_320_240", "right_camera-images-rgb_320_240" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_gripper_pos" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_gripper_pos" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_joint_pos" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_joint_pos" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_maxinsights": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "ego_view_cropratio_res320x240_freq30" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "xdof_relative_eef_relative_joint_subtask": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "top_camera-images-rgb_320_240", "left_camera-images-rgb_320_240", "right_camera-images-rgb_320_240" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef" }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_gripper_pos" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_gripper_pos" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_joint_pos" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_joint_pos" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.sub_task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "oxe_droid_relative_eef_relative_joint": { "video": { "delta_indices": [ -15, 0 ], "modality_keys": [ "exterior_image_1_left", "wrist_image_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "eef_9d", "gripper_position", "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "eef_9d", "gripper_position", "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "eef_9d" }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "gripper_position" }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "joint_position" } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.language.language_instruction" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "new_embodiment": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_leg", "right_leg", "waist", "left_arm", "left_hand", "right_arm", "right_hand", "left_wrist_pos", "left_wrist_abs_quat", "right_wrist_pos", "right_wrist_abs_quat", "root_orientation", "projected_gravity", "cpp_rotation_offset", "init_base_quat" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "motion_token", "left_hand_joints", "right_hand_joints", "body_quat_w" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } } }, "image_crop_size": [ 230, 230 ], "image_target_size": [ 256, 256 ], "use_albumentations": true, "random_rotation_angle": 0, "color_jitter_params": { "brightness": 0.3, "contrast": 0.4, "saturation": 0.5, "hue": 0.08 }, "shortest_image_edge": 256, "crop_fraction": 0.95, "letter_box_transform": false, "model_name": "nvidia/Cosmos-Reason2-2B", "model_type": "qwen", "formalize_language": true, "max_state_dim": 132, "max_action_dim": 132, "max_action_horizon": 40, "use_percentiles": true, "use_mean_std": false, "clip_outliers": true, "apply_sincos_state_encoding": false, "use_relative_action": true, "exclude_state": false, "state_dropout_prob": 0.2 } }