{ "processor_class": "Gr00tN1d7Processor", "processor_kwargs": { "modality_configs": { "real_g1_relative_eef_relative_joints": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef_9d", "right_wrist_eef_9d", "left_hand", "right_hand", "left_arm", "right_arm", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef_9d", "right_wrist_eef_9d", "left_hand", "right_hand", "left_arm", "right_arm", "waist", "base_height_command", "navigate_command" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef_9d", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef_9d", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_arm", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_arm", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "waist", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "base_height_command", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "navigate_command", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_mecka": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "ego_view_cropratio_res320x240_freq30" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_human": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view_res320x240_freq20", "left_wrist_view_res320x240_freq20", "right_wrist_view_res320x240_freq20" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef": { "video": { "delta_indices": [ -20, 0 ], "modality_keys": [ "ego_view_res320x240_freq20", "left_wrist_view_res320x240_freq20", "right_wrist_view_res320x240_freq20" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "xdof_relative_eef_relative_joint": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "top_camera-images-rgb_320_240", "left_camera-images-rgb_320_240", "right_camera-images-rgb_320_240" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_gripper_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_gripper_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_joint_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_joint_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "real_r1_pro_sharpa_relative_eef_maxinsights": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "ego_view_cropratio_res320x240_freq30" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_hand_joints", "right_hand_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_hand_joints", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.coarse_action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "xdof_relative_eef_relative_joint_subtask": { "video": { "delta_indices": [ -30, 0 ], "modality_keys": [ "top_camera-images-rgb_320_240", "left_camera-images-rgb_320_240", "right_camera-images-rgb_320_240" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "left_wrist_eef", "right_wrist_eef", "left_gripper_pos", "right_gripper_pos", "left_joint_pos", "right_joint_pos" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_wrist_eef", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_gripper_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_gripper_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_joint_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_joint_pos", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.sub_task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "oxe_droid_relative_eef_relative_joint": { "video": { "delta_indices": [ -15, 0 ], "modality_keys": [ "exterior_image_1_left", "wrist_image_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "eef_9d", "gripper_position", "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "eef_9d", "gripper_position", "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "RELATIVE", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "eef_9d", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "gripper_position", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "RELATIVE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "joint_position", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.language.language_instruction" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "rob_surgical_bitrack": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_pose", "right_pose", "aux_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "left_pose", "right_pose", "aux_pose" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "left_pose", "right_pose", "aux_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_pose", "normalization_type": "percentile", "input_rotation_format": "euler", "input_quat_order": "xyzw", "reference_rotation_format": "euler", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_pose", "normalization_type": "percentile", "input_rotation_format": "euler", "input_quat_order": "xyzw", "reference_rotation_format": "euler", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "aux_pose", "normalization_type": "percentile", "input_rotation_format": "euler", "input_quat_order": "xyzw", "reference_rotation_format": "euler", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.instruction" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "ustc_torin_tuodao": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_joints", "right_joints", "left_pose", "right_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "left_joints", "right_joints" ], "min_max_embedding_keys": null, "pass_through_keys": [ "left_pose", "right_pose" ], "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "left_pose", "left_gripper", "right_pose", "right_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.instruction" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "turin_mitic_ex_vivo": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_joints", "psm2_joints", "psm1_pose", "psm2_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_joints", "psm2_joints" ], "min_max_embedding_keys": null, "pass_through_keys": [ "psm1_pose", "psm2_pose" ], "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ], "modality_keys": [ "psm1_pose", "psm2_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.instruction" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "obuda_dvrk": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "jhu_imerse_dvrk": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "jhu_imerse_dvrk_mono": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "ucb_dvrk": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "camera_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_joints", "psm1_gripper", "psm2_joints", "psm2_gripper", "psm1_pose", "psm2_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_joints", "psm1_gripper", "psm2_joints", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": [ "psm1_pose", "psm2_pose" ], "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "tud_tundra_ur5e": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "laparoscope_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "eef_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "joint_position" ], "min_max_embedding_keys": null, "pass_through_keys": [ "eef_pose" ], "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ], "modality_keys": [ "eef_pose", "gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "eef_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "ucsd_dvrk": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "camera_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm_retraction_pose", "psm_retraction_gripper", "psm_cutter_pose", "psm_cutter_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm_retraction_pose", "psm_retraction_gripper", "psm_cutter_pose", "psm_cutter_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ], "modality_keys": [ "psm_retraction_pose", "psm_retraction_gripper", "psm_cutter_pose", "psm_cutter_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm_retraction_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "wxyz", "reference_rotation_format": "quat", "reference_quat_order": "wxyz", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm_cutter_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "wxyz", "reference_rotation_format": "quat", "reference_quat_order": "wxyz", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "hamlyn_dvrk_30hz": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm_pose", "left_arm_gripper", "right_arm_pose", "right_arm_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "left_arm_pose", "left_arm_gripper", "right_arm_pose", "right_arm_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "left_arm_pose", "left_arm_gripper", "right_arm_pose", "right_arm_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_arm_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "wxyz", "reference_rotation_format": "quat", "reference_quat_order": "wxyz", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_arm_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "wxyz", "reference_rotation_format": "quat", "reference_quat_order": "wxyz", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "cmr_versius": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_pose", "left_gripper", "right_pose", "right_gripper", "translation_scaling", "rotation_scaling", "hapticengaged_left", "hapticengaged_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "left_pose", "left_gripper", "right_pose", "right_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": [ "translation_scaling", "rotation_scaling", "hapticengaged_left", "hapticengaged_right" ], "action_configs": null }, "action": { "delta_indices": [ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100 ], "modality_keys": [ "left_pose", "left_gripper", "right_pose", "right_gripper", "hapticengaged_left", "hapticengaged_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": [ "hapticengaged_left", "hapticengaged_right" ], "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "left_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": "translation_scaling", "rotation_scaling_key": "rotation_scaling", "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "left_gripper", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": true }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "right_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": "translation_scaling", "rotation_scaling_key": "rotation_scaling", "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": "right_gripper", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": true }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "skip", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "skip", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "jhu_imerse_star_il": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left", "wrist_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "kuka_joint_pos", "endo360_joint_pos", "kuka_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "kuka_joint_pos", "endo360_joint_pos" ], "min_max_embedding_keys": null, "pass_through_keys": [ "kuka_pose" ], "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50 ], "modality_keys": [ "kuka_pose" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "kuka_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "jhu_lscr_dvrk_smarts": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left", "camera_side_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "quat", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } }, "stanford_dvrk_real": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "endoscope_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 ], "modality_keys": [ "psm1_pose", "psm1_gripper", "psm2_pose", "psm2_gripper" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": [ { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm1_pose", "normalization_type": "percentile", "input_rotation_format": "euler", "input_quat_order": "xyzw", "reference_rotation_format": "euler", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "REL_XYZ_ROT6D", "type": "EEF", "format": "XYZ_ROT6D", "state_key": "psm2_pose", "normalization_type": "percentile", "input_rotation_format": "euler", "input_quat_order": "xyzw", "reference_rotation_format": "euler", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null, "normalization_type": "percentile", "input_rotation_format": "quat", "input_quat_order": "xyzw", "reference_rotation_format": "rot6d", "reference_quat_order": "xyzw", "translation_scaling_key": null, "rotation_scaling_key": null, "hold_through_clutch": false } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "task" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "min_max_embedding_keys": null, "pass_through_keys": null, "action_configs": null } } }, "image_crop_size": [ 224, 392 ], "image_target_size": [ 236, 414 ], "use_albumentations": false, "random_rotation_angle": 5, "color_jitter_params": { "brightness": 0.12, "contrast": 0.15, "saturation": 0.15, "hue": 0.02 }, "letter_box_transform": false, "shortest_image_edge": null, "crop_fraction": null, "model_name": "nvidia/Cosmos-Reason2-2B", "model_type": "qwen", "formalize_language": true, "max_state_dim": 132, "max_action_dim": 132, "max_action_horizon": 50, "use_percentiles": true, "use_mean_std": false, "clip_outliers": true, "apply_sincos_state_encoding": false, "use_relative_action": true, "exclude_state": false } }