| { |
| "processor_class": "Gr00tN1d7Processor", |
| "processor_kwargs": { |
| "modality_configs": { |
| "real_g1_relative_eef_relative_joints": { |
| "video": { |
| "delta_indices": [ |
| -20, |
| 0 |
| ], |
| "modality_keys": [ |
| "ego_view" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef_9d", |
| "right_wrist_eef_9d", |
| "left_hand", |
| "right_hand", |
| "left_arm", |
| "right_arm", |
| "waist" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef_9d", |
| "right_wrist_eef_9d", |
| "left_hand", |
| "right_hand", |
| "left_arm", |
| "right_arm", |
| "waist", |
| "base_height_command", |
| "navigate_command" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef_9d", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef_9d", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_hand", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_hand", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_arm", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_arm", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "waist", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "base_height_command", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "navigate_command", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.task_description" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "real_r1_pro_sharpa_relative_eef_mecka": { |
| "video": { |
| "delta_indices": [ |
| -30, |
| 0 |
| ], |
| "modality_keys": [ |
| "ego_view_cropratio_res320x240_freq30" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.coarse_action" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "real_r1_pro_sharpa_relative_eef_human": { |
| "video": { |
| "delta_indices": [ |
| -20, |
| 0 |
| ], |
| "modality_keys": [ |
| "ego_view_res320x240_freq20", |
| "left_wrist_view_res320x240_freq20", |
| "right_wrist_view_res320x240_freq20" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.coarse_action" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "real_r1_pro_sharpa_relative_eef": { |
| "video": { |
| "delta_indices": [ |
| -20, |
| 0 |
| ], |
| "modality_keys": [ |
| "ego_view_res320x240_freq20", |
| "left_wrist_view_res320x240_freq20", |
| "right_wrist_view_res320x240_freq20" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.coarse_action" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "xdof_relative_eef_relative_joint": { |
| "video": { |
| "delta_indices": [ |
| -30, |
| 0 |
| ], |
| "modality_keys": [ |
| "top_camera-images-rgb_320_240", |
| "left_camera-images-rgb_320_240", |
| "right_camera-images-rgb_320_240" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_gripper_pos", |
| "right_gripper_pos", |
| "left_joint_pos", |
| "right_joint_pos" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_gripper_pos", |
| "right_gripper_pos", |
| "left_joint_pos", |
| "right_joint_pos" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_gripper_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_gripper_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_joint_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_joint_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "real_r1_pro_sharpa_relative_eef_maxinsights": { |
| "video": { |
| "delta_indices": [ |
| -30, |
| 0 |
| ], |
| "modality_keys": [ |
| "ego_view_cropratio_res320x240_freq30" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_hand_joints", |
| "right_hand_joints" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_hand_joints", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.coarse_action" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "xdof_relative_eef_relative_joint_subtask": { |
| "video": { |
| "delta_indices": [ |
| -30, |
| 0 |
| ], |
| "modality_keys": [ |
| "top_camera-images-rgb_320_240", |
| "left_camera-images-rgb_320_240", |
| "right_camera-images-rgb_320_240" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_gripper_pos", |
| "right_gripper_pos", |
| "left_joint_pos", |
| "right_joint_pos" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "left_wrist_eef", |
| "right_wrist_eef", |
| "left_gripper_pos", |
| "right_gripper_pos", |
| "left_joint_pos", |
| "right_joint_pos" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_wrist_eef", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_gripper_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_gripper_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_joint_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_joint_pos", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.sub_task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "oxe_droid_relative_eef_relative_joint": { |
| "video": { |
| "delta_indices": [ |
| -15, |
| 0 |
| ], |
| "modality_keys": [ |
| "exterior_image_1_left", |
| "wrist_image_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "eef_9d", |
| "gripper_position", |
| "joint_position" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39 |
| ], |
| "modality_keys": [ |
| "eef_9d", |
| "gripper_position", |
| "joint_position" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "RELATIVE", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "eef_9d", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "gripper_position", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "RELATIVE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "joint_position", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.language.language_instruction" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "rob_surgical_bitrack": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_pose", |
| "right_pose", |
| "aux_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "left_pose", |
| "right_pose", |
| "aux_pose" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "left_pose", |
| "right_pose", |
| "aux_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "euler", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "euler", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "euler", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "euler", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "aux_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "euler", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "euler", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.instruction" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "ustc_torin_tuodao": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_joints", |
| "right_joints", |
| "left_pose", |
| "right_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "left_joints", |
| "right_joints" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "left_pose", |
| "right_pose" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "left_pose", |
| "left_gripper", |
| "right_pose", |
| "right_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.instruction" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "turin_mitic_ex_vivo": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_joints", |
| "psm2_joints", |
| "psm1_pose", |
| "psm2_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_joints", |
| "psm2_joints" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "psm1_pose", |
| "psm2_pose" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49, |
| 50 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm2_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.instruction" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "obuda_dvrk": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left", |
| "wrist_left", |
| "wrist_right" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "jhu_imerse_dvrk": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left", |
| "wrist_left", |
| "wrist_right" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49, |
| 50 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.task_description" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "jhu_imerse_dvrk_mono": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.task_description" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "ucb_dvrk": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "camera_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_joints", |
| "psm1_gripper", |
| "psm2_joints", |
| "psm2_gripper", |
| "psm1_pose", |
| "psm2_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_joints", |
| "psm1_gripper", |
| "psm2_joints", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "psm1_pose", |
| "psm2_pose" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "tud_tundra_ur5e": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "laparoscope_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "joint_position", |
| "eef_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "joint_position" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "eef_pose" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49, |
| 50 |
| ], |
| "modality_keys": [ |
| "eef_pose", |
| "gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "eef_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "ucsd_dvrk": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "camera_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm_retraction_pose", |
| "psm_retraction_gripper", |
| "psm_cutter_pose", |
| "psm_cutter_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm_retraction_pose", |
| "psm_retraction_gripper", |
| "psm_cutter_pose", |
| "psm_cutter_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49, |
| 50 |
| ], |
| "modality_keys": [ |
| "psm_retraction_pose", |
| "psm_retraction_gripper", |
| "psm_cutter_pose", |
| "psm_cutter_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm_retraction_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "wxyz", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "wxyz", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm_cutter_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "wxyz", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "wxyz", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "hamlyn_dvrk_30hz": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope", |
| "wrist_left", |
| "wrist_right" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_arm_pose", |
| "left_arm_gripper", |
| "right_arm_pose", |
| "right_arm_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "left_arm_pose", |
| "left_arm_gripper", |
| "right_arm_pose", |
| "right_arm_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "left_arm_pose", |
| "left_arm_gripper", |
| "right_arm_pose", |
| "right_arm_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_arm_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "wxyz", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "wxyz", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_arm_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "wxyz", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "wxyz", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "cmr_versius": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "left_pose", |
| "left_gripper", |
| "right_pose", |
| "right_gripper", |
| "translation_scaling", |
| "rotation_scaling", |
| "hapticengaged_left", |
| "hapticengaged_right" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "left_pose", |
| "left_gripper", |
| "right_pose", |
| "right_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "translation_scaling", |
| "rotation_scaling", |
| "hapticengaged_left", |
| "hapticengaged_right" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 2, |
| 4, |
| 6, |
| 8, |
| 10, |
| 12, |
| 14, |
| 16, |
| 18, |
| 20, |
| 22, |
| 24, |
| 26, |
| 28, |
| 30, |
| 32, |
| 34, |
| 36, |
| 38, |
| 40, |
| 42, |
| 44, |
| 46, |
| 48, |
| 50, |
| 52, |
| 54, |
| 56, |
| 58, |
| 60, |
| 62, |
| 64, |
| 66, |
| 68, |
| 70, |
| 72, |
| 74, |
| 76, |
| 78, |
| 80, |
| 82, |
| 84, |
| 86, |
| 88, |
| 90, |
| 92, |
| 94, |
| 96, |
| 98, |
| 100 |
| ], |
| "modality_keys": [ |
| "left_pose", |
| "left_gripper", |
| "right_pose", |
| "right_gripper", |
| "hapticengaged_left", |
| "hapticengaged_right" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "hapticengaged_left", |
| "hapticengaged_right" |
| ], |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "left_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": "translation_scaling", |
| "rotation_scaling_key": "rotation_scaling", |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "left_gripper", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": true |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "right_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": "translation_scaling", |
| "rotation_scaling_key": "rotation_scaling", |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": "right_gripper", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": true |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "skip", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "skip", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.task_description" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "jhu_imerse_star_il": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left", |
| "wrist_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "kuka_joint_pos", |
| "endo360_joint_pos", |
| "kuka_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "kuka_joint_pos", |
| "endo360_joint_pos" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": [ |
| "kuka_pose" |
| ], |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49, |
| 50 |
| ], |
| "modality_keys": [ |
| "kuka_pose" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "kuka_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.human.task_description" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "jhu_lscr_dvrk_smarts": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left", |
| "camera_side_view" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "quat", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "annotation.task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| }, |
| "stanford_dvrk_real": { |
| "video": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "endoscope_left" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "state": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| }, |
| "action": { |
| "delta_indices": [ |
| 0, |
| 1, |
| 2, |
| 3, |
| 4, |
| 5, |
| 6, |
| 7, |
| 8, |
| 9, |
| 10, |
| 11, |
| 12, |
| 13, |
| 14, |
| 15, |
| 16, |
| 17, |
| 18, |
| 19, |
| 20, |
| 21, |
| 22, |
| 23, |
| 24, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 30, |
| 31, |
| 32, |
| 33, |
| 34, |
| 35, |
| 36, |
| 37, |
| 38, |
| 39, |
| 40, |
| 41, |
| 42, |
| 43, |
| 44, |
| 45, |
| 46, |
| 47, |
| 48, |
| 49 |
| ], |
| "modality_keys": [ |
| "psm1_pose", |
| "psm1_gripper", |
| "psm2_pose", |
| "psm2_gripper" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": [ |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm1_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "euler", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "euler", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "REL_XYZ_ROT6D", |
| "type": "EEF", |
| "format": "XYZ_ROT6D", |
| "state_key": "psm2_pose", |
| "normalization_type": "percentile", |
| "input_rotation_format": "euler", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "euler", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| }, |
| { |
| "rep": "ABSOLUTE", |
| "type": "NON_EEF", |
| "format": "DEFAULT", |
| "state_key": null, |
| "normalization_type": "percentile", |
| "input_rotation_format": "quat", |
| "input_quat_order": "xyzw", |
| "reference_rotation_format": "rot6d", |
| "reference_quat_order": "xyzw", |
| "translation_scaling_key": null, |
| "rotation_scaling_key": null, |
| "hold_through_clutch": false |
| } |
| ] |
| }, |
| "language": { |
| "delta_indices": [ |
| 0 |
| ], |
| "modality_keys": [ |
| "task" |
| ], |
| "sin_cos_embedding_keys": null, |
| "mean_std_embedding_keys": null, |
| "min_max_embedding_keys": null, |
| "pass_through_keys": null, |
| "action_configs": null |
| } |
| } |
| }, |
| "image_crop_size": [ |
| 224, |
| 392 |
| ], |
| "image_target_size": [ |
| 236, |
| 414 |
| ], |
| "use_albumentations": false, |
| "random_rotation_angle": 5, |
| "color_jitter_params": { |
| "brightness": 0.12, |
| "contrast": 0.15, |
| "saturation": 0.15, |
| "hue": 0.02 |
| }, |
| "letter_box_transform": false, |
| "shortest_image_edge": null, |
| "crop_fraction": null, |
| "model_name": "nvidia/Cosmos-Reason2-2B", |
| "model_type": "qwen", |
| "formalize_language": true, |
| "max_state_dim": 132, |
| "max_action_dim": 132, |
| "max_action_horizon": 50, |
| "use_percentiles": true, |
| "use_mean_std": false, |
| "clip_outliers": true, |
| "apply_sincos_state_encoding": false, |
| "use_relative_action": true, |
| "exclude_state": false |
| } |
| } |
|
|