{ "processor_class": "AlinVLAv0Processor", "processor_kwargs": { "modality_configs": { "humanoid_everyday_g1": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "egocentric_resized" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "kuka": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "cmu_stretch": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "stanford_hydra_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "iamlab_cmu_pickup_insert_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_fanuc_manipulation": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "viola": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "galaxea": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "utaustin_mutex": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "dobbe": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "taco_play": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "humanoid_everyday_h1": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "egocentric_resized" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_autolab_ur5": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "jaco_play": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "furniture_bench_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_cable_routing": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "nyu_franka_play_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "secondary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "bridge_orig": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "secondary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "droid": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "agibot_gripper": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "simulation_gr1": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_sirius_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_sailor_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "language_table": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "ucsd_kitchen_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "dlr_edan_shared_control_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "bc_z": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_buds_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "agibot_dexhand": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "fractal20220817_data": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "neural_gr1": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "roboturk": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "none" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "fmb_dataset": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "toto": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "action_net": { "video": { "delta_indices": [ -6, -4, -2, 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "new_embodiment": { "video": { "delta_indices": [ -9, -7, -5, -3, -8, -6, -4, -2, -7, -5, -3, -1, -6, -4, -2, 0 ], "modality_keys": [ "camera_ego_left", "camera_ego_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ -3, -2, -1, 0 ], "modality_keys": [ "right_arm_joints", "left_arm_joints", "right_hand_joints", "left_hand_joints", "neck_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 ], "modality_keys": [ "right_arm_joints", "left_arm_joints", "right_hand_joints", "left_hand_joints", "neck_joints" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } } }, "image_crop_size": null, "image_target_size": null, "use_albumentations": true, "random_rotation_angle": null, "color_jitter_params": { "brightness": 0.3, "contrast": 0.4, "hue": 0.08, "saturation": 0.5 }, "shortest_image_edge": null, "crop_fraction": 0.95, "max_area": 65536, "resize_m": 32, "model_name": "huiwon/alinvlm_v1_3", "model_type": "contextvla_qwen3_vl_legacy", "formalize_language": true, "max_state_dim": 64, "max_action_dim": 64, "max_action_horizon": 40, "use_percentiles": true, "clip_outliers": true, "apply_sincos_state_encoding": false, "use_relative_action": true, "memory_length": 1, "new_embodiment_train_ratio": 0 } }