| { |
| "modality_configs": "{'xdof': {'video': ModalityConfig(delta_indices=[0], modality_keys=['left_camera-images-rgb_320_240', 'top_camera-images-rgb_320_240', 'right_camera-images-rgb_320_240'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['gripper_pos_obs_left', 'gripper_pos_obs_right', 'joint_pos_obs_left', 'joint_pos_obs_right'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['gripper_pos_action_left', 'gripper_pos_action_right', 'joint_pos_action_left', 'joint_pos_action_right'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='gripper_pos_obs_left'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='gripper_pos_obs_right'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='joint_pos_obs_left'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='joint_pos_obs_right')]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.task'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'unitree_g1_full_body_with_waist_height_nav_cmd': {'video': ModalityConfig(delta_indices=[0], modality_keys=['ego_view'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_leg', 'right_leg', 'waist', 'left_arm', 'right_arm', 'left_hand', 'right_hand'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist', 'base_height_command', 'navigate_command'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'oxe_droid_joint_position_relative': {'video': ModalityConfig(delta_indices=[0], modality_keys=['exterior_image_1_left', 'wrist_image_left'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['joint_position', 'gripper_position'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], modality_keys=['joint_position', 'gripper_position'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.language.language_instruction', 'annotation.language.language_instruction_2', 'annotation.language.language_instruction_3'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'simpler_env_google': {'video': ModalityConfig(delta_indices=[0], modality_keys=['image'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['x', 'y', 'z', 'rx', 'ry', 'rz', 'rw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw'], action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'robocasa_panda_omron': {'video': ModalityConfig(delta_indices=[0], modality_keys=['res256_image_side_0', 'res256_image_side_1', 'res256_image_wrist_0'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['end_effector_position_relative', 'end_effector_rotation_relative', 'gripper_qpos', 'base_position', 'base_rotation'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], modality_keys=['end_effector_position', 'end_effector_rotation', 'gripper_close', 'base_motion', 'control_mode'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'gr1_unified': {'video': ModalityConfig(delta_indices=[0], modality_keys=['ego_view_bg_crop_pad_res256_freq20'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], sin_cos_embedding_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['task'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'rl_info': ModalityConfig(delta_indices=[0], modality_keys=[], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'sim_behavior_r1_pro': {'video': ModalityConfig(delta_indices=[0], modality_keys=['observation.images.rgb.head_256_256', 'observation.images.rgb.left_wrist_256_256', 'observation.images.rgb.right_wrist_256_256'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['robot_pos', 'robot_ori_cos', 'robot_ori_sin', 'robot_2d_ori', 'robot_2d_ori_cos', 'robot_2d_ori_sin', 'robot_lin_vel', 'robot_ang_vel', 'arm_left_qpos', 'arm_left_qpos_sin', 'arm_left_qpos_cos', 'eef_left_pos', 'eef_left_quat', 'gripper_left_qpos', 'arm_right_qpos', 'arm_right_qpos_sin', 'arm_right_qpos_cos', 'eef_right_pos', 'eef_right_quat', 'gripper_right_qpos', 'trunk_qpos'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], modality_keys=['base', 'torso', 'left_arm', 'left_gripper', 'right_arm', 'right_gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='trunk_qpos'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='arm_left_qpos'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='arm_right_qpos'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.coarse_action'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'simpler_env_widowx': {'video': ModalityConfig(delta_indices=[0], modality_keys=['image_0'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'pad', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw'], action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'agibot': {'video': ModalityConfig(delta_indices=[0], modality_keys=['top_head_pad_res256_freq10', 'hand_left_pad_res256_freq10', 'hand_right_pad_res256_freq10'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_arm_joint_position', 'right_arm_joint_position', 'left_effector_position', 'right_effector_position', 'head_position', 'waist_pitch', 'waist_lift'], sin_cos_embedding_keys=['left_arm_joint_position', 'right_arm_joint_position', 'head_position', 'waist_pitch'], mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['left_arm_joint_position', 'right_arm_joint_position', 'left_effector_position', 'right_effector_position', 'head_position', 'waist_pitch', 'waist_lift', 'robot_velocity'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.language.action_text'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}}", |
| "state_action_processor": "StateActionProcessor(modality_configs={'xdof': {'video': ModalityConfig(delta_indices=[0], modality_keys=['left_camera-images-rgb_320_240', 'top_camera-images-rgb_320_240', 'right_camera-images-rgb_320_240'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['gripper_pos_obs_left', 'gripper_pos_obs_right', 'joint_pos_obs_left', 'joint_pos_obs_right'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['gripper_pos_action_left', 'gripper_pos_action_right', 'joint_pos_action_left', 'joint_pos_action_right'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='gripper_pos_obs_left'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='gripper_pos_obs_right'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='joint_pos_obs_left'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='joint_pos_obs_right')]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.task'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'unitree_g1_full_body_with_waist_height_nav_cmd': {'video': ModalityConfig(delta_indices=[0], modality_keys=['ego_view'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_leg', 'right_leg', 'waist', 'left_arm', 'right_arm', 'left_hand', 'right_hand'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist', 'base_height_command', 'navigate_command'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'oxe_droid_joint_position_relative': {'video': ModalityConfig(delta_indices=[0], modality_keys=['exterior_image_1_left', 'wrist_image_left'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['joint_position', 'gripper_position'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], modality_keys=['joint_position', 'gripper_position'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.language.language_instruction', 'annotation.language.language_instruction_2', 'annotation.language.language_instruction_3'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'simpler_env_google': {'video': ModalityConfig(delta_indices=[0], modality_keys=['image'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['x', 'y', 'z', 'rx', 'ry', 'rz', 'rw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw'], action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'robocasa_panda_omron': {'video': ModalityConfig(delta_indices=[0], modality_keys=['res256_image_side_0', 'res256_image_side_1', 'res256_image_wrist_0'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['end_effector_position_relative', 'end_effector_rotation_relative', 'gripper_qpos', 'base_position', 'base_rotation'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], modality_keys=['end_effector_position', 'end_effector_rotation', 'gripper_close', 'base_motion', 'control_mode'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'gr1_unified': {'video': ModalityConfig(delta_indices=[0], modality_keys=['ego_view_bg_crop_pad_res256_freq20'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], sin_cos_embedding_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], modality_keys=['left_arm', 'right_arm', 'left_hand', 'right_hand', 'waist'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['task'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'rl_info': ModalityConfig(delta_indices=[0], modality_keys=[], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'sim_behavior_r1_pro': {'video': ModalityConfig(delta_indices=[0], modality_keys=['observation.images.rgb.head_256_256', 'observation.images.rgb.left_wrist_256_256', 'observation.images.rgb.right_wrist_256_256'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['robot_pos', 'robot_ori_cos', 'robot_ori_sin', 'robot_2d_ori', 'robot_2d_ori_cos', 'robot_2d_ori_sin', 'robot_lin_vel', 'robot_ang_vel', 'arm_left_qpos', 'arm_left_qpos_sin', 'arm_left_qpos_cos', 'eef_left_pos', 'eef_left_quat', 'gripper_left_qpos', 'arm_right_qpos', 'arm_right_qpos_sin', 'arm_right_qpos_cos', 'eef_right_pos', 'eef_right_quat', 'gripper_right_qpos', 'trunk_qpos'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], modality_keys=['base', 'torso', 'left_arm', 'left_gripper', 'right_arm', 'right_gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='trunk_qpos'), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='arm_left_qpos'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key='arm_right_qpos'), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.coarse_action'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'simpler_env_widowx': {'video': ModalityConfig(delta_indices=[0], modality_keys=['image_0'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'pad', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7], modality_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw', 'gripper'], sin_cos_embedding_keys=None, mean_std_embedding_keys=['x', 'y', 'z', 'roll', 'pitch', 'yaw'], action_configs=[ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.human.action.task_description'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}, 'agibot': {'video': ModalityConfig(delta_indices=[0], modality_keys=['top_head_pad_res256_freq10', 'hand_left_pad_res256_freq10', 'hand_right_pad_res256_freq10'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None), 'state': ModalityConfig(delta_indices=[0], modality_keys=['left_arm_joint_position', 'right_arm_joint_position', 'left_effector_position', 'right_effector_position', 'head_position', 'waist_pitch', 'waist_lift'], sin_cos_embedding_keys=['left_arm_joint_position', 'right_arm_joint_position', 'head_position', 'waist_pitch'], mean_std_embedding_keys=None, action_configs=None), 'action': ModalityConfig(delta_indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], modality_keys=['left_arm_joint_position', 'right_arm_joint_position', 'left_effector_position', 'right_effector_position', 'head_position', 'waist_pitch', 'waist_lift', 'robot_velocity'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=[ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.RELATIVE: 'relative'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None), ActionConfig(rep=<ActionRepresentation.ABSOLUTE: 'absolute'>, type=<ActionType.NON_EEF: 'non_eef'>, format=<ActionFormat.DEFAULT: 'default'>, state_key=None)]), 'language': ModalityConfig(delta_indices=[0], modality_keys=['annotation.language.action_text'], sin_cos_embedding_keys=None, mean_std_embedding_keys=None, action_configs=None)}}, statistics={}, use_percentiles=False, clip_outliers=True, apply_sincos_state_encoding=True, use_relative_action=True)", |
| "use_percentiles": "False", |
| "clip_outliers": "True", |
| "apply_sincos_state_encoding": "True", |
| "use_relative_action": "True", |
| "formalize_language": "True", |
| "model_name": "nvidia/Cosmos-Reason2-2B", |
| "model_type": "qwen", |
| "processor": "Qwen3VLProcessor:\n- image_processor: Qwen2VLImageProcessorFast {\n \"crop_size\": null,\n \"data_format\": \"channels_first\",\n \"default_to_square\": true,\n \"device\": null,\n \"disable_grouping\": null,\n \"do_center_crop\": null,\n \"do_convert_rgb\": true,\n \"do_normalize\": true,\n \"do_pad\": null,\n \"do_rescale\": true,\n \"do_resize\": true,\n \"image_mean\": [\n 0.5,\n 0.5,\n 0.5\n ],\n \"image_processor_type\": \"Qwen2VLImageProcessorFast\",\n \"image_std\": [\n 0.5,\n 0.5,\n 0.5\n ],\n \"input_data_format\": null,\n \"max_pixels\": null,\n \"merge_size\": 2,\n \"min_pixels\": null,\n \"pad_size\": null,\n \"patch_size\": 16,\n \"processor_class\": \"Qwen3VLProcessor\",\n \"resample\": 3,\n \"rescale_factor\": 0.00392156862745098,\n \"return_tensors\": null,\n \"size\": {\n \"longest_edge\": 16777216,\n \"shortest_edge\": 65536\n },\n \"temporal_patch_size\": 2\n}\n\n- tokenizer: Qwen2TokenizerFast(name_or_path='nvidia/Cosmos-Reason2-2B', vocab_size=151643, model_max_length=262144, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={\n\t151643: AddedToken(\"<|endoftext|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151644: AddedToken(\"<|im_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151645: AddedToken(\"<|im_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151646: AddedToken(\"<|object_ref_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151647: AddedToken(\"<|object_ref_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151648: AddedToken(\"<|box_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151649: AddedToken(\"<|box_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151650: AddedToken(\"<|quad_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151651: AddedToken(\"<|quad_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151652: AddedToken(\"<|vision_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151653: AddedToken(\"<|vision_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151654: AddedToken(\"<|vision_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151655: AddedToken(\"<|image_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151656: AddedToken(\"<|video_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n\t151657: AddedToken(\"<tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151658: AddedToken(\"</tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151659: AddedToken(\"<|fim_prefix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151660: AddedToken(\"<|fim_middle|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151661: AddedToken(\"<|fim_suffix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151662: AddedToken(\"<|fim_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151663: AddedToken(\"<|repo_name|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151664: AddedToken(\"<|file_sep|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151665: AddedToken(\"<tool_response>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151666: AddedToken(\"</tool_response>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151667: AddedToken(\"<think>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n\t151668: AddedToken(\"</think>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n}\n)\n- video_processor: Qwen3VLVideoProcessor {\n \"crop_size\": null,\n \"data_format\": \"channels_first\",\n \"default_to_square\": true,\n \"device\": null,\n \"do_center_crop\": null,\n \"do_convert_rgb\": true,\n \"do_normalize\": true,\n \"do_rescale\": true,\n \"do_resize\": true,\n \"do_sample_frames\": true,\n \"fps\": 2,\n \"image_mean\": [\n 0.5,\n 0.5,\n 0.5\n ],\n \"image_std\": [\n 0.5,\n 0.5,\n 0.5\n ],\n \"input_data_format\": null,\n \"max_frames\": 768,\n \"merge_size\": 2,\n \"min_frames\": 4,\n \"num_frames\": null,\n \"pad_size\": null,\n \"patch_size\": 16,\n \"processor_class\": \"Qwen3VLProcessor\",\n \"resample\": 3,\n \"rescale_factor\": 0.00392156862745098,\n \"return_metadata\": false,\n \"size\": {\n \"longest_edge\": 25165824,\n \"shortest_edge\": 4096\n },\n \"temporal_patch_size\": 2,\n \"video_metadata\": null,\n \"video_processor_type\": \"Qwen3VLVideoProcessor\"\n}\n\n\n{\n \"processor_class\": \"Qwen3VLProcessor\"\n}\n", |
| "max_state_dim": "128", |
| "max_action_dim": "128", |
| "max_action_horizon": "50", |
| "image_crop_size": "None", |
| "image_target_size": "None", |
| "random_rotation_angle": "None", |
| "color_jitter_params": "{'brightness': 0.1, 'contrast': 0.1, 'saturation': 0.1, 'hue': 0.1}", |
| "embodiment_id_mapping": "{'robocasa_panda_omron': 13, 'oxe_droid': 17, 'oxe_droid_joint_position_relative': 17, 'oxe_fractal': 18, 'oxe_language_table': 19, 'oxe_bridge': 20, 'unknown': 22, 'gr1_unified': 20, 'agibot': 26, 'oxe_mutex': 28, 'oxe_roboset': 29, 'oxe_plex': 30, 'dream': 31, 'language_table_sim': 7, 'gr1_isaac': 0, 'xdof': 23, 'xdof_oss_data': 27, 'xdof_h16': 23, 'sim_behavior_r1_pro': 24, 'unitree_g1_full_body_with_waist_height_nav_cmd': 25, 'unitree_g1_full_body_with_waist_height_nav_cmd_sim': 8, 'unitree_g1_whole_body_teleop_latent': 9, 'unitree_g1_whole_body_teleop_smpl': 16, 'simpler_env_google': 0, 'simpler_env_widowx': 1, 'libero_sim': 2, 'droid_sim': 3, 'real_r1_pro_sharpa': 8, 'r1_pro': 27, 'r1_pro_single-view': 27, 'new_embodiment': 10, 'so100_2rgb': 6, 'so100_3rgb': 6, 'robomind_agilex_3rgb': 4, 'robomind_franka_1rgb': 5, 'robomind_franka_3rgb': 5, 'robomind_tienkung_gello_1rgb': 11, 'robomind_ur_1rgb': 12, 'robomind_tienkung_xsens_1rgb': 13, 'molmoact_franka_3rgb': 14, 'galaxea_r1_4rgb': 15}", |
| "shortest_image_edge": "256", |
| "crop_fraction": "0.95", |
| "use_albumentations": "True", |
| "train_image_transform": "ReplayCompose([\n SmallestMaxSize(p=1.0, max_size=[256], interpolation=3),\n FractionalRandomCrop(p=1.0, crop_fraction=0.95),\n SmallestMaxSize(p=1.0, max_size=[256], interpolation=3),\n ColorJitter(p=1.0, brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9, 1.1), hue=(-0.1, 0.1)),\n], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={}, is_check_shapes=True, save_key='replay')", |
| "eval_image_transform": "Compose([\n SmallestMaxSize(p=1.0, max_size=[256], interpolation=3),\n FractionalCenterCrop(p=1.0, crop_fraction=0.95),\n SmallestMaxSize(p=1.0, max_size=[256], interpolation=3),\n], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={}, is_check_shapes=True)", |
| "_collator": "Gr00tN1d7DataCollator(model_name=nvidia/Cosmos-Reason2-2B, model_type=qwen)", |
| "training": "True" |
| } |
|
|